In [1]:
import torch
torch.__version__

'1.7.1+cpu'

In [2]:
# 创建矩阵
x = torch.empty(5, 3)
x = torch.rand(5, 2)
x

tensor([[0.8595, 0.2654],
        [0.7043, 0.3573],
        [0.8730, 0.6549],
        [0.3220, 0.5844],
        [0.8995, 0.4817]])

In [3]:
torch.zeros(2, 3, dtype=torch.long)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [4]:
# 直接传入数据
torch.tensor([1, 2, 3])
x.new_ones(5, 2)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])

In [5]:
torch.randn_like(x)

tensor([[ 1.1838,  0.4668],
        [-0.9403,  0.4541],
        [-1.3573,  0.4860],
        [-0.4210, -1.0530],
        [ 0.9655,  1.6232]])

In [6]:
x.size()

torch.Size([5, 2])

## 变换维度

In [7]:
x.view(10).size()
x.view(2, 5).size()

torch.Size([2, 5])

## 与numpy arr 互转

In [8]:
x.numpy()
import numpy as np
torch.from_numpy(np.array([1, 2, 3]))

# 转换之后共享同一块内存空间，如果tensor修改，则numpy跟着变换

tensor([1, 2, 3], dtype=torch.int32)

## 自动求导

In [9]:
x = torch.randn(3, 4, requires_grad=True)   #表示可以对x进行求导
x
# 当设置 requires_grad=True 时，会开辟另外一块空间，x.grad 也是tensor，存放x的梯度值

tensor([[ 0.6069,  1.0407, -0.1315, -0.4904],
        [-1.3172, -0.2398, -1.3741,  0.4628],
        [ 1.3226, -0.1912, -0.9714,  0.0145]], requires_grad=True)

In [10]:
b = torch.randn(3, 4, requires_grad=True)
t = x + b
y = t.sum()
y

tensor(4.3631, grad_fn=<SumBackward0>)

In [11]:
y.backward()# 需要在最后一步标注一下

y.backward(retain_graph=True) # 如果设置此参数，会把之前的梯度都累加起来，b.grad 返回的是累加梯度

b.grad

tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.]])

In [12]:
x.requires_grad, b.requires_grad, t.requires_grad
# 虽然没有给t指定，但是

(True, True, True)

## 线性回归

In [13]:
x = np.array([i for i in range(10)]).astype(np.float32).reshape(-1, 1)
y = 2* x + 1

In [14]:
import torch.nn as nn
class LinerRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinerRegressionModel, self).__init__()
        # 全连接层
        self.linear = nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        # 前向传播
        out = self.linear(x)
        return out
    
model = LinerRegressionModel(1, 1)
model

LinerRegressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

In [15]:
epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [16]:
for epoch in range(epochs):
    epoch += 1
    
    # 需要将np.array 转为 tensor
    inputs = torch.from_numpy(x)
    labels = torch.from_numpy(y)
    
    # 每一次迭代梯度要清零
    optimizer.zero_grad()
    
    # 前向传播
    outputs = model(inputs)
    
    # loss
    loss = criterion(outputs, labels)
    
    # 反向传播
    loss.backward()
    
    # 更新权重参数
    optimizer.step()
    if epoch % 50 == 0:
        print("epoch {0}, loss {1}".format(epoch, loss.item()))

epoch 50, loss 0.06582018733024597
epoch 100, loss 0.03735992684960365
epoch 150, loss 0.021205667406320572
epoch 200, loss 0.012036464177072048
epoch 250, loss 0.0068319677375257015
epoch 300, loss 0.003877840470522642
epoch 350, loss 0.002201108494773507
epoch 400, loss 0.001249360153451562
epoch 450, loss 0.000709150917828083
epoch 500, loss 0.0004025031812489033
epoch 550, loss 0.0002284664224134758
epoch 600, loss 0.0001296770788030699
epoch 650, loss 7.360892050201073e-05
epoch 700, loss 4.1781571781029925e-05
epoch 750, loss 2.3716762370895594e-05
epoch 800, loss 1.346182580164168e-05
epoch 850, loss 7.642683158337604e-06
epoch 900, loss 4.337319751357427e-06
epoch 950, loss 2.4614178073534276e-06
epoch 1000, loss 1.3970043255540077e-06


In [17]:
model(torch.from_numpy(x).requires_grad_()).data.numpy()

array([[ 0.9978157],
       [ 2.9981642],
       [ 4.9985123],
       [ 6.998861 ],
       [ 8.999209 ],
       [10.9995575],
       [12.999907 ],
       [15.000255 ],
       [17.000603 ],
       [19.000952 ]], dtype=float32)

## 模型的保存和读取

In [18]:
torch.save(model.state_dict(), 'model.pkl')

In [19]:
model.load_state_dict(torch.load('model.pkl'))

<All keys matched successfully>

## 使用 GPU 进行训练

In [20]:
# 需要把数据传入到 cuda中
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

inputs = torch.from_numpy(x).to(device)
labels = torch.from_numpy(y).to(device)

# 1. 将模型传到 GPU。 
# 2. 将数据传到 GPU。

torch.ones_like(inputs, device=device)  # 将tensor直接定义到GPU
label.to("cpu", torch.double)   # 传到CPU 

## 常见的tensor形式
### 1. scalar
### 2. vector
### 3. matrix
### 4. n-dimensional tensor

In [22]:
x = torch.tensor(42.)
print(x)
print(x.item())

tensor(42.)
42.0


In [26]:
torch.tensor([1, 2, 3])
m = torch.tensor([[1,2], [3, 4]])

In [27]:
m.matmul(m)

tensor([[ 7, 10],
        [15, 22]])

In [28]:
  m * m

tensor([[ 1,  4],
        [ 9, 16]])

## hub 模块
### 加载预训练模型

In [32]:
torch.hub.list("pytorch/vision:v0.4.2")

Downloading: "https://github.com/pytorch/vision/archive/v0.4.2.zip" to C:\Users\xiexiaoxuan/.cache\torch\hub\v0.4.2.zip


ModuleNotFoundError: No module named 'PIL'

 ## 构建神经网络

In [38]:
x = torch.from_numpy(np.array([[1], [2]], dtype=float))
y = torch.from_numpy(np.array([3, 4], dtype=float))

weight = torch.randn((1, 128), dtype=float, requires_grad=True)
biases = torch.randn(128, dtype=float, requires_grad=True)
weight2 = torch.randn((128, 1), dtype=float, requires_grad=True)
biases2 = torch.randn(1, dtype=float, requires_grad=True)

learning_rate = 0.001
losses = []

for i in range(1000):
    hidden = x.mm(weight) + biases
    hidden = torch.relu(hidden)
    predictions = hidden.mm(weight2) + biases2
    loss = torch.mean((predictions - y)**2)
    losses.append(loss.data.numpy())
    
    if i % 100 == 0:
        print("loss", loss)
    loss.backward()
    
    weight.data.add_(- learning_rate * weight.grad.data)
    biases.data.add_(- learning_rate * biases.grad.data)
    weight2.data.add_(- learning_rate * weight2.grad.data)
    biases2.data.add_(- learning_rate * biases2.grad.data)
    
    weight.grad.data.zero_()
    biases.grad.data.zero_()
    weight2.grad.data.zero_()
    biases2.grad.data.zero_()

loss tensor(625.7877, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)
loss tensor(0.2500, dtype=torch.float64, grad_fn=<MeanBackward0>)


In [None]:
# 构建网络比较简单的方法
input_size = input_features.shape[1]
hidden_size = 128
output_size = 1
batch_size = 16

my_nn = torch.nn.Sequential(
    torch.nn.Linear(input_size, hidden_size),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hidden_size, output_size),
)
cost = torch.nn.MSELoss(reduction="mean")
optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001)

losses = []
for i in range(1000):
    # mini-batch
    for start in range(0, len(x), batch_size):
        ...
        prediction = my_nn(input_x)
        loss = cost(prediction, output_y)
        optimizer.zero_grad()
        loss.backward(retain_graph=True)# retain_graph 是否需要重复执行此代码
        optimizer.step()
        batch_loss.append(loss.data.numpy())

## torch.nn.functional

In [None]:
import torch.nn.functional as F
loss_func = F.cross_entropy
loss_func(y_pred, y)

## torch.nn.Module

In [45]:
# 必须继承nn.Module 且在其构造函数中需要调用nn.Module的构造函数
# 无需写反向传播函数，nn.Module能够利用autograd自动实现反向传播
# Module 中的可学习参数可以通过 named_parameters() 或者 parameters() 返回迭代器

from torch import nn

class Mnist_NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 256)
        self.out = nn.Linear(256, 10)
        
    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = self.out(x)
        return x

In [46]:
model = Mnist_NN()
print(model)

Mnist_NN(
  (hidden1): Linear(in_features=784, out_features=128, bias=True)
  (hidden2): Linear(in_features=128, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)


In [48]:
for name, parameter in model.named_parameters():
    print(name, parameter, parameter.size())

hidden1.weight Parameter containing:
tensor([[ 0.0172, -0.0332,  0.0010,  ..., -0.0230, -0.0283,  0.0023],
        [-0.0085,  0.0022,  0.0139,  ...,  0.0103, -0.0030, -0.0200],
        [ 0.0181,  0.0345,  0.0235,  ...,  0.0194, -0.0295,  0.0158],
        ...,
        [-0.0234,  0.0125,  0.0062,  ...,  0.0277, -0.0077,  0.0292],
        [-0.0233,  0.0351,  0.0119,  ..., -0.0062, -0.0264,  0.0169],
        [-0.0034, -0.0234,  0.0243,  ...,  0.0256,  0.0046, -0.0133]],
       requires_grad=True) torch.Size([128, 784])
hidden1.bias Parameter containing:
tensor([-0.0128,  0.0149,  0.0081, -0.0106,  0.0272, -0.0277,  0.0066, -0.0211,
         0.0080,  0.0029, -0.0234,  0.0152, -0.0098,  0.0317,  0.0245, -0.0321,
         0.0267, -0.0114, -0.0148,  0.0166, -0.0252,  0.0274, -0.0037,  0.0205,
        -0.0245, -0.0086,  0.0154,  0.0152,  0.0231,  0.0137,  0.0216, -0.0062,
         0.0272,  0.0278, -0.0157, -0.0343, -0.0064, -0.0219, -0.0137, -0.0030,
        -0.0227,  0.0336,  0.0063,  0.0321, 

## TensorDataset and DataLoader

In [None]:
from torch.utils.data import TensorDataset, DataLoader

train_ds = TensorDataset(x, y)
train_dl = DataLoader(train_ds, batch_size=128, shuffle=True)

# 一般在训练模型的时候加上 model.train(), 这样会正常使用 batch Normalization 和 dropout
# 测试的时候一般选择 model.eval()， 这样就不会使用 batch normalization 和 dropout

for step in range(steps):
    model.train()
    for x_train, y_train in train_dl:
        loss_batch(...)
    
    model.eval()
    with torch.no_grad():
        losses, nums = zip(*[loss_batch(...) for x_valid, y_valid in valid_dl])
    valid_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

## 卷积神经网络模块构建
#### 卷积最后结果是一个特征图，需要把图转换成向量才能做分类或者回归任务

In [56]:
import torch
from torch import nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            F.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.out = nn.Linear(32*7*7, 10)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(s.size(0), -1)
        output = self.out(x)

In [57]:
def accuracy(predictions, labels):
    pred = torch.max(predictions.data, 1)[1]
    rights = pred.eq(labels.data.view_as(pred)).sum()
    return rights, len(labels)

In [62]:
net = CNN()
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(num_epochs):
    train_rights = []
    
    for batch_index, (data, target) in enumerate(train_loader):
        net.train()
        output = net(data)
        loss_num = loss(output, target)
        optimizer.zero_grad()
        
        loss_num.backward()
        optimizer.step()
        right = accuracy(output, target)
        train_right.append(right)
        

AttributeError: module 'torch.nn.functional' has no attribute 'ReLU'

In [65]:
from torch.nn.functional import relu 

## torchversion

In [None]:
# dataset, 预训练model，transform 数据转换增强...
pip install torchversion