## PyTorch Quick Tutorial

https://github.com/chenyuntc/pytorch-book

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
print(torch.__version__)

0.4.1


### 1. Tensor

Tensor manipulations are very similar to those of `numpy` arrays.

In [2]:
# Allocate a 5x3 float tensor WITHOUT initializing it: the values are whatever
# happened to be in memory (they may look like zeros, but that is not guaranteed).
# torch.empty() states this intent explicitly; use torch.zeros() for real zeros.
X = torch.empty(5, 3)
X

tensor([[0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000]])

In [3]:
# Build a float tensor from existing data. torch.tensor() is the factory
# function for this; float literals keep the float32 dtype that the legacy
# torch.Tensor constructor produced.
X = torch.tensor([[1., 2.], [3., 4.]])
X

tensor([[1., 2.],
        [3., 4.]])

In [4]:
# 4x4 tensor of random samples (uniform distribution)
X = torch.rand(4, 4)
X

tensor([[0.0929, 0.0449, 0.4525, 0.9143],
        [0.3625, 0.8667, 0.0922, 0.2342],
        [0.7313, 0.7269, 0.6557, 0.7181],
        [0.5245, 0.4247, 0.8608, 0.4895]])

In [5]:
# .size() and .shape report the same torch.Size
X.size(), X.shape

(torch.Size([4, 4]), torch.Size([4, 4]))

In [6]:
# element-wise add: the `+` operator and torch.add() give the same result
X = torch.rand(1, 2)
y = torch.rand(1, 2)

print(X + y)
print(torch.add(X, y))

tensor([[0.9054, 1.5732]])
tensor([[0.9054, 1.5732]])


In [7]:
# tensor -> numpy array
print(X.numpy())

# numpy array -> tensor. The array gets its own name so that `X` always refers
# to a tensor; torch.from_numpy() keeps numpy's float64 dtype.
X_np = np.random.randn(1, 2)
print(X_np)

X = torch.from_numpy(X_np)
print(X)

[[ 0.27921349  0.79793227]]
[[ 0.36253728 -0.5846798 ]]
tensor([[ 0.3625, -0.5847]], dtype=torch.float64)


In [8]:
# indexing a tensor yields another tensor (zero-dimensional here), not a Python number
X[0, 1]

tensor(-0.5847, dtype=torch.float64)

In [9]:
# .item() converts a single-element tensor to a plain Python number
X[0, 1].item()

-0.5846797974288228

In [10]:
# Copy vs. shared memory:
#   - X.detach() returns a tensor that shares storage with X
#     (torch.from_numpy() likewise shares memory with the source array).
#   - X.clone().detach() makes an independent copy. This is the recommended way
#     to copy an existing tensor; torch.tensor(X) also copies but is meant
#     for building tensors from Python / numpy data.
X_c = X.detach()
X_ = X.clone().detach()
X[0, 1] = 10000  # in-place write: visible through X_c, not through X_
print(X)
print(X_)
print(X_c)

tensor([[3.6254e-01, 1.0000e+04]], dtype=torch.float64)
tensor([[ 0.3625, -0.5847]], dtype=torch.float64)
tensor([[3.6254e-01, 1.0000e+04]], dtype=torch.float64)


### 2. Autograd

In [11]:
# create a tensor that autograd tracks
# (alternatively, set X.requires_grad = True on an existing tensor)
X = torch.ones(2, 2, requires_grad=True)
X

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [12]:
# reduce X to a scalar; the result records the op that produced it (grad_fn)
y = X.sum()
y

tensor(4., grad_fn=<SumBackward0>)

In [13]:
# the backward function recorded for the op that created y
y.grad_fn

<SumBackward0 at 0x73b67f0>

In [14]:
# back-propagate from the scalar y; the gradient is stored in X.grad
y.backward()

In [15]:
# dy/dX: gradient of y with respect to X
X.grad

tensor([[1., 1.],
        [1., 1.]])

In [16]:
# back-propagate again: gradients are accumulated into X.grad, not overwritten
y.backward()
X.grad

tensor([[2., 2.],
        [2., 2.]])

In [17]:
# reset the accumulated gradient in place before the next backward pass.
# X.grad is itself a tensor, so zero_() can be called on it directly; going
# through the legacy `.data` attribute is unnecessary.
X.grad.zero_()
X.grad

tensor([[0., 0.],
        [0., 0.]])

### 3. Build a Simple NN

In [18]:
class NeuralNet(nn.Module):
    """Small convolutional network: two conv + max-pool stages, then three linear layers.

    The first linear layer expects 16*5*5 = 400 features per sample, which is
    what the two conv/pool stages produce for a single-channel 32x32 input.
    The last layer emits 10 raw values; no activation is applied to them.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: 1 -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected stages: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120, bias=True)
        self.fc2 = nn.Linear(120, 84, bias=True)
        self.fc3 = nn.Linear(84, 10, bias=True)

    def forward(self, x):
        """Run the network on batch ``x`` and return a (batch, 10) tensor."""
        # Stage 1: conv -> ReLU -> 2x2 max-pool.
        stage1 = F.relu(self.conv1(x))
        stage1 = F.max_pool2d(stage1, 2)
        # Stage 2: same pattern with the second convolution.
        stage2 = F.relu(self.conv2(stage1))
        stage2 = F.max_pool2d(stage2, 2)
        # Flatten everything except the batch dimension; view() returns a new
        # tensor with the same data but a different shape.
        batch_size = stage2.size(0)
        flat = stage2.view(batch_size, -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [19]:
# instantiate the network; displaying it lists the registered layers
model = NeuralNet()
model

NeuralNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [20]:
# .parameters() returns a generator; materialize it into a list
params = list(model.parameters())

In [21]:
# .named_parameters() yields (name, parameter) pairs; print each name with its shape
for name, param in model.named_parameters():
    print(f'{name:15s}: {param.size()}')

conv1.weight   : torch.Size([6, 1, 5, 5])
conv1.bias     : torch.Size([6])
conv2.weight   : torch.Size([16, 6, 5, 5])
conv2.bias     : torch.Size([16])
fc1.weight     : torch.Size([120, 400])
fc1.bias       : torch.Size([120])
fc2.weight     : torch.Size([84, 120])
fc2.bias       : torch.Size([84])
fc3.weight     : torch.Size([10, 84])
fc3.bias       : torch.Size([10])


- Forward Prop

In [22]:
# random input: batch of 1, 1 channel (matches conv1's in_channels), 32x32
inp = torch.randn(1, 1, 32, 32)
# calling the module runs its forward()
out = model(inp)
out.size()

torch.Size([1, 10])

In [23]:
# the 10 output values for the single input sample
out

tensor([[-0.0278,  0.0449, -0.0919, -0.0156,  0.0256,  0.0244, -0.0740, -0.0356,
          0.0816,  0.0024]], grad_fn=<ThAddmmBackward>)

- Backward Prop

In [24]:
# no backward pass has run on this model yet, so the gradient is None (no output shown)
model.conv1.bias.grad

In [25]:
# clear any existing parameter gradients first
model.zero_grad()
# `out` is not a scalar, so backward() is given an upstream gradient of the
# same shape; ones_like(out) supplies it without hard-coding the (1, 10) shape
out.backward(torch.ones_like(out))

In [26]:
# the backward pass has now populated the gradient
model.conv1.bias.grad

tensor([-0.0355, -0.0345,  0.0463,  0.0280, -0.0086, -0.0035])

- Loss Function

In [27]:
# fresh model and input, so no gradients carry over from the cells above
model = NeuralNet()
inp = torch.randn(1, 1, 32, 32)
out = model(inp)
# dummy target 0., 1., ..., 9. reshaped to (1, 10) to match `out`
target = torch.arange(0., 10.).view(1, 10)

# mean-squared-error loss L(y_hat, y) between prediction and target
func = nn.MSELoss()
loss = func(out, target)
loss

tensor(28.2352, grad_fn=<MseLossBackward>)

In [28]:
# the model was just rebuilt and has no gradients yet: .grad is still None after zero_grad()
model.zero_grad()
print(f'grads of conv1.bias before back-prop = {model.conv1.bias.grad}')

# back-propagate the loss to fill in the parameter gradients
loss.backward()
print(f'grads of conv1.bias after back-prop = {model.conv1.bias.grad}')

grads of conv1.bias before back-prop = None
grads of conv1.bias after back-prop = tensor([-0.0591, -0.0008,  0.0274,  0.0043, -0.1176, -0.0581])


- Optimization

In [29]:
# SGD optimizer over all model parameters
optimizer = optim.SGD(model.parameters(), lr=0.01)

# clear the gradients left by the previous backward pass
optimizer.zero_grad()

# forward pass and loss
out = model(inp)
loss = func(out, target)

# back-prop
loss.backward()

# update parameters (W, b)
optimizer.step()