In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import numpy as np

# Tensors

In [3]:
# Allocate a 5x3 tensor WITHOUT initializing it: the contents are whatever was
# already in that memory, so the zeros shown in the output are not guaranteed.
torch.empty(5, 3)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [4]:
# 5x3 tensor of samples drawn uniformly from [0, 1). No seed is set in this
# notebook, so the values differ on every run.
torch.rand(5, 3)

tensor([[0.5360, 0.9640, 0.1853],
        [0.4346, 0.1448, 0.9242],
        [0.1390, 0.3169, 0.7723],
        [0.6147, 0.9605, 0.8629],
        [0.7233, 0.3854, 0.7981]])

In [5]:
# 5x3 tensor of zeros stored as integers (torch.long) instead of the default
# floating-point dtype - note the missing decimal points in the output.
torch.zeros(5, 3, dtype=torch.long)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

In [7]:
# Build a tensor directly from Python data; mixing 5.5 and 3 gives a float tensor.
# NOTE(review): the saved output also shows a `torch.Size([2])` line that this
# cell does not print - it looks stale; re-run the cell to refresh it.
torch.tensor([5.5, 3])

torch.Size([2])


tensor([5.5000, 3.0000])

# Operations

In [14]:
# Two independent 5x3 uniform random tensors and their element-wise sum.
x, y = torch.rand(5, 3), torch.rand(5, 3)
z = torch.add(x, y)

# One print call with a newline separator emits exactly what three separate
# print calls would, without building a throwaway tuple of None values.
print(x, y, z, sep="\n")

tensor([[0.9012, 0.2894, 0.7550],
        [0.4647, 0.7483, 0.7746],
        [0.6702, 0.7412, 0.5382],
        [0.2512, 0.9229, 0.3870],
        [0.1505, 0.8481, 0.9838]])
tensor([[0.8694, 0.8008, 0.2592],
        [0.7273, 0.3538, 0.0994],
        [0.5323, 0.9855, 0.5318],
        [0.7599, 0.3284, 0.2819],
        [0.9614, 0.0394, 0.8843]])
tensor([[1.7706, 1.0902, 1.0141],
        [1.1920, 1.1021, 0.8740],
        [1.2025, 1.7268, 1.0700],
        [1.0111, 1.2514, 0.6688],
        [1.1118, 0.8875, 1.8681]])


In [17]:
# NumPy-style indexing: every row, column index 1 (i.e. the second column) of x.
x[:, 1]

tensor([0.2894, 0.7483, 0.7412, 0.9229, 0.8481])

In [22]:
# view() re-reads the same 16 elements under a different shape.
x = torch.randn(4, 4)

# -1 lets PyTorch infer that dimension from the others (16 / 8 = 2 rows for y,
# all 16 elements in a single dimension for z).
y = x.view(-1, 8)
z = x.view(-1)

print(x.shape, y.shape, z.shape)

torch.Size([4, 4]) torch.Size([2, 8]) torch.Size([16])


In [23]:
# A one-element tensor; .item() extracts its value as a plain Python number.
x = torch.randn((1,))

(x, x.item())

(tensor([0.4102]), 0.4102270007133484)

In [26]:
# Convert a tensor to a NumPy array; the array keeps the tensor's float32 dtype.
torch.ones(5, 2).numpy()

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]], dtype=float32)

In [40]:
# from_numpy() wraps the array and keeps its float64 dtype. The addition is
# out-of-place, so it produces a new tensor and the source array x stays all ones.
x = np.ones((5, 2))
y = torch.from_numpy(x) + 1

print(x, y, sep="\n")

[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]
tensor([[2., 2.],
        [2., 2.],
        [2., 2.],
        [2., 2.],
        [2., 2.]], dtype=torch.float64)


# Autograd

In [13]:
# A 2x2 tensor of ones that autograd tracks: operations applied to it are
# recorded so gradients can later be computed with respect to it.
x = torch.ones((2, 2), requires_grad=True)

x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [14]:
# y is the result of an operation on a tracked tensor, so it carries a grad_fn
# pointing at the function that created it.
y = torch.add(x, 2)

(y, y.grad_fn)

(tensor([[3., 3.],
         [3., 3.]], grad_fn=<AddBackward0>), <AddBackward0 at 0x7f9a5493e1d0>)

In [15]:
# z = 3 * y^2 element-wise, then collapse to a scalar by averaging.
z = (y * y) * 3
out = torch.mean(z)

(z, out, z.requires_grad)

(tensor([[27., 27.],
         [27., 27.]], grad_fn=<MulBackward0>),
 tensor(27., grad_fn=<MeanBackward0>),
 True)

In [16]:
# Backpropagate from the scalar `out`; this fills in x.grad.
# Given the preceding Autograd cells (y = x + 2, z = 3 * y * y, out = mean of
# the 4 entries of z), d(out)/dx = (1/4) * 6 * (x + 2) = 1.5 * (x + 2),
# which is 4.5 for every entry when x is all ones.
out.backward()

x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

# Architecture

In [14]:
class Net(nn.Module):
    """Small LeNet-style convolutional network.

    Maps a batch of single-channel 32x32 images, shaped ``(N, 1, 32, 32)``,
    to ``(N, 10)`` raw scores. The first fully connected layer is sized for
    exactly that input resolution: each 3x3 convolution (no padding) trims 2
    pixels and each 2x2 max-pool halves the size, so 32 -> 30 -> 15 -> 13 -> 6,
    leaving 16 feature maps of 6x6.
    """

    def __init__(self):
        super().__init__()
        # 1 input image channel -> 6 feature maps -> 16 feature maps, each
        # layer using a 3x3 square convolution kernel.
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Affine operations: y = Wx + b.
        # 16 channels * 6 * 6 spatial positions left after the conv/pool stack.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return one row of 10 scores per sample."""
        # Max pooling over a (2, 2) window.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # For a square window a single number is enough.
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse everything except the batch dimension into one feature axis.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the raw scores are returned.
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of all dimensions except the first (batch) one,
        and 1 when ``x`` has no further dimensions. ``forward`` no longer
        needs it; it is kept so existing callers continue to work.
        """
        return x.size()[1:].numel()


net = Net()

print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [15]:
# Collect every learnable tensor of the network into a list so they can be
# counted and indexed; the first entry is conv1's weight.
params = [*net.parameters()]

(len(params), params[0].shape)

(10, torch.Size([6, 1, 3, 3]))

In [16]:
# One random sample shaped (batch=1, channels=1, height=32, width=32), fed
# through the network to get a (1, 10) output.
# NOTE(review): `input` shadows the Python builtin of the same name; it is kept
# because the optimizer cell further down reads it.
input = torch.randn(1, 1, 32, 32)
out = net(input)

out

tensor([[ 0.0545, -0.1060, -0.0669, -0.1598,  0.0252,  0.0451, -0.1353, -0.0319,
          0.0355,  0.0280]], grad_fn=<AddmmBackward>)

In [17]:
# Dummy target: 10 random values reshaped to (1, 10) so it has the same shape
# as the network output.
target = torch.randn(10).view(1, -1)

target

tensor([[ 1.5833,  0.7027,  0.2802, -0.1056,  1.4540, -1.6722,  0.4904, -1.3419,
          1.4040, -0.4102]])

In [18]:
# Mean-squared error between the network output and the dummy target.
criterion = nn.MSELoss()

loss = criterion(out, target)

# The grad_fn shows that the loss is attached to the autograd graph.
loss, loss.grad_fn

(tensor(1.2278, grad_fn=<MseLossBackward>),
 <MseLossBackward at 0x7f305745e3c8>)

In [19]:
# Clear gradients left over from any earlier backward pass so that the values
# below come from this loss alone.
net.zero_grad()

# backward() accumulates into an existing .grad tensor in place. Holding only a
# reference would make `before` and `after` the same object (and show the same
# numbers) whenever .grad is already a tensor, so take a snapshot instead.
# On a fresh network .grad is still None and there is nothing to copy.
before = net.conv1.bias.grad
if before is not None:
    before = before.detach().clone()
loss.backward()
after = net.conv1.bias.grad

before, after

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([-0.0139, -0.0140,  0.0193, -0.0020, -0.0040, -0.0094])


In [26]:
# One manual optimization step with plain stochastic gradient descent.
optimizer = optim.SGD(net.parameters(), lr=0.01)
# Reset gradients first: backward() adds to whatever is already stored in .grad.
optimizer.zero_grad()

# Recompute the output so this loss has its own graph to backpropagate through.
out = net(input)

loss = criterion(out, target)
loss.backward()

# Update every parameter registered with the optimizer from its .grad.
optimizer.step()

# This is the loss computed before the step was applied, not after it.
loss

tensor(1.1329, grad_fn=<MseLossBackward>)