In [9]:
import torch
from torch import nn
import torchvision as tv
import time

## DataSet

In [10]:
BATCH_SIZE = 256
train_dataset = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [11]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = torch.Tensor([0]), 0
    net.eval()
    for X, y in data_iter:
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [12]:
def train(net, train_iter, test_iter, trainer, num_epochs):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

## LeNet

In [13]:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.Sigmoid(),
    nn.AvgPool2d(2, stride=2),
    nn.Flatten(),
    nn.Linear(400, 120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    nn.Linear(84, 10)
)

In [None]:
lr, num_epochs = 0.9, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

## AlexNet

In [14]:
BATCH_SIZE=32
transoforms = tv.transforms.Compose([
    tv.transforms.Resize((224,224)),
    tv.transforms.ToTensor()
])
train_dataset = tv.datasets.MNIST('.', train=True, transform=transoforms, download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=transoforms, download=True)

train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [15]:
net = nn.Sequential(
    nn.Conv2d(1, 96, kernel_size=11, stride=4),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),
    nn.Conv2d(96, 256, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),
    nn.Conv2d(256, 384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),
    nn.Flatten(),
    nn.Linear(6400, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 10)
)

In [16]:
lr, num_epochs  = 0.01, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

Step. time since epoch: 1.129. Train acc: 0.000. Train Loss: 73.699
Step. time since epoch: 1.750. Train acc: 0.062. Train Loss: 73.771
Step. time since epoch: 2.375. Train acc: 0.156. Train Loss: 73.507
Step. time since epoch: 2.988. Train acc: 0.156. Train Loss: 73.394
Step. time since epoch: 3.601. Train acc: 0.125. Train Loss: 73.793
Step. time since epoch: 4.198. Train acc: 0.094. Train Loss: 74.203
Step. time since epoch: 4.825. Train acc: 0.156. Train Loss: 73.325
Step. time since epoch: 5.474. Train acc: 0.125. Train Loss: 73.438
Step. time since epoch: 6.083. Train acc: 0.094. Train Loss: 74.052
Step. time since epoch: 6.688. Train acc: 0.125. Train Loss: 73.665
Step. time since epoch: 7.307. Train acc: 0.062. Train Loss: 73.750
Step. time since epoch: 7.910. Train acc: 0.156. Train Loss: 73.224
Step. time since epoch: 8.523. Train acc: 0.156. Train Loss: 73.590
Step. time since epoch: 9.134. Train acc: 0.094. Train Loss: 73.764
Step. time since epoch: 9.775. Train acc: 0.188.

KeyboardInterrupt: 

## VGG

In [24]:
def vgg_block(num_convs, input_channels, num_channels):

    block = nn.Sequential(
        nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1),
        nn.ReLU()
    )

    for i in range(num_convs - 1):
        block.add_module("conv{}".format(i),
                         nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
                         )
        block.add_module("relu{}".format(i),
                         nn.ReLU()
                         )

    block.add_module("pool", nn.MaxPool2d(2, stride=2))

    return block

In [25]:
conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512))

In [26]:
def vgg(conv_arch):
    net = nn.Sequential()

    for i, (num_convs, input_ch, num_channels) in enumerate(conv_arch):
        net.add_module("block{}".format(i), vgg_block(num_convs, input_ch, num_channels))

    
    classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(6272, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 10))

    net.add_module('classifier', classifier)
    return net

net = vgg(conv_arch)

In [27]:
x = train_dataset[0][0].reshape(1,1,224,224)

for l in net:
    x = l(x)
    print(l, "\t\t", x.shape)

Sequential(
  (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 64, 112, 112])
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 128, 56, 56])
Sequential(
  (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (conv0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu0): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 256, 28, 28])
Sequential(
  (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (conv0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu0): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, pad

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x25088 and 6272x4096)

In [28]:
ratio = 4
small_conv_arch = [(v[0], max(v[1] // ratio, 1), v[2] // ratio) for v in conv_arch]
net = vgg(small_conv_arch)

In [29]:
small_conv_arch

[(1, 1, 16), (1, 16, 32), (2, 32, 64), (2, 64, 128), (2, 128, 128)]

In [30]:
x = train_dataset[0][0].reshape(1,1,224,224)
for l in net:
    x = l(x)
    print(l, "\t\t", x.shape)

Sequential(
  (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 16, 112, 112])
Sequential(
  (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 32, 56, 56])
Sequential(
  (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (conv0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu0): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 64, 28, 28])
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (conv0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu0): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, 

In [31]:
lr, num_epochs = 0.05, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

Step. time since epoch: 1.482. Train acc: 0.125. Train Loss: 73.779
Step. time since epoch: 2.429. Train acc: 0.062. Train Loss: 74.387
Step. time since epoch: 3.302. Train acc: 0.062. Train Loss: 73.436
Step. time since epoch: 4.193. Train acc: 0.156. Train Loss: 72.676
Step. time since epoch: 5.073. Train acc: 0.094. Train Loss: 75.069
Step. time since epoch: 5.961. Train acc: 0.094. Train Loss: 75.430
Step. time since epoch: 6.867. Train acc: 0.156. Train Loss: 73.494
Step. time since epoch: 7.780. Train acc: 0.125. Train Loss: 73.690
Step. time since epoch: 8.707. Train acc: 0.125. Train Loss: 75.226
Step. time since epoch: 9.614. Train acc: 0.125. Train Loss: 74.022


KeyboardInterrupt: 

## NiN

In [32]:
def nin_block(input_channels, num_channels, kernel_size, strides, padding):
    blk = nn.Sequential(
            nn.Conv2d(input_channels, num_channels, kernel_size, strides, padding),
            nn.ReLU(),
            nn.Conv2d(num_channels, num_channels, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(num_channels, num_channels, kernel_size=1),
            nn.ReLU()
    )
    return blk

In [33]:
net = nn.Sequential(nin_block(1, 96, kernel_size=11, strides=4, padding=0),
        nn.MaxPool2d(3, stride=2),
        nin_block(96, 256, kernel_size=5, strides=1, padding=2),
        nn.MaxPool2d(3, stride=2),
        nin_block(256, 384, kernel_size=3, strides=1, padding=1),
        nn.MaxPool2d(3, stride=2),
        nn.Dropout(0.5),
        nin_block(384, 10, kernel_size=3, strides=1, padding=1),
        nn.AvgPool2d(5),
        nn.Flatten())

In [34]:
X = train_dataset[0][0].reshape(1, 1, 224, 224)
for l in net:
    X = l(X)
    print(l , X.shape)

Sequential(
  (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
  (1): ReLU()
  (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size([1, 96, 54, 54])
MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) torch.Size([1, 96, 26, 26])
Sequential(
  (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size([1, 256, 26, 26])
MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) torch.Size([1, 256, 12, 12])
Sequential(
  (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size(

In [35]:
lr, num_epochs = 0.05, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

Step. time since epoch: 1.204. Train acc: 0.062. Train Loss: 73.798
Step. time since epoch: 2.015. Train acc: 0.125. Train Loss: 73.577
Step. time since epoch: 3.160. Train acc: 0.094. Train Loss: 73.352
Step. time since epoch: 4.008. Train acc: 0.156. Train Loss: 73.401
Step. time since epoch: 4.819. Train acc: 0.031. Train Loss: 74.732
Step. time since epoch: 5.634. Train acc: 0.031. Train Loss: 74.059
Step. time since epoch: 6.439. Train acc: 0.094. Train Loss: 73.702
Step. time since epoch: 7.247. Train acc: 0.031. Train Loss: 73.769
Step. time since epoch: 8.065. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 8.842. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 9.620. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 10.419. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 11.246. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 12.051. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 12.856. Train acc: 0.

KeyboardInterrupt: 

In [37]:
# Install torchinfo if it's not available, import it if it is
try: 
    import torchinfo
except:
    !pip install torchinfo
    import torchinfo

Collecting torchinfo
  Obtaining dependency information for torchinfo from https://files.pythonhosted.org/packages/72/25/973bd6128381951b23cdcd8a9870c6dcfc5606cb864df8eabd82e529f9c1/torchinfo-1.8.0-py3-none-any.whl.metadata
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [39]:
from torchinfo import summary
summary(net, input_size=[1, 1, 224, 224]) # 

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [1, 10]                   --
├─Sequential: 1-1                        [1, 96, 54, 54]           --
│    └─Conv2d: 2-1                       [1, 96, 54, 54]           11,712
│    └─ReLU: 2-2                         [1, 96, 54, 54]           --
│    └─Conv2d: 2-3                       [1, 96, 54, 54]           9,312
│    └─ReLU: 2-4                         [1, 96, 54, 54]           --
│    └─Conv2d: 2-5                       [1, 96, 54, 54]           9,312
│    └─ReLU: 2-6                         [1, 96, 54, 54]           --
├─MaxPool2d: 1-2                         [1, 96, 26, 26]           --
├─Sequential: 1-3                        [1, 256, 26, 26]          --
│    └─Conv2d: 2-7                       [1, 256, 26, 26]          614,656
│    └─ReLU: 2-8                         [1, 256, 26, 26]          --
│    └─Conv2d: 2-9                       [1, 256, 26, 26]          65,

## GoogleLeNet

In [249]:
nn.Module

torch.nn.modules.module.Module

In [110]:
class Inception(nn.Module):
    def __init__(self, ic, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        self.p1_1 = nn.Sequential(nn.Conv2d(ic, c1, kernel_size=1), nn.ReLU())
        self.p2_1 = nn.Sequential(nn.Conv2d(ic, c2[0], kernel_size=1), nn.ReLU())
        self.p2_2 = nn.Sequential(nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1), nn.ReLU())
        self.p3_1 = nn.Sequential(nn.Conv2d(ic, c3[0], kernel_size=1), nn.ReLU())
        self.p3_2 = nn.Sequential(nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2), nn.ReLU())
        self.p4_1 = nn.Sequential(nn.MaxPool2d(3, stride=1, padding=1))
        self.p4_2 = nn.Sequential(nn.Conv2d(ic, c4, kernel_size=1), nn.ReLU())

    def forward(self, x):
        p1 = self.p1_1(x)
        p2 = self.p2_2(self.p2_1(x))
        p3 = self.p3_2(self.p3_1(x))
        p4 = self.p4_2(self.p4_1(x))
        # Concatenate the outputs on the channel dimension.
        return torch.cat((p1, p2, p3, p4), dim=1)

In [111]:
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3), nn.ReLU(),
       nn.MaxPool2d(3, stride=2, padding=1))

In [112]:
b2 = nn.Sequential(
       nn.Conv2d(64, 64, kernel_size=1),
       nn.Conv2d(64, 192, kernel_size=3, padding=1),
       nn.MaxPool2d(3, stride=2, padding=1))

In [118]:
b3 = nn.Sequential(
       Inception(192, 64, (96, 128), (16, 32), 32),
       Inception(256, 128, (128, 192), (32, 96), 64),
       nn.MaxPool2d(3, stride=2, padding=1))

In [163]:
b4 = nn.Sequential(
       Inception(480, 192, (96, 208), (16, 48), 64),
       Inception(512, 160, (112, 224), (24, 64), 64),
       Inception(512, 128, (128, 256), (24, 64), 64),
       Inception(512, 112, (144, 288), (32, 64), 64),
       Inception(528, 256, (160, 320), (32, 128), 128),
       nn.MaxPool2d(3, stride=2, padding=1))

In [170]:
b5 = nn.Sequential(
       Inception(832, 256, (160, 320), (32, 128), 128),
       Inception(832, 384, (192, 384), (48, 128), 128),
       nn.AvgPool2d(7))

In [176]:
net = nn.Sequential(b1, b2, b3, b4, b5, nn.Flatten(), nn.Linear(1024, 10))

In [None]:
X = train_dataset[0][0].reshape(1, 1, 224, 224)
for l in net:
    X = l(X)
    print(l , X.shape)

In [None]:
lr, num_epochs = 0.05, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

## FineTuning

In [40]:
transoforms = tv.transforms.Compose([
    tv.transforms.Grayscale(3),
    tv.transforms.Resize((224,224)),
    tv.transforms.ToTensor()
])

train_dataset = tv.datasets.MNIST('.', train=True, transform=transoforms, download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=transoforms, download=True)

train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [41]:
model = tv.models.resnet18(pretrained=True)



In [42]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [43]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [44]:
model.fc = nn.Linear(in_features=512, out_features=10)

In [45]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 fc.weight
	 fc.bias


In [261]:
trainer = torch.optim.SGD(params_to_update, lr=0.001, momentum=0.9)

In [46]:
train(model, train_iter, test_iter, trainer, 5)

Step. time since epoch: 1.039. Train acc: 0.062. Train Loss: 76.296
Step. time since epoch: 1.774. Train acc: 0.062. Train Loss: 79.770
Step. time since epoch: 2.455. Train acc: 0.062. Train Loss: 77.880
Step. time since epoch: 3.146. Train acc: 0.156. Train Loss: 73.103
Step. time since epoch: 3.836. Train acc: 0.062. Train Loss: 81.671
Step. time since epoch: 4.526. Train acc: 0.031. Train Loss: 80.685
Step. time since epoch: 5.218. Train acc: 0.250. Train Loss: 72.240
Step. time since epoch: 5.905. Train acc: 0.250. Train Loss: 75.459
Step. time since epoch: 6.594. Train acc: 0.062. Train Loss: 83.678
Step. time since epoch: 7.289. Train acc: 0.125. Train Loss: 78.632
Step. time since epoch: 8.010. Train acc: 0.000. Train Loss: 78.080
Step. time since epoch: 8.691. Train acc: 0.062. Train Loss: 78.287
Step. time since epoch: 9.371. Train acc: 0.062. Train Loss: 82.328
Step. time since epoch: 10.036. Train acc: 0.156. Train Loss: 77.613
Step. time since epoch: 10.702. Train acc: 0.12

KeyboardInterrupt: 