In [199]:
import torch
from torch import nn
import torchvision as tv
import time

## DataSet

In [202]:
BATCH_SIZE = 256
train_dataset = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [203]:
def evaluate_accuracy(data_iter, net):
    acc_sum, n = torch.Tensor([0]), 0
    net.eval()
    for X, y in data_iter:
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [204]:
def train(net, train_iter, test_iter, trainer, num_epochs):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

## LeNet

In [205]:
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5),
    nn.Sigmoid(),
    nn.AvgPool2d(2, stride=2),
    nn.Flatten(),
    nn.Linear(400, 120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    nn.Linear(84, 10)
)

In [206]:
lr, num_epochs = 0.9, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

Step. time since epoch: 0.113. Train acc: 0.117. Train Loss: 589.539
Step. time since epoch: 0.187. Train acc: 0.133. Train Loss: 43471.098
Step. time since epoch: 0.297. Train acc: 0.094. Train Loss: 93349.023
Step. time since epoch: 0.452. Train acc: 0.156. Train Loss: 48749.699
Step. time since epoch: 0.621. Train acc: 0.133. Train Loss: 41218.789
Step. time since epoch: 0.797. Train acc: 0.078. Train Loss: 47300.453
Step. time since epoch: 0.967. Train acc: 0.062. Train Loss: 62516.086
Step. time since epoch: 1.152. Train acc: 0.117. Train Loss: 34516.039
Step. time since epoch: 1.317. Train acc: 0.094. Train Loss: 36430.434
Step. time since epoch: 1.501. Train acc: 0.082. Train Loss: 41685.742
Step. time since epoch: 1.726. Train acc: 0.113. Train Loss: 32121.875
Step. time since epoch: 1.903. Train acc: 0.105. Train Loss: 18555.381
Step. time since epoch: 2.086. Train acc: 0.086. Train Loss: 13596.615
Step. time since epoch: 2.261. Train acc: 0.094. Train Loss: 21109.389
Step. ti

KeyboardInterrupt: 

## AlexNet

In [211]:
BATCH_SIZE=32
transoforms = tv.transforms.Compose([
    tv.transforms.Resize((224,224)),
    tv.transforms.ToTensor()
])
train_dataset = tv.datasets.MNIST('.', train=True, transform=transoforms, download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=transoforms, download=True)
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [212]:
net = nn.Sequential(
    nn.Conv2d(1, 96, kernel_size=11, stride=4),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),
    nn.Conv2d(96, 256, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),
    nn.Conv2d(256, 384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(3, stride=2),
    nn.Flatten(),
    nn.Linear(6400, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 10)
)

In [213]:
lr, num_epochs  = 0.01, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

Step. time since epoch: 1.274. Train acc: 0.062. Train Loss: 73.765
Step. time since epoch: 2.632. Train acc: 0.094. Train Loss: 73.716
Step. time since epoch: 4.009. Train acc: 0.094. Train Loss: 73.571
Step. time since epoch: 5.340. Train acc: 0.156. Train Loss: 73.444
Step. time since epoch: 6.676. Train acc: 0.125. Train Loss: 73.762
Step. time since epoch: 8.093. Train acc: 0.094. Train Loss: 74.187
Step. time since epoch: 9.931. Train acc: 0.156. Train Loss: 73.346
Step. time since epoch: 11.348. Train acc: 0.125. Train Loss: 73.533
Step. time since epoch: 12.698. Train acc: 0.094. Train Loss: 74.027
Step. time since epoch: 14.050. Train acc: 0.125. Train Loss: 73.601
Step. time since epoch: 15.410. Train acc: 0.062. Train Loss: 73.721
Step. time since epoch: 16.954. Train acc: 0.156. Train Loss: 73.202
Step. time since epoch: 18.434. Train acc: 0.156. Train Loss: 73.674
Step. time since epoch: 19.783. Train acc: 0.094. Train Loss: 73.774
Step. time since epoch: 21.135. Train acc

KeyboardInterrupt: 

## VGG

In [228]:
def vgg_block(num_convs, input_channels, num_channels):
    blk = nn.Sequential(nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1), nn.ReLU())
    for i in range(num_convs - 1):
        blk.add_module("conv{}".format(i), nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1))
        blk.add_module("relu{}".format(i), nn.ReLU())
    blk.add_module("pool", nn.MaxPool2d(2, stride=2))
    return blk

In [229]:
conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512))

In [238]:
def vgg(conv_arch):
    net = nn.Sequential()

    for i, (num_convs, input_ch, num_channels) in enumerate(conv_arch):
        net.add_module("block{}".format(i), vgg_block(num_convs, input_ch, num_channels))

    
    classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(6272, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 10))

    net.add_module('classifier', classifier)
    return net

net = vgg(conv_arch)

In [236]:
x = train_dataset[0][0].reshape(1,1,224,224)
for l in net:
    x = l(x)
    print(l, "\t\t", x.shape)

Sequential(
  (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 64, 112, 112])
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 128, 56, 56])
Sequential(
  (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (conv0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu0): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 256, 28, 28])
Sequential(
  (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (conv0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu0): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, pad

In [239]:
ratio = 4
small_conv_arch = [(v[0], max(v[1] // ratio, 1), v[2] // ratio) for v in conv_arch]
net = vgg(small_conv_arch)

In [241]:
small_conv_arch

[(1, 1, 16), (1, 16, 32), (2, 32, 64), (2, 64, 128), (2, 128, 128)]

In [240]:
x = train_dataset[0][0].reshape(1,1,224,224)
for l in net:
    x = l(x)
    print(l, "\t\t", x.shape)

Sequential(
  (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 16, 112, 112])
Sequential(
  (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 32, 56, 56])
Sequential(
  (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (conv0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu0): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) 		 torch.Size([1, 64, 28, 28])
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (conv0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu0): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, 

In [242]:
lr, num_epochs = 0.05, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

Step. time since epoch: 1.560. Train acc: 0.125. Train Loss: 73.637
Step. time since epoch: 3.241. Train acc: 0.062. Train Loss: 74.438
Step. time since epoch: 4.870. Train acc: 0.062. Train Loss: 73.409
Step. time since epoch: 6.491. Train acc: 0.156. Train Loss: 72.695
Step. time since epoch: 8.070. Train acc: 0.094. Train Loss: 75.087
Step. time since epoch: 9.722. Train acc: 0.094. Train Loss: 75.459
Step. time since epoch: 11.380. Train acc: 0.156. Train Loss: 73.588
Step. time since epoch: 13.005. Train acc: 0.125. Train Loss: 73.655
Step. time since epoch: 14.631. Train acc: 0.031. Train Loss: 75.266
Step. time since epoch: 16.259. Train acc: 0.125. Train Loss: 74.057
Step. time since epoch: 17.910. Train acc: 0.062. Train Loss: 73.834
Step. time since epoch: 19.556. Train acc: 0.125. Train Loss: 72.412
Step. time since epoch: 21.268. Train acc: 0.125. Train Loss: 73.535
Step. time since epoch: 22.961. Train acc: 0.062. Train Loss: 74.912
Step. time since epoch: 24.577. Train ac

KeyboardInterrupt: 

## NiN

In [243]:
def nin_block(input_channels, num_channels, kernel_size, strides, padding):
    blk = nn.Sequential(
            nn.Conv2d(input_channels, num_channels, kernel_size, strides, padding),
            nn.ReLU(),
            nn.Conv2d(num_channels, num_channels, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(num_channels, num_channels, kernel_size=1),
            nn.ReLU()
    )
    return blk

In [246]:
net = nn.Sequential(nin_block(1, 96, kernel_size=11, strides=4, padding=0),
        nn.MaxPool2d(3, stride=2),
        nin_block(96, 256, kernel_size=5, strides=1, padding=2),
        nn.MaxPool2d(3, stride=2),
        nin_block(256, 384, kernel_size=3, strides=1, padding=1),
        nn.MaxPool2d(3, stride=2),
        nn.Dropout(0.5),
        nin_block(384, 10, kernel_size=3, strides=1, padding=1),
        nn.AvgPool2d(5),
        nn.Flatten())

In [247]:
X = train_dataset[0][0].reshape(1, 1, 224, 224)
for l in net:
    X = l(X)
    print(l , X.shape)

Sequential(
  (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
  (1): ReLU()
  (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size([1, 96, 54, 54])
MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) torch.Size([1, 96, 26, 26])
Sequential(
  (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size([1, 256, 26, 26])
MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) torch.Size([1, 256, 12, 12])
Sequential(
  (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size(

In [248]:
lr, num_epochs = 0.05, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

Step. time since epoch: 1.431. Train acc: 0.125. Train Loss: 73.553
Step. time since epoch: 2.855. Train acc: 0.000. Train Loss: 74.795
Step. time since epoch: 4.325. Train acc: 0.094. Train Loss: 73.754
Step. time since epoch: 5.811. Train acc: 0.062. Train Loss: 73.803
Step. time since epoch: 7.281. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 8.734. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 10.173. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 11.608. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 13.079. Train acc: 0.156. Train Loss: 73.683
Step. time since epoch: 14.502. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 15.989. Train acc: 0.156. Train Loss: 73.683
Step. time since epoch: 17.537. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 18.994. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 20.437. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 21.849. Train ac

KeyboardInterrupt: 

## GoogleLeNet

In [249]:
nn.Module

torch.nn.modules.module.Module

In [110]:
class Inception(nn.Module):
    def __init__(self, ic, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        self.p1_1 = nn.Sequential(nn.Conv2d(ic, c1, kernel_size=1), nn.ReLU())
        self.p2_1 = nn.Sequential(nn.Conv2d(ic, c2[0], kernel_size=1), nn.ReLU())
        self.p2_2 = nn.Sequential(nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1), nn.ReLU())
        self.p3_1 = nn.Sequential(nn.Conv2d(ic, c3[0], kernel_size=1), nn.ReLU())
        self.p3_2 = nn.Sequential(nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2), nn.ReLU())
        self.p4_1 = nn.Sequential(nn.MaxPool2d(3, stride=1, padding=1))
        self.p4_2 = nn.Sequential(nn.Conv2d(ic, c4, kernel_size=1), nn.ReLU())

    def forward(self, x):
        p1 = self.p1_1(x)
        p2 = self.p2_2(self.p2_1(x))
        p3 = self.p3_2(self.p3_1(x))
        p4 = self.p4_2(self.p4_1(x))
        # Concatenate the outputs on the channel dimension.
        return torch.cat((p1, p2, p3, p4), dim=1)

In [111]:
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3), nn.ReLU(),
       nn.MaxPool2d(3, stride=2, padding=1))

In [112]:
b2 = nn.Sequential(
       nn.Conv2d(64, 64, kernel_size=1),
       nn.Conv2d(64, 192, kernel_size=3, padding=1),
       nn.MaxPool2d(3, stride=2, padding=1))

In [118]:
b3 = nn.Sequential(
       Inception(192, 64, (96, 128), (16, 32), 32),
       Inception(256, 128, (128, 192), (32, 96), 64),
       nn.MaxPool2d(3, stride=2, padding=1))

In [163]:
b4 = nn.Sequential(
       Inception(480, 192, (96, 208), (16, 48), 64),
       Inception(512, 160, (112, 224), (24, 64), 64),
       Inception(512, 128, (128, 256), (24, 64), 64),
       Inception(512, 112, (144, 288), (32, 64), 64),
       Inception(528, 256, (160, 320), (32, 128), 128),
       nn.MaxPool2d(3, stride=2, padding=1))

In [170]:
b5 = nn.Sequential(
       Inception(832, 256, (160, 320), (32, 128), 128),
       Inception(832, 384, (192, 384), (48, 128), 128),
       nn.AvgPool2d(7))

In [176]:
net = nn.Sequential(b1, b2, b3, b4, b5, nn.Flatten(), nn.Linear(1024, 10))

In [250]:
X = train_dataset[0][0].reshape(1, 1, 224, 224)
for l in net:
    X = l(X)
    print(l , X.shape)

Sequential(
  (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
  (1): ReLU()
  (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size([1, 96, 54, 54])
MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) torch.Size([1, 96, 26, 26])
Sequential(
  (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size([1, 256, 26, 26])
MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) torch.Size([1, 256, 12, 12])
Sequential(
  (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
) torch.Size(

In [251]:
lr, num_epochs = 0.05, 5
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train(net, train_iter, test_iter, trainer, num_epochs)

Step. time since epoch: 1.408. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 2.805. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 4.202. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 5.615. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 7.018. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 8.427. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 9.838. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 11.256. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 12.660. Train acc: 0.156. Train Loss: 73.683
Step. time since epoch: 14.102. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 15.511. Train acc: 0.156. Train Loss: 73.683
Step. time since epoch: 16.935. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 18.384. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 20.253. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 21.717. Train acc

Step. time since epoch: 174.299. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 175.684. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 177.241. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 178.764. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 180.271. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 181.808. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 183.362. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 184.885. Train acc: 0.156. Train Loss: 73.683
Step. time since epoch: 186.377. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 187.818. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 189.295. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 190.850. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 192.392. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 193.795. Train acc: 0.062. Train Loss: 73.683
Step. time since epo

Step. time since epoch: 346.417. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 347.953. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 349.409. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 350.878. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 352.335. Train acc: 0.156. Train Loss: 73.683
Step. time since epoch: 353.776. Train acc: 0.156. Train Loss: 73.683
Step. time since epoch: 355.199. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 356.604. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 357.983. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 359.372. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 360.766. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 362.135. Train acc: 0.188. Train Loss: 73.683
Step. time since epoch: 363.578. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 365.045. Train acc: 0.094. Train Loss: 73.683
Step. time since epo

Step. time since epoch: 517.000. Train acc: 0.219. Train Loss: 73.683
Step. time since epoch: 518.474. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 519.945. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 521.345. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 522.804. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 524.242. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 525.701. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 527.078. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 528.537. Train acc: 0.156. Train Loss: 73.683
Step. time since epoch: 529.948. Train acc: 0.062. Train Loss: 73.683
Step. time since epoch: 531.349. Train acc: 0.094. Train Loss: 73.683
Step. time since epoch: 532.787. Train acc: 0.031. Train Loss: 73.683
Step. time since epoch: 534.271. Train acc: 0.125. Train Loss: 73.683
Step. time since epoch: 535.706. Train acc: 0.000. Train Loss: 73.683
Step. time since epo

KeyboardInterrupt: 

## FineTuning

In [253]:
transoforms = tv.transforms.Compose([
    tv.transforms.Grayscale(3),
    tv.transforms.Resize((224,224)),
    tv.transforms.ToTensor()
])
train_dataset = tv.datasets.MNIST('.', train=True, transform=transoforms, download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=transoforms, download=True)
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [254]:
model = tv.models.resnet18(pretrained=True)

In [257]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [258]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [259]:
model.fc = nn.Linear(in_features=512, out_features=10)

In [260]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 fc.weight
	 fc.bias


In [261]:
trainer = torch.optim.SGD(params_to_update, lr=0.001, momentum=0.9)

In [262]:
train(model, train_iter, test_iter, trainer, 5)

Step. time since epoch: 1.118. Train acc: 0.031. Train Loss: 77.998
Step. time since epoch: 2.217. Train acc: 0.031. Train Loss: 81.995
Step. time since epoch: 3.312. Train acc: 0.156. Train Loss: 72.565
Step. time since epoch: 4.417. Train acc: 0.219. Train Loss: 69.465
Step. time since epoch: 5.597. Train acc: 0.156. Train Loss: 73.562
Step. time since epoch: 6.811. Train acc: 0.500. Train Loss: 55.271
Step. time since epoch: 8.055. Train acc: 0.250. Train Loss: 69.794
Step. time since epoch: 9.237. Train acc: 0.375. Train Loss: 53.511
Step. time since epoch: 10.383. Train acc: 0.562. Train Loss: 35.115
Step. time since epoch: 11.522. Train acc: 0.812. Train Loss: 19.998
Step. time since epoch: 12.821. Train acc: 0.531. Train Loss: 41.876
Step. time since epoch: 14.044. Train acc: 0.750. Train Loss: 23.873
Step. time since epoch: 15.412. Train acc: 0.688. Train Loss: 30.894
Step. time since epoch: 16.602. Train acc: 0.688. Train Loss: 30.866
Step. time since epoch: 17.797. Train acc:

KeyboardInterrupt: 