In [1]:
import torch
from torch import nn
import torchvision.models as models

import torchvision as tv
import time

import os

In [4]:
dev = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
dev

device(type='cpu')

In [11]:
data_dir = '/Users/wizard/Yandex.Disk.localized/data s/HW/DLL/hymenoptera_data'


In [12]:
BATCH_SIZE = 32



In [13]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': tv.transforms.Compose([
#         tv.transforms.RandomResizedCrop(224),
#         tv.transforms.RandomHorizontalFlip(),
#         tv.transforms.RandomVerticalFlip(),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': tv.transforms.Compose([
        tv.transforms.Resize(256),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create training and validation datasets
image_datasets = {x: tv.datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ['train', 'val']}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Initializing Datasets and Dataloaders...


In [14]:
import torchvision.models as models
resnet18 = models.resnet18()
vgg16 = models.vgg16()

In [15]:
resnet18 = models.resnet18(pretrained=True)
vgg16 = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/wizard/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100.0%
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /Users/wizard/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100.0%


#### 1.Обучите на нем модели ResNet 18 и VGG 16 с нуля (5-10 эпох)

## ResNet 18

In [16]:
model = models.resnet18()

In [17]:
model = model.to(dev)

In [18]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [19]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [20]:
lr, num_epochs = 0.001, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 5.958. Train acc: 0.000. Train Loss: 215.373
Step. time since epoch: 8.746. Train acc: 0.500. Train Loss: 182.898
Step. time since epoch: 11.442. Train acc: 0.469. Train Loss: 145.546
Step. time since epoch: 14.090. Train acc: 0.500. Train Loss: 107.812
Step. time since epoch: 16.756. Train acc: 0.750. Train Loss: 58.738
Step. time since epoch: 19.562. Train acc: 0.594. Train Loss: 68.560
Step. time since epoch: 22.215. Train acc: 0.562. Train Loss: 81.258
Step. time since epoch: 23.931. Train acc: 0.750. Train Loss: 20.331
epoch 1, loss 3.6087, train acc 0.504, test acc 0.542, time 72.4 sec
Step. time since epoch: 5.751. Train acc: 0.594. Train Loss: 426.400
Step. time since epoch: 8.444. Train acc: 0.469. Train Loss: 551.599
Step. time since epoch: 11.099. Train acc: 0.469. Train Loss: 80.927
Step. time since epoch: 13.774. Train acc: 0.438. Train Loss: 141.159
Step. time since epoch: 16.421. Train acc: 0.562. Train Loss: 24.820
Step. time since epoch: 19.033.

In [21]:
# release CUDA
model.eval()
torch.cuda.empty_cache()

## VGG 16

In [22]:
model = models.vgg16()

In [23]:
model = model.to(dev)

In [24]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [25]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [26]:
lr, num_epochs = 0.001, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 19.436. Train acc: 0.000. Train Loss: 223.094
Step. time since epoch: 35.856. Train acc: 0.469. Train Loss: 4583.928
Step. time since epoch: 51.988. Train acc: 0.500. Train Loss: 184.526
Step. time since epoch: 68.032. Train acc: 0.469. Train Loss: 620.793
Step. time since epoch: 84.269. Train acc: 0.375. Train Loss: 208.773
Step. time since epoch: 100.610. Train acc: 0.719. Train Loss: 211.643
Step. time since epoch: 116.723. Train acc: 0.531. Train Loss: 209.501
Step. time since epoch: 127.642. Train acc: 0.550. Train Loss: 122.799
epoch 1, loss 26.0863, train acc 0.447, test acc 0.458, time 191.6 sec
Step. time since epoch: 18.840. Train acc: 0.594. Train Loss: 148.030
Step. time since epoch: 35.203. Train acc: 0.469. Train Loss: 206.316
Step. time since epoch: 51.586. Train acc: 0.531. Train Loss: 102.189
Step. time since epoch: 68.041. Train acc: 0.469. Train Loss: 122.686
Step. time since epoch: 84.223. Train acc: 0.500. Train Loss: 95.468
Step. time since

## 2.Обучите на нем модели ResNet 18 и VGG 16 с использованием FineTuning (5-10 эпох)

### ResNet 18 pretrained

In [28]:
model = models.resnet18(pretrained=True)


In [29]:
model = model.to(dev)


In [30]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [31]:
model.fc


Linear(in_features=512, out_features=1000, bias=True)

In [32]:
model.fc = nn.Linear(in_features=512, out_features=2).to(dev)


In [33]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 fc.weight
	 fc.bias


In [34]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [35]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [37]:
lr, num_epochs = 0.001, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 3.728. Train acc: 0.406. Train Loss: 22.600
Step. time since epoch: 4.937. Train acc: 0.531. Train Loss: 21.821
Step. time since epoch: 6.124. Train acc: 0.625. Train Loss: 20.443
Step. time since epoch: 7.301. Train acc: 0.594. Train Loss: 23.329
Step. time since epoch: 8.480. Train acc: 0.656. Train Loss: 19.574
Step. time since epoch: 9.651. Train acc: 0.781. Train Loss: 17.721
Step. time since epoch: 10.837. Train acc: 0.562. Train Loss: 19.666
Step. time since epoch: 11.622. Train acc: 0.650. Train Loss: 12.380
epoch 1, loss 0.6456, train acc 0.598, test acc 0.745, time 59.6 sec
Step. time since epoch: 3.776. Train acc: 0.688. Train Loss: 16.058
Step. time since epoch: 5.028. Train acc: 0.812. Train Loss: 14.684
Step. time since epoch: 6.259. Train acc: 0.812. Train Loss: 13.618
Step. time since epoch: 7.408. Train acc: 0.844. Train Loss: 14.268
Step. time since epoch: 8.591. Train acc: 0.938. Train Loss: 11.488
Step. time since epoch: 9.775. Train acc: 0.8

### VGG 16 pretrained


In [38]:
model = models.vgg16(pretrained=True)


In [39]:
model = model.to(dev)


In [40]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [41]:
model.classifier


Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [43]:
model.classifier[6] = nn.Linear(in_features=4096, out_features=2).to(dev)

In [44]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 classifier.6.weight
	 classifier.6.bias


In [45]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [46]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [47]:
lr, num_epochs = 0.001, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 7.073. Train acc: 0.469. Train Loss: 27.675
Step. time since epoch: 11.456. Train acc: 0.656. Train Loss: 18.629
Step. time since epoch: 15.782. Train acc: 0.750. Train Loss: 14.478
Step. time since epoch: 19.870. Train acc: 0.812. Train Loss: 12.869
Step. time since epoch: 24.008. Train acc: 0.969. Train Loss: 8.696
Step. time since epoch: 28.344. Train acc: 0.969. Train Loss: 6.461
Step. time since epoch: 32.680. Train acc: 0.969. Train Loss: 5.082
Step. time since epoch: 35.859. Train acc: 0.900. Train Loss: 4.712
epoch 1, loss 0.4041, train acc 0.807, test acc 0.948, time 99.5 sec
Step. time since epoch: 6.716. Train acc: 0.938. Train Loss: 5.628
Step. time since epoch: 10.851. Train acc: 1.000. Train Loss: 2.393
Step. time since epoch: 14.945. Train acc: 0.938. Train Loss: 4.882
Step. time since epoch: 19.127. Train acc: 0.969. Train Loss: 3.151
Step. time since epoch: 23.400. Train acc: 0.938. Train Loss: 3.356
Step. time since epoch: 27.823. Train acc: 1.

### 3.Добавьте аугментацию данных к пункту 2

In [48]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': tv.transforms.Compose([
        tv.transforms.RandomResizedCrop(224),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.RandomVerticalFlip(),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': tv.transforms.Compose([
        tv.transforms.Resize(256),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create training and validation datasets
image_datasets = {x: tv.datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ['train', 'val']}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Initializing Datasets and Dataloaders...


### ResNet 18 pretrained + aug


In [49]:
model = models.resnet18(pretrained=True)


In [50]:
model = model.to(dev)


In [51]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [52]:
model.fc


Linear(in_features=512, out_features=1000, bias=True)

In [53]:
model.fc = nn.Linear(in_features=512, out_features=2).to(dev)


In [54]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 fc.weight
	 fc.bias


In [55]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [56]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [57]:
lr, num_epochs = 0.001, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 4.014. Train acc: 0.531. Train Loss: 32.026
Step. time since epoch: 5.252. Train acc: 0.531. Train Loss: 24.717
Step. time since epoch: 6.471. Train acc: 0.344. Train Loss: 24.991
Step. time since epoch: 7.684. Train acc: 0.438. Train Loss: 23.758
Step. time since epoch: 8.883. Train acc: 0.469. Train Loss: 25.052
Step. time since epoch: 10.069. Train acc: 0.531. Train Loss: 25.121
Step. time since epoch: 11.257. Train acc: 0.500. Train Loss: 24.369
Step. time since epoch: 12.046. Train acc: 0.500. Train Loss: 15.275
epoch 1, loss 0.8004, train acc 0.480, test acc 0.719, time 60.1 sec
Step. time since epoch: 3.667. Train acc: 0.531. Train Loss: 21.594
Step. time since epoch: 4.844. Train acc: 0.656. Train Loss: 20.022
Step. time since epoch: 5.994. Train acc: 0.594. Train Loss: 19.602
Step. time since epoch: 7.144. Train acc: 0.625. Train Loss: 19.760
Step. time since epoch: 8.290. Train acc: 0.531. Train Loss: 20.272
Step. time since epoch: 9.441. Train acc: 0.

### VGG 16 pretrained + aug


In [58]:
model = models.vgg16(pretrained=True)


In [59]:
model = model.to(dev)


In [60]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [61]:
model.classifier


Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [62]:
model.classifier[6] = nn.Linear(in_features=4096, out_features=2).to(dev)


In [63]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 classifier.6.weight
	 classifier.6.bias


In [64]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [65]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [66]:
lr, num_epochs = 0.001, 5
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 7.728. Train acc: 0.500. Train Loss: 23.961
Step. time since epoch: 11.892. Train acc: 0.594. Train Loss: 20.867
Step. time since epoch: 16.390. Train acc: 0.688. Train Loss: 16.852
Step. time since epoch: 20.792. Train acc: 0.875. Train Loss: 10.949
Step. time since epoch: 25.003. Train acc: 0.875. Train Loss: 10.456
Step. time since epoch: 29.478. Train acc: 0.938. Train Loss: 9.403
Step. time since epoch: 33.688. Train acc: 0.906. Train Loss: 8.298
Step. time since epoch: 36.958. Train acc: 0.900. Train Loss: 5.571
epoch 1, loss 0.4359, train acc 0.779, test acc 0.954, time 102.2 sec
Step. time since epoch: 7.027. Train acc: 1.000. Train Loss: 3.968
Step. time since epoch: 11.189. Train acc: 0.969. Train Loss: 4.746
Step. time since epoch: 15.348. Train acc: 1.000. Train Loss: 4.745
Step. time since epoch: 19.456. Train acc: 0.969. Train Loss: 4.866
Step. time since epoch: 23.570. Train acc: 0.875. Train Loss: 7.522
Step. time since epoch: 27.678. Train acc: 

## Сравните качество всех 3 полученных подходов

Качество обучения на тестовой и валидационной выборках в случае finetuning оказалось наилучшим. При этом предполагаем, что с учетом аугментации данных модель становится стабильнее.

## Примените FineTuning ResNet 18 к FashionMnist. Удалось ли увидеть резкое увеличение качества?

In [68]:
BATCH_SIZE = 256

In [69]:
transoforms = tv.transforms.Compose([
    tv.transforms.Grayscale(3),
    tv.transforms.Resize((224,224)),
    tv.transforms.ToTensor()
])
train_dataset = tv.datasets.MNIST('.', train=True, transform=transoforms, download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=transoforms, download=True)
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


0.3%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz


100.0%

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz



2.0%

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz


100.0%

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw






In [70]:
model = tv.models.resnet18(pretrained=True)

In [71]:
model = model.to(dev)


In [72]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [73]:
model.fc


Linear(in_features=512, out_features=1000, bias=True)

In [74]:
model.fc = nn.Linear(in_features=512, out_features=10).to(dev)


In [75]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 fc.weight
	 fc.bias


In [76]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [77]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [78]:
lr, num_epochs = 0.001, 1
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, train_iter, test_iter, trainer, num_epochs, dev)

Step. time since epoch: 9.134. Train acc: 0.078. Train Loss: 642.061
Step. time since epoch: 18.045. Train acc: 0.102. Train Loss: 609.979
Step. time since epoch: 26.910. Train acc: 0.145. Train Loss: 593.892
Step. time since epoch: 35.787. Train acc: 0.188. Train Loss: 572.288
Step. time since epoch: 44.634. Train acc: 0.238. Train Loss: 564.916
Step. time since epoch: 53.510. Train acc: 0.309. Train Loss: 535.893
Step. time since epoch: 62.475. Train acc: 0.352. Train Loss: 512.122
Step. time since epoch: 71.335. Train acc: 0.395. Train Loss: 503.749
Step. time since epoch: 80.255. Train acc: 0.461. Train Loss: 477.669
Step. time since epoch: 89.077. Train acc: 0.492. Train Loss: 477.212
Step. time since epoch: 97.966. Train acc: 0.535. Train Loss: 461.547
Step. time since epoch: 106.865. Train acc: 0.574. Train Loss: 438.375
Step. time since epoch: 115.684. Train acc: 0.578. Train Loss: 435.309
Step. time since epoch: 124.578. Train acc: 0.578. Train Loss: 422.826
Step. time since e

KeyboardInterrupt: 

хорошо обучается и выходит за 90-92% на сравнительном быстром периоде обучения, чем обычная сетка, но в определенный момент начинает переобучаться