# **Основное задание**



Возьмите датасет https://www.kaggle.com/ajayrana/hymenoptera-data/kernels

1.Обучите на нем модели ResNet 18 и VGG 16 с нуля (5-10 эпох)  
2.Обучите на нем модели ResNet 18 и VGG 16 с использованием FineTuning (5-10 эпох)  
3.Добавьте аугментацию данных к пункту 2  

Сравните качество всех 3 полученных подходов  

Задание со звездочкой  
Примените FineTuning ResNet 18 к FashionMnist. Удалось ли увидеть резкое увеличение качества?  



# Загружаем библиотеки. Смотрим, что доступно cpu или cuda. Загружаем данные для пунктов 1. и 2. (модель обучаемая с нуля и модель предобученная (без аугментации)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch
from torch import nn
import torchvision.models as models

import torchvision as tv
import time

import os

In [3]:
dev = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
dev

device(type='cpu')

In [5]:
data_dir = '/content/drive/My Drive/Colab Notebooks/hymenoptera_data/'


In [6]:
BATCH_SIZE = 32

In [7]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': tv.transforms.Compose([
#         tv.transforms.RandomResizedCrop(224),
#         tv.transforms.RandomHorizontalFlip(),
#         tv.transforms.RandomVerticalFlip(),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': tv.transforms.Compose([
        tv.transforms.Resize(256),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create training and validation datasets
image_datasets = {x: tv.datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ['train', 'val']}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Initializing Datasets and Dataloaders...


  cpuset_checked))


# **1. Обучите на нем модели ResNet 18 и VGG 16 с нуля (5-10 эпох)**

## **ResNet 18**

In [31]:
model = models.resnet18(pretrained=False)

In [32]:
model = model.to(dev)

In [33]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [34]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [35]:
model.fc = nn.Linear(in_features=512, out_features=2).to(dev)

In [36]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 fc.weight
	 fc.bias


In [37]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [38]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [39]:
lr, num_epochs = 0.001, 10
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

  cpuset_checked))


Step. time since epoch: 3.906. Train acc: 0.469. Train Loss: 27.953
Step. time since epoch: 5.839. Train acc: 0.594. Train Loss: 21.808
Step. time since epoch: 7.775. Train acc: 0.500. Train Loss: 22.784
Step. time since epoch: 9.712. Train acc: 0.469. Train Loss: 24.737
Step. time since epoch: 11.664. Train acc: 0.469. Train Loss: 24.108
Step. time since epoch: 13.634. Train acc: 0.438. Train Loss: 23.391
Step. time since epoch: 15.583. Train acc: 0.500. Train Loss: 22.956
Step. time since epoch: 16.838. Train acc: 0.250. Train Loss: 16.119
epoch 1, loss 0.7535, train acc 0.471, test acc 0.458, time 26.6 sec
Step. time since epoch: 3.566. Train acc: 0.688. Train Loss: 20.883
Step. time since epoch: 5.175. Train acc: 0.438. Train Loss: 23.722
Step. time since epoch: 6.792. Train acc: 0.531. Train Loss: 22.181
Step. time since epoch: 8.404. Train acc: 0.344. Train Loss: 24.504
Step. time since epoch: 10.010. Train acc: 0.469. Train Loss: 22.532
Step. time since epoch: 11.615. Train acc:

## **VGG16**

In [22]:
model = models.vgg16(pretrained=False)

In [23]:
model = model.to(dev)

In [24]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [25]:
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [26]:
model.classifier[6] = nn.Linear(in_features=4096, out_features=2).to(dev)

In [27]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 classifier.6.weight
	 classifier.6.bias


In [28]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [29]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [30]:
lr, num_epochs = 0.001, 10
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

  cpuset_checked))


Step. time since epoch: 13.711. Train acc: 0.688. Train Loss: 21.577
Step. time since epoch: 24.742. Train acc: 0.406. Train Loss: 22.594
Step. time since epoch: 35.770. Train acc: 0.500. Train Loss: 22.796
Step. time since epoch: 46.750. Train acc: 0.406. Train Loss: 22.999
Step. time since epoch: 57.726. Train acc: 0.438. Train Loss: 22.146
Step. time since epoch: 68.676. Train acc: 0.562. Train Loss: 21.963
Step. time since epoch: 79.714. Train acc: 0.500. Train Loss: 22.563
Step. time since epoch: 86.800. Train acc: 0.400. Train Loss: 14.322
epoch 1, loss 0.7007, train acc 0.492, test acc 0.542, time 142.6 sec
Step. time since epoch: 14.073. Train acc: 0.594. Train Loss: 21.855
Step. time since epoch: 25.023. Train acc: 0.438. Train Loss: 22.217
Step. time since epoch: 35.976. Train acc: 0.438. Train Loss: 22.199
Step. time since epoch: 46.929. Train acc: 0.531. Train Loss: 22.137
Step. time since epoch: 57.900. Train acc: 0.656. Train Loss: 21.680
Step. time since epoch: 68.818. T

In [None]:
# release CUDA
#model.eval()
#torch.cuda.empty_cache()

# **2. Обучите на нем модели ResNet 18 и VGG 16 с использованием FineTuning (5-10 эпох)**

## **ResNet 18 FineTuning (pretrained)** 

In [None]:
model = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




In [None]:
model = model.to(dev)

In [None]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [None]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [None]:
model.fc = nn.Linear(in_features=512, out_features=2).to(dev)

In [None]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 fc.weight
	 fc.bias


In [None]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [None]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [None]:
lr, num_epochs = 0.001, 10
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 4.939. Train acc: 0.531. Train Loss: 25.279
Step. time since epoch: 7.649. Train acc: 0.406. Train Loss: 28.519
Step. time since epoch: 10.330. Train acc: 0.469. Train Loss: 24.553
Step. time since epoch: 13.009. Train acc: 0.406. Train Loss: 28.903
Step. time since epoch: 15.731. Train acc: 0.438. Train Loss: 26.804
Step. time since epoch: 18.404. Train acc: 0.688. Train Loss: 18.240
Step. time since epoch: 21.097. Train acc: 0.625. Train Loss: 19.237
Step. time since epoch: 22.832. Train acc: 0.750. Train Loss: 11.736
epoch 1, loss 0.7511, train acc 0.529, test acc 0.673, time 36.8 sec
Step. time since epoch: 4.455. Train acc: 0.812. Train Loss: 16.578
Step. time since epoch: 6.884. Train acc: 0.688. Train Loss: 17.979
Step. time since epoch: 9.314. Train acc: 0.781. Train Loss: 15.902
Step. time since epoch: 11.719. Train acc: 0.719. Train Loss: 19.145
Step. time since epoch: 14.124. Train acc: 0.781. Train Loss: 15.939
Step. time since epoch: 16.555. Train a


 ## **VGG 16 FineTuning (pretrained)**

In [None]:
dev

device(type='cpu')

In [None]:
model = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))




In [None]:
model = model.to(dev)

In [None]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [18]:
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [None]:
model.classifier[6] = nn.Linear(in_features=4096, out_features=2).to(dev)

In [None]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 classifier.6.weight
	 classifier.6.bias


In [None]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [None]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [None]:
lr, num_epochs = 0.001, 10
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 20.663. Train acc: 0.406. Train Loss: 28.631
Step. time since epoch: 37.458. Train acc: 0.750. Train Loss: 14.702
Step. time since epoch: 54.332. Train acc: 0.812. Train Loss: 13.320
Step. time since epoch: 71.246. Train acc: 0.875. Train Loss: 9.190
Step. time since epoch: 88.099. Train acc: 0.938. Train Loss: 8.278
Step. time since epoch: 104.910. Train acc: 0.938. Train Loss: 7.254
Step. time since epoch: 121.792. Train acc: 0.906. Train Loss: 6.216
Step. time since epoch: 132.521. Train acc: 0.900. Train Loss: 3.281
epoch 1, loss 0.3724, train acc 0.811, test acc 0.961, time 216.5 sec
Step. time since epoch: 19.930. Train acc: 0.969. Train Loss: 3.729
Step. time since epoch: 36.818. Train acc: 0.938. Train Loss: 4.116
Step. time since epoch: 53.798. Train acc: 0.969. Train Loss: 2.965
Step. time since epoch: 70.633. Train acc: 0.938. Train Loss: 4.625
Step. time since epoch: 87.642. Train acc: 0.938. Train Loss: 4.408
Step. time since epoch: 104.617. Train a

# **3. Добавьте аугментацию данных к пункту 2**

Проведём аугментацию на тренировочных данных и обучим модели ResNet 18 и VGG 16

In [None]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': tv.transforms.Compose([
        tv.transforms.RandomResizedCrop(224),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.RandomVerticalFlip(),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': tv.transforms.Compose([
        tv.transforms.Resize(256),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create training and validation datasets
image_datasets = {x: tv.datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=BATCH_SIZE, shuffle=True, num_workers=4) for x in ['train', 'val']}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Initializing Datasets and Dataloaders...


## **ResNet 18 FineTuning (pretrained) + augmentation** 

In [None]:
device

device(type='cpu')

In [None]:
model = models.resnet18(pretrained=True)

In [None]:
model = model.to(dev)

In [None]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [None]:
model.fc = nn.Linear(in_features=512, out_features=2).to(dev)

In [None]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 conv1.weight
	 bn1.weight
	 bn1.bias
	 layer1.0.conv1.weight
	 layer1.0.bn1.weight
	 layer1.0.bn1.bias
	 layer1.0.conv2.weight
	 layer1.0.bn2.weight
	 layer1.0.bn2.bias
	 layer1.1.conv1.weight
	 layer1.1.bn1.weight
	 layer1.1.bn1.bias
	 layer1.1.conv2.weight
	 layer1.1.bn2.weight
	 layer1.1.bn2.bias
	 layer2.0.conv1.weight
	 layer2.0.bn1.weight
	 layer2.0.bn1.bias
	 layer2.0.conv2.weight
	 layer2.0.bn2.weight
	 layer2.0.bn2.bias
	 layer2.0.downsample.0.weight
	 layer2.0.downsample.1.weight
	 layer2.0.downsample.1.bias
	 layer2.1.conv1.weight
	 layer2.1.bn1.weight
	 layer2.1.bn1.bias
	 layer2.1.conv2.weight
	 layer2.1.bn2.weight
	 layer2.1.bn2.bias
	 layer3.0.conv1.weight
	 layer3.0.bn1.weight
	 layer3.0.bn1.bias
	 layer3.0.conv2.weight
	 layer3.0.bn2.weight
	 layer3.0.bn2.bias
	 layer3.0.downsample.0.weight
	 layer3.0.downsample.1.weight
	 layer3.0.downsample.1.bias
	 layer3.1.conv1.weight
	 layer3.1.bn1.weight
	 layer3.1.bn1.bias
	 layer3.1.conv2.weight
	 layer3.1.b

In [None]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [None]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [None]:
lr, num_epochs = 0.001, 10
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 10.941. Train acc: 0.375. Train Loss: 25.329
Step. time since epoch: 19.332. Train acc: 0.844. Train Loss: 10.661
Step. time since epoch: 27.865. Train acc: 0.906. Train Loss: 7.802
Step. time since epoch: 36.214. Train acc: 0.875. Train Loss: 20.955
Step. time since epoch: 44.578. Train acc: 0.812. Train Loss: 16.322
Step. time since epoch: 52.910. Train acc: 0.812. Train Loss: 21.766
Step. time since epoch: 61.249. Train acc: 0.781. Train Loss: 34.898
Step. time since epoch: 66.282. Train acc: 0.750. Train Loss: 23.966
epoch 1, loss 0.6627, train acc 0.770, test acc 0.719, time 81.7 sec
Step. time since epoch: 10.800. Train acc: 0.781. Train Loss: 31.196
Step. time since epoch: 18.685. Train acc: 0.469. Train Loss: 307.482
Step. time since epoch: 26.655. Train acc: 0.625. Train Loss: 22.559
Step. time since epoch: 34.601. Train acc: 0.500. Train Loss: 22.788
Step. time since epoch: 42.629. Train acc: 0.531. Train Loss: 58.193
Step. time since epoch: 50.649. Tr

In [None]:
# release CUDA
model.eval()
torch.cuda.empty_cache()

## **VGG 16 FineTuning (pretrained) + augmentation**

In [None]:
model = models.vgg16(pretrained=True)

In [None]:
model = model.to(dev)

In [None]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [None]:
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [None]:
model.classifier[6] = nn.Linear(in_features=4096, out_features=2).to(dev)

In [None]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 classifier.6.weight
	 classifier.6.bias


In [None]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    net.eval()
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [None]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [None]:
lr, num_epochs = 0.001, 10
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, dataloaders_dict['train'], dataloaders_dict['val'], trainer, num_epochs, dev)

Step. time since epoch: 20.598. Train acc: 0.625. Train Loss: 22.634
Step. time since epoch: 37.339. Train acc: 0.719. Train Loss: 19.483
Step. time since epoch: 54.100. Train acc: 0.844. Train Loss: 13.176
Step. time since epoch: 70.945. Train acc: 0.875. Train Loss: 10.633
Step. time since epoch: 87.685. Train acc: 0.875. Train Loss: 9.035
Step. time since epoch: 104.404. Train acc: 0.938. Train Loss: 9.971
Step. time since epoch: 121.155. Train acc: 0.844. Train Loss: 11.402
Step. time since epoch: 131.732. Train acc: 0.950. Train Loss: 3.836
epoch 1, loss 0.4105, train acc 0.828, test acc 0.961, time 215.6 sec
Step. time since epoch: 20.229. Train acc: 0.969. Train Loss: 4.856
Step. time since epoch: 36.990. Train acc: 1.000. Train Loss: 2.919
Step. time since epoch: 53.770. Train acc: 1.000. Train Loss: 2.424
Step. time since epoch: 70.555. Train acc: 0.906. Train Loss: 5.035
Step. time since epoch: 87.323. Train acc: 0.938. Train Loss: 5.409
Step. time since epoch: 104.094. Train

## **Сравните качество всех трёх подходов.**

Обучение моделей ResNet 18 и VGG 16 выдаёт совсем плохой результат. Применение FineTuning, обучение на заранее тренированной модели, даёт практически стопроцентный результат. Аугментация придаёт стабильность результатам.

# **Задание ***  
# **Примените FineTuning ResNet 18 к FashionMnist.**  
# **Удалось ли увидеть резкое увеличение качества?**  



In [None]:
BATCH_SIZE = 256

In [None]:
_transforms = tv.transforms.Compose([
    tv.transforms.Grayscale(3),
    tv.transforms.Resize((224,224)),
    tv.transforms.ToTensor()
])
train_dataset = tv.datasets.MNIST('.', train=True, transform=_transforms, download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=_transforms, download=True)
train_iter = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test_iter = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
model = tv.models.resnet18(pretrained=True)

In [None]:
model = model.to(dev)

In [None]:
## Убираем требование градиента:
for param in model.parameters():
    param.requires_grad = False

In [None]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [None]:
model.fc = nn.Linear(in_features=512, out_features=10).to(dev)

In [None]:
print("Params to learn:")
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 fc.weight
	 fc.bias


In [None]:
def evaluate_accuracy(data_iter, net, dev):
    acc_sum, n = torch.Tensor([0]).to(dev), 0
    for X, y in data_iter:
        X, y = X.to(dev), y.to(dev)
        acc_sum += (net(X).argmax(axis=1) == y).sum()
        n += y.shape[0]
    return acc_sum.item() / n

In [None]:
def train(net, train_iter, test_iter, trainer, num_epochs, dev):
    loss = nn.CrossEntropyLoss(reduction='sum')
    net.train()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            trainer.zero_grad()
            X, y = X.to(dev), y.to(dev)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            trainer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().item()
            n += y.shape[0]
            print("Step. time since epoch: {:.3f}. Train acc: {:.3f}. Train Loss: {:.3f}".format(time.time() -  start,
                (y_hat.argmax(axis=1) == y).sum().item() / y.shape[0], l.item()))
        test_acc = evaluate_accuracy(test_iter, net, dev)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
              'time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                 time.time() - start))

In [None]:
lr, num_epochs = 0.001, 10
trainer = torch.optim.Adam(model.parameters(), lr=lr)
train(model, train_iter, test_iter, trainer, num_epochs, dev)

Step. time since epoch: 21.348. Train acc: 0.121. Train Loss: 678.064
Step. time since epoch: 42.384. Train acc: 0.117. Train Loss: 623.940
Step. time since epoch: 63.294. Train acc: 0.152. Train Loss: 573.069
Step. time since epoch: 84.168. Train acc: 0.188. Train Loss: 556.026
Step. time since epoch: 105.184. Train acc: 0.207. Train Loss: 548.091
Step. time since epoch: 126.013. Train acc: 0.211. Train Loss: 549.575
Step. time since epoch: 146.943. Train acc: 0.270. Train Loss: 519.869
Step. time since epoch: 168.033. Train acc: 0.312. Train Loss: 511.276
Step. time since epoch: 189.009. Train acc: 0.359. Train Loss: 494.909
Step. time since epoch: 210.151. Train acc: 0.410. Train Loss: 494.599
Step. time since epoch: 231.226. Train acc: 0.469. Train Loss: 462.397
Step. time since epoch: 252.115. Train acc: 0.527. Train Loss: 454.913
Step. time since epoch: 273.084. Train acc: 0.621. Train Loss: 427.346
Step. time since epoch: 294.086. Train acc: 0.625. Train Loss: 420.656
Step. time

KeyboardInterrupt: ignored

Данная модель быстрее обучается и чуть возросло качество с 91% на 94%. Впервые за время 1489.115.