Задание: взять mnist и сравнить два варианта: resnet и inception v3. сравнить и предобученные и обученные на mnist с нуля (и предобученные модели и сами архитектуры должны быть доступны в model_zoo и у torch, и у tensorflow)


In [32]:
!nvidia-smi

Thu Dec 17 09:05:33 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.45.01    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   72C    P0    33W /  70W |   8683MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [15]:
from torchvision.models.resnet import ResNet, BasicBlock
from torchvision import models
from torchvision.datasets import MNIST
from tqdm.autonotebook import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import inspect
import time
from torch import nn, optim
import torch
from torchvision.transforms import Compose, ToTensor, Normalize, Resize
from torch.utils.data import DataLoader
from torch.utils import model_zoo
import time
from sklearn import metrics
from tqdm import tqdm
import numpy as np
import pandas as pd
import warnings
from tabulate import tabulate
warnings.filterwarnings("ignore")

In [16]:
def get_data_loaders(train_batch_size, test_batch_size, inception):
  """
  загрузчики данных + 
  преобразование размеров картинок к 224х224 в случае resnet и 299х299 в случае inception + нормализация
  """
  mnist = MNIST(download=True, train=True, root=".").train_data.float()
  transformer = Compose([Resize((299, 299)),ToTensor(), Normalize((mnist.mean()/255,), (mnist.std()/255,))]) if inception else Compose([Resize((224, 224)),ToTensor(), Normalize((mnist.mean()/255,), (mnist.std()/255,))]) 
  train_loader = DataLoader(MNIST(download=True, root=".", transform=transformer, train=True), batch_size=train_batch_size, shuffle=True)
  test_loader = DataLoader(MNIST(download=True, root=".", transform=transformer, train=False), batch_size=test_batch_size, shuffle=False)
  return train_loader, test_loader

In [17]:
def fine_tune_resnet(model):
    """
    файн-тьюним архитектуру ResNet под нашу задачу:
    вместо 3-канального изображения на вход - одноканальное
    на выходе вместо слоя из 1000 нейронов (классы ImageNet) - слой из 10 нейронов (классы MNIST)
    """
    model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    model.fc = nn.Linear(512, 10, bias=True)
    return model

In [18]:
def fine_tune_inception(model):
    """
    файн-тьюним архитектуру Inception под нашу задачу:
    в 3 канала на вход кладем одинаковое исходное изображение
    на выходе вместо слоя из 1000 нейронов (классы ImageNet) - слой из 10 нейронов (классы MNIST)
    """
    model.fc = nn.Linear(2048, 10, bias=True)
    return model

In [19]:
def train_model(model, epoch, train_loader, optimizer, criterion, inception):
    """
    обучение модели
    """
    print('\nTraining model...')
    total = 0
    correct = 0
    start = time.time()

    for epoch in range(epoch):
        for i, data in enumerate(tqdm(train_loader), 1):
            images, labels = data
            if torch.cuda.is_available():
                
                images = images.cuda().expand(images.shape[0], 3, 299, 299) if inception else images.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()
            outputs = model(images)

            _, predicted = torch.max(outputs[0].data, 1) if inception else torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            loss = criterion(outputs[0], labels) if inception else criterion(outputs, labels)
            loss_i = np.round(loss.item(), 2)

            loss.backward()
            optimizer.step()

        print('\nEpoch: {} Loss: {}'.format(epoch + 1, loss_i))

    print('\nTraining Completed in: {} secs'.format(time.time() - start))
    print('Training accuracy: {}'.format(np.round(correct / total, 4)))

In [20]:
def test_model(model, test_loader, inception):
    """
    тестирование модели
    """
    print('\nTesting model...')
    start = time.time()

    predictions = torch.LongTensor()
    labels = torch.LongTensor()
    for i, data in tqdm(enumerate(test_loader, 1)):
        images = data[0].cuda().expand(data[0].shape[0], 3, 299, 299) if inception else data[0].cuda()
        lbls = data[1]
        with torch.no_grad():
            outputs = model(images)
        pred = outputs[0].cpu().data.max(1, keepdim=True)[1] if inception else outputs.cpu().data.max(1, keepdim=True)[1]
        labels = torch.cat((labels, lbls), dim=0)
        predictions = torch.cat((predictions, pred), dim=0)

    print('\nTesting finished. Time spent: {}'.format(time.time() - start))
    acc = np.round(metrics.accuracy_score(predictions, labels), 4)
    f1_macro = np.round(metrics.f1_score(predictions, labels, average='macro'), 4)
    f1_micro = np.round(metrics.f1_score(predictions, labels, average='micro'), 4)
    return acc, f1_macro, f1_micro


In [21]:
def evaluate_resnet(pretrained=True):
  """
  Тестирование модели ResNet18
  pretrained: предобученный/непредобученный вариант
  """
  mdl = models.resnet18(pretrained=pretrained)
  mdl = fine_tune_resnet(mdl)

  if torch.cuda.is_available():
      mdl = mdl.cuda()

  train_batch_size=64
  test_batch_size=64

  train_loader, test_loader = get_data_loaders(train_batch_size=train_batch_size, test_batch_size=test_batch_size, inception=False)
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(mdl.parameters(), lr=0.001, momentum=0.9)

  train_model(mdl, epoch=2, train_loader=train_loader, optimizer=optimizer, criterion=criterion, inception=False)
  return test_model(mdl, test_loader=test_loader, inception=False)

In [34]:
def evaluate_inception(pretrained=True):
  """
  Тестирование модели Inception v3
  pretrained: предобученный/непредобученный вариант
  """
  mdl = models.inception_v3(pretrained=pretrained)
  mdl = fine_tune_inception(mdl)

  if torch.cuda.is_available():
      mdl = mdl.cuda()

  train_batch_size=64
  test_batch_size=64

  train_loader, test_loader = get_data_loaders(train_batch_size=train_batch_size, test_batch_size=test_batch_size, inception=True)
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(mdl.parameters(), lr=0.001, momentum=0.9)

  train_model(mdl, epoch=1, train_loader=train_loader, optimizer=optimizer, criterion=criterion, inception=True)
  return test_model(mdl, test_loader=test_loader, inception=True)

Сравнение результатов четырех моделей:

In [23]:
results = pd.DataFrame(columns=['Model', 'Accuracy', 'F1_score macro', 'F1_score micro'])

In [24]:
acc, f1_macro, f1_micro = evaluate_resnet(pretrained=True)
results = results.append(pd.Series(['Pretrained ResNet', acc, f1_macro, f1_micro], index=results.columns), ignore_index=True)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




  0%|          | 1/938 [00:00<02:25,  6.46it/s]


Training model...


100%|██████████| 938/938 [02:53<00:00,  5.41it/s]
  0%|          | 1/938 [00:00<01:47,  8.68it/s]


Epoch: 1 Loss: 0.06


100%|██████████| 938/938 [02:53<00:00,  5.42it/s]
1it [00:00,  9.29it/s]


Epoch: 2 Loss: 0.01

Training Completed in: 346.6672205924988 secs
Training accuracy: 0.9818

Testing model...


157it [00:14, 10.63it/s]


Testing finished. Time spent: 14.771375179290771





In [27]:
acc, f1_macro, f1_micro = evaluate_resnet(pretrained=False)
results = results.append(pd.Series(['Not-pretrained ResNet', acc, f1_macro, f1_micro], index=results.columns), ignore_index=True)

  0%|          | 1/938 [00:00<01:57,  7.95it/s]


Training model...


100%|██████████| 938/938 [02:54<00:00,  5.39it/s]
  0%|          | 1/938 [00:00<01:50,  8.50it/s]


Epoch: 1 Loss: 0.19


100%|██████████| 938/938 [02:53<00:00,  5.40it/s]
1it [00:00,  9.33it/s]


Epoch: 2 Loss: 0.03

Training Completed in: 347.8435904979706 secs
Training accuracy: 0.9069

Testing model...


157it [00:14, 10.47it/s]


Testing finished. Time spent: 14.997379302978516





In [29]:
acc, f1_macro, f1_micro = evaluate_inception(pretrained=True)
results = results.append(pd.Series(['Pretrained Inception v3', acc, f1_macro, f1_micro], index=results.columns), ignore_index=True)

  0%|          | 0/938 [00:00<?, ?it/s]


Training model...


100%|██████████| 938/938 [13:54<00:00,  1.12it/s]
0it [00:00, ?it/s]


Epoch: 1 Loss: 0.12

Training Completed in: 834.9380087852478 secs
Training accuracy: 0.9382

Testing model...


157it [00:48,  3.23it/s]


Testing finished. Time spent: 48.60470128059387





In [35]:
acc, f1_macro, f1_micro = evaluate_inception(pretrained=False)
results = results.append(pd.Series(['Not-pretrained Inception v3', acc, f1_macro, f1_micro], index=results.columns), ignore_index=True)

  0%|          | 0/938 [00:00<?, ?it/s]


Training model...


100%|██████████| 938/938 [13:59<00:00,  1.12it/s]
0it [00:00, ?it/s]


Epoch: 1 Loss: 0.28

Training Completed in: 839.6565806865692 secs
Training accuracy: 0.6799

Testing model...


157it [00:48,  3.21it/s]


Testing finished. Time spent: 48.88857173919678





In [36]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(tabulate(results, headers='keys', tablefmt='psql'))

+----+-----------------------------+------------+------------------+------------------+
|    | Model                       |   Accuracy |   F1_score macro |   F1_score micro |
|----+-----------------------------+------------+------------------+------------------|
|  0 | Pretrained ResNet           |     0.9937 |           0.9937 |           0.9937 |
|  1 | Not-pretrained ResNet       |     0.9842 |           0.9841 |           0.9842 |
|  2 | Pretrained Inception v3     |     0.9919 |           0.9918 |           0.9919 |
|  3 | Not-pretrained Inception v3 |     0.9479 |           0.9474 |           0.9479 |
+----+-----------------------------+------------+------------------+------------------+


Вывод: для обучения inception нужно больше эпох, тк модель объемнее.