In [0]:
import cv2
import matplotlib.pylab as plt
import numpy as np
import random
import math
import pandas as pd
import glob

import torchvision
import torchvision.transforms as transforms

%matplotlib inline
plt.style.use('default')

import warnings
warnings.filterwarnings('ignore')

from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm
from torch import nn
import torch.nn.functional as F

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import plotly.offline

https://habr.com/ru/company/avito/blog/485290/

In [2]:
# Seed every RNG the notebook touches so that weight initialisation and
# shuffling are repeatable between runs.
seed = 228
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

# Data-pipeline settings, named once so the train and test pipelines cannot
# silently drift apart.
DATA_ROOT = './data'
BATCH_SIZE = 2048
NUM_WORKERS = 2
# Per-channel mean / std applied after ToTensor(). These are the values
# torchvision documents for its ImageNet-pretrained models, which is what the
# pretrained ResNet-18 used later in the notebook expects.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Training split: reshuffled every epoch.
train_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True,
                                         download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                           shuffle=True, num_workers=NUM_WORKERS)

# Held-out split: fixed order, same preprocessing.
test_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False,
                                        download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE,
                                          shuffle=False, num_workers=NUM_WORKERS)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Files already downloaded and verified
Files already downloaded and verified


In [0]:
def train_smart(model, num_epochs=10):
    """Fine-tune ``model`` with Adam (lr=1e-4) and cross-entropy loss.

    The function reads two notebook-level globals: ``train_loader``, which is
    iterated once per epoch and must yield ``(inputs, labels)`` batches, and
    ``device``, to which every batch is moved before the forward pass.

    After each epoch it prints the sample-weighted mean loss and the training
    accuracy, then writes ``model.state_dict()`` to the file ``'smart'`` in
    the current working directory (overwriting the previous checkpoint).

    Args:
        model: network to optimise in place; all of its parameters are handed
            to the optimiser.
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, left in training mode.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Guarantee training behaviour of dropout / batch-norm layers even when
    # the caller passes a model that was previously switched to eval mode.
    model.train()

    for epoch in range(num_epochs):
        elements = 0
        mean_loss = 0.0
        correct = 0
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)

            y_pred = model(X)
            loss = criterion(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_size = len(X)
            # The criterion returns a per-batch mean, so weight it by the
            # batch size to obtain a correct per-sample average at the end.
            mean_loss += loss.item() * batch_size
            # .item() keeps the counter a plain Python int instead of a
            # device tensor, so the reported accuracy is an exact ratio.
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            elements += batch_size

        total_loss = mean_loss / elements
        total_acc = correct / elements

        print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')
        torch.save(model.state_dict(), 'smart')

    return model

In [4]:
# Teacher network: torchvision's ResNet-18 initialised from pretrained weights.
# NOTE(review): newer torchvision releases replace `pretrained=` with
# `weights=` — confirm against the installed version.
smart = torchvision.models.resnet18(pretrained=True)

# Disabled variant: freeze every parameter so that only a newly attached head
# would be trained. With it commented out the whole network is fine-tuned.
# def set_parameter_requires_grad(model):
#     for param in model.parameters():
#         param.requires_grad = False

# set_parameter_requires_grad(smart)

# Swap the classification head for a fresh linear layer with 10 outputs.
smart.fc = nn.Linear(smart.fc.in_features, 10)
smart = smart.to(device)

smart = train_smart(smart, num_epochs=10)

# Reload the checkpoint that train_smart wrote after its final epoch, then
# switch to inference mode for evaluation and for use as a teacher.
smart.load_state_dict(torch.load("smart"))
smart.eval()

epoch: 1, loss: 1.7120868599319459, accuracy: 0.40901997685432434
epoch: 2, loss: 0.9375466221046448, accuracy: 0.6752200126647949
epoch: 3, loss: 0.6782752262878418, accuracy: 0.7661399841308594
epoch: 4, loss: 0.5155905470657348, accuracy: 0.8249799609184265
epoch: 5, loss: 0.38413483410835264, accuracy: 0.8766399621963501
epoch: 6, loss: 0.2737151264381409, accuracy: 0.9192999601364136
epoch: 7, loss: 0.18390860013008117, accuracy: 0.9540799856185913
epoch: 8, loss: 0.11677001179695129, accuracy: 0.9776600003242493
epoch: 9, loss: 0.06975768966913223, accuracy: 0.9893800020217896
epoch: 10, loss: 0.04248692395210266, accuracy: 0.9951799511909485


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [0]:
def test(model):
    y_pred = np.array([])
    y_true = np.array([])

    for batch in test_loader:
        X, y = batch
        X = X.to(device)
        y = y.to(device)

        output = model(X)
        
        batch_pred = torch.argmax(output, dim=1)
        y_pred = np.append(y_pred, batch_pred.cpu().numpy())
        y_true = np.append(y_true, y.cpu().numpy())

    y_pred = y_pred.astype(int)

    acc = accuracy_score(y_pred, y_true)

    return acc

In [6]:
# Test-set accuracy of the fine-tuned ResNet-18.
res_smart = test(smart)

print(res_smart)

0.7598


In [0]:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

class primitive(nn.Module):
    """Small two-convolution CNN producing 10 class scores.

    Two 5x5 convolutions (3 -> 6 -> 16 channels), each followed by ReLU and a
    shared 2x2 max-pool, feed a three-layer fully connected head
    (400 -> 120 -> 84 -> 10). The flatten size of 16 * 5 * 5 corresponds to
    3x32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; the pooling layer has no weights
        # and is reused after both convolutions.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected classifier head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map a batch of images to raw (unnormalised) class scores."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # Collapse channel and spatial dimensions into one vector per sample.
        flat = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


In [0]:
def train_primitive(model, num_epochs=10):
    """Train ``model`` from labels only, with Adam (lr=1e-3) and cross-entropy.

    The function reads two notebook-level globals: ``train_loader``, which is
    iterated once per epoch and must yield ``(inputs, labels)`` batches, and
    ``device``, to which every batch is moved before the forward pass.

    After each epoch it prints the sample-weighted mean loss and the training
    accuracy.

    Args:
        model: network to optimise in place; all of its parameters are handed
            to the optimiser.
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, left in training mode.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Guarantee training behaviour of dropout / batch-norm layers even when
    # the caller passes a model that was previously switched to eval mode.
    model.train()

    for epoch in range(num_epochs):
        elements = 0
        mean_loss = 0.0
        correct = 0
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)

            y_pred = model(X)
            loss = criterion(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_size = len(X)
            # The criterion returns a per-batch mean, so weight it by the
            # batch size to obtain a correct per-sample average at the end.
            mean_loss += loss.item() * batch_size
            # .item() keeps the counter a plain Python int instead of a
            # device tensor, so the reported accuracy is an exact ratio.
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            elements += batch_size

        total_loss = mean_loss / elements
        total_acc = correct / elements

        print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')

    return model

In [9]:
# Baseline: train the small CNN for 15 epochs using the labels only.
conv_nn = primitive()
conv_nn.to(device)
conv_nn = train_primitive(conv_nn, 15)

# Switch to inference mode before evaluation.
conv_nn.eval()

epoch: 1, loss: 2.133591714134216, accuracy: 0.216279998421669
epoch: 2, loss: 1.8475198878860473, accuracy: 0.3264999985694885
epoch: 3, loss: 1.6952074464797973, accuracy: 0.384799987077713
epoch: 4, loss: 1.6061191634368897, accuracy: 0.4139999747276306
epoch: 5, loss: 1.5532233589935303, accuracy: 0.4333999752998352
epoch: 6, loss: 1.5065511503219604, accuracy: 0.4536399841308594
epoch: 7, loss: 1.4662624678421021, accuracy: 0.4694199860095978
epoch: 8, loss: 1.434801008453369, accuracy: 0.48051998019218445
epoch: 9, loss: 1.408804690322876, accuracy: 0.49049997329711914
epoch: 10, loss: 1.3792069511795044, accuracy: 0.5031999945640564
epoch: 11, loss: 1.3606824295043944, accuracy: 0.5090399980545044
epoch: 12, loss: 1.335187605819702, accuracy: 0.5192800164222717
epoch: 13, loss: 1.322454558944702, accuracy: 0.5252799987792969
epoch: 14, loss: 1.301445667037964, accuracy: 0.5336999893188477
epoch: 15, loss: 1.2847859977340699, accuracy: 0.5392799973487854


primitive(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [10]:
# Test-set accuracy of the label-only baseline.
res_primitive = test(conv_nn)

print(res_primitive)

0.5255


In [0]:
def train_primitive_with_smart(model, smart, a=0.5, num_epochs=10):
    """Train ``model`` (student) on labels and on the outputs of ``smart`` (teacher).

    The per-batch objective is::

        a * CE(student_logits, y)
        + (1 - a) * MSE(softmax(student_logits), softmax(teacher_logits))

    The function reads two notebook-level globals: ``train_loader``, which is
    iterated once per epoch and must yield ``(inputs, labels)`` batches, and
    ``device``, to which every batch is moved before the forward passes.

    The teacher is used as a fixed target: it runs in eval mode with gradient
    tracking disabled, so no autograd graph is built through it and its
    parameters receive no gradients. Its previous train/eval mode is restored
    on exit.

    Args:
        model: student network, optimised in place with Adam (lr=1e-3).
        smart: teacher network producing logits of the same shape as the
            student's for the same input batch.
        a: weight of the cross-entropy term; ``1 - a`` weights the imitation
            term. ``a=0`` trains on the teacher's outputs alone.
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, left in training mode.
    """
    ce = nn.CrossEntropyLoss()
    mse = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    model.train()
    # Freeze the teacher's behaviour (dropout off, batch-norm statistics not
    # updated) for the duration of training, remembering its previous mode.
    teacher_was_training = smart.training
    smart.eval()

    try:
        for epoch in range(num_epochs):
            elements = 0
            mean_loss = 0.0
            correct = 0
            for X, y in train_loader:
                X = X.to(device)
                y = y.to(device)

                y_pred = model(X)
                # The teacher only provides targets; without no_grad() the
                # backward pass would also traverse the teacher and fill its
                # .grad buffers for nothing.
                with torch.no_grad():
                    smart_probs = F.softmax(smart(X), dim=-1)

                loss = a * ce(y_pred, y) + (1 - a) * mse(F.softmax(y_pred, dim=-1), smart_probs)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                batch_size = len(X)
                # Both loss terms are per-batch means; weight by batch size
                # so the epoch figure is a per-sample average.
                mean_loss += loss.item() * batch_size
                # .item() keeps the counter a plain Python int instead of a
                # device tensor, so the reported accuracy is an exact ratio.
                correct += (y_pred.argmax(dim=1) == y).sum().item()
                elements += batch_size

            total_loss = mean_loss / elements
            total_acc = correct / elements

            print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')
    finally:
        smart.train(teacher_was_training)

    return model

In [15]:
# Distillation run: a fresh small CNN trained for 15 epochs against the
# fine-tuned ResNet. With a = 0.0 the cross-entropy term is multiplied by
# zero, so the student learns only from the teacher's softmax outputs.
# Note that this rebinds `conv_nn`, replacing the baseline model.
conv_nn = primitive()
conv_nn.to(device)
conv_nn = train_primitive_with_smart(conv_nn, smart, 0.0, 15)

# Switch to inference mode before evaluation.
conv_nn.eval()

epoch: 1, loss: 0.08191188997745513, accuracy: 0.2261199951171875
epoch: 2, loss: 0.07488856539249421, accuracy: 0.33434000611305237
epoch: 3, loss: 0.07078102601528168, accuracy: 0.3835799992084503
epoch: 4, loss: 0.06731973183393479, accuracy: 0.4240399897098541
epoch: 5, loss: 0.06465226093292237, accuracy: 0.4523800015449524
epoch: 6, loss: 0.06226334257364273, accuracy: 0.4759399890899658
epoch: 7, loss: 0.06053111493706703, accuracy: 0.49526000022888184
epoch: 8, loss: 0.058949327641725544, accuracy: 0.5108399987220764
epoch: 9, loss: 0.05740443249940872, accuracy: 0.5263400077819824
epoch: 10, loss: 0.05604466941356659, accuracy: 0.5390399694442749
epoch: 11, loss: 0.05485662794947624, accuracy: 0.5509399771690369
epoch: 12, loss: 0.05357265322566032, accuracy: 0.5619800090789795
epoch: 13, loss: 0.05279749336719513, accuracy: 0.5712199807167053
epoch: 14, loss: 0.05168342544794083, accuracy: 0.5812999606132507
epoch: 15, loss: 0.05097236837506294, accuracy: 0.5870400071144104


primitive(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [16]:
# Test-set accuracy of the small CNN trained against the teacher's outputs.
res_primitive_with_smart = test(conv_nn)

print(res_primitive_with_smart)

0.5637


In [17]:
# Side-by-side summary of the three test accuracies measured above.
print(f"smart accuracy: {res_smart}")
print(f"primitive accuracy: {res_primitive}")
print(f"primitive with smart accuracy: {res_primitive_with_smart}")

smart accuracy: 0.7598
primitive accruracy: 0.5255
primitive with smart accuracy: 0.5637


Итого: ResNet помог нашей слабой архитектуре повысить качество почти на 0.04 accuracy (с 0.5255 до 0.5637) при том же количестве эпох обучения и с тем же оптимизатором.