In [0]:
import cv2
import matplotlib.pylab as plt
import numpy as np
import random
import math
import pandas as pd
import glob

import torchvision
import torchvision.transforms as transforms

%matplotlib inline
plt.style.use('default')

import warnings
warnings.filterwarnings('ignore')

from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm
from torch import nn
import torch.nn.functional as F

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import plotly.offline

https://habr.com/ru/company/avito/blog/485290/

In [2]:
# Seed every RNG used in the notebook so runs are repeatable.
seed = 228
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert PIL images to tensors, then standardise each RGB channel with the
# fixed per-channel mean/std below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Training split: reshuffled every epoch.
train_set = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
train_loader = DataLoader(
    train_set, batch_size=2048, shuffle=True, num_workers=2
)

# Test split: fixed order.
test_set = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)
test_loader = DataLoader(
    test_set, batch_size=2048, shuffle=False, num_workers=2
)

Files already downloaded and verified
Files already downloaded and verified


In [0]:
def train_smart(model, num_epochs=10):
    """Fine-tune ``model`` on the global ``train_loader`` and checkpoint it.

    Optimises all of ``model.parameters()`` with Adam (lr=1e-4) against
    cross-entropy. After every epoch the sample-weighted mean loss and the
    training accuracy are printed and ``model.state_dict()`` is written to the
    file ``'smart'`` (overwriting the previous epoch's checkpoint).

    Args:
        model: ``nn.Module`` whose output is used as class logits. Batches are
            moved to the global ``device`` before the forward pass, so the
            model must already live there.
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, updated in place.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Guarantee training behaviour of BatchNorm/Dropout even when the caller
    # hands in a model that was previously switched to eval().
    model.train()

    for epoch in range(num_epochs):
        elements = 0
        mean_loss = 0.0
        correct = 0
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)

            y_pred = model(X)
            loss = criterion(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean, so weight it by the batch size
            # to get an exact per-sample average at the end of the epoch.
            mean_loss += loss.item() * len(X)
            # .item() keeps the running count a plain Python int instead of a
            # device tensor.
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            elements += len(X)

        total_loss = mean_loss / elements
        total_acc = correct / elements

        print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')
        torch.save(model.state_dict(), 'smart')

    return model

In [4]:
# Teacher network: pretrained ResNet-18 whose classifier head is replaced by a
# fresh 10-output linear layer. No parameters are frozen, so the whole network
# is fine-tuned.
smart = torchvision.models.resnet18(pretrained=True)
smart.fc = nn.Linear(in_features=smart.fc.in_features, out_features=10)

smart = train_smart(smart.to(device), num_epochs=10)

# Restore the checkpoint written by train_smart and switch to inference mode.
smart.load_state_dict(torch.load("smart"))
smart.eval()

epoch: 1, loss: 1.7122296115493774, accuracy: 0.4088200032711029
epoch: 2, loss: 0.9379380323410034, accuracy: 0.6752600073814392
epoch: 3, loss: 0.6786848701477051, accuracy: 0.7664200067520142
epoch: 4, loss: 0.5159601857566833, accuracy: 0.8252999782562256
epoch: 5, loss: 0.3842359911441803, accuracy: 0.8771799802780151
epoch: 6, loss: 0.2735558604717255, accuracy: 0.9199399948120117
epoch: 7, loss: 0.18355409548282622, accuracy: 0.9545999765396118
epoch: 8, loss: 0.1169557518863678, accuracy: 0.9775399565696716
epoch: 9, loss: 0.0698659116601944, accuracy: 0.9892399907112122
epoch: 10, loss: 0.042724183213710784, accuracy: 0.9953599572181702


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [0]:
def test(model):
    """Return the classification accuracy of ``model`` on the global ``test_loader``.

    The model is evaluated in eval mode with gradient tracking disabled; its
    previous train/eval mode is restored before returning.

    Args:
        model: ``nn.Module`` producing one row of class scores per sample.
            Inputs are moved to the global ``device`` before the forward pass.

    Returns:
        Fraction of test samples whose arg-max prediction equals the label, as
        computed by ``sklearn.metrics.accuracy_score``.
    """
    pred_batches = []
    true_batches = []

    was_training = model.training
    model.eval()
    try:
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            for X, y in test_loader:
                output = model(X.to(device))
                pred_batches.append(output.argmax(dim=1).cpu().numpy())
                true_batches.append(y.cpu().numpy())
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    # Concatenate once instead of re-copying the arrays with np.append per batch.
    y_pred = np.concatenate(pred_batches) if pred_batches else np.array([], dtype=int)
    y_true = np.concatenate(true_batches) if true_batches else np.array([], dtype=int)

    return accuracy_score(y_true, y_pred)

In [6]:
# Accuracy of the fine-tuned teacher on the held-out test loader.
res_smart = test(smart)

print(res_smart)

0.7579


In [0]:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

class primitive(nn.Module):
    """Small LeNet-style CNN: two conv+pool stages followed by three linear layers.

    With 3x32x32 inputs the feature map entering the classifier is 16x5x5,
    which is the size ``fc1`` is built for. The network returns 10 raw class
    scores per sample (no softmax applied).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 5x5 convolutions, each followed by ReLU and 2x2 max-pool.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Map a batch of images to a ``(batch, 10)`` tensor of class scores."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(F.relu(conv(features)))

        # Flatten every sample's 16x5x5 feature map into a 400-vector.
        flat = features.view(-1, 16 * 5 * 5)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


In [0]:
def train_primitive(model, num_epochs=10):
    """Train ``model`` on the global ``train_loader`` using labels only.

    Optimises all of ``model.parameters()`` with Adam (lr=1e-3) against
    cross-entropy and prints the sample-weighted mean loss and the training
    accuracy after every epoch.

    Args:
        model: ``nn.Module`` whose output is used as class logits. Batches are
            moved to the global ``device`` before the forward pass, so the
            model must already live there.
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, updated in place.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Guarantee training behaviour of mode-dependent layers even when the
    # caller hands in a model that was previously switched to eval().
    model.train()

    for epoch in range(num_epochs):
        elements = 0
        mean_loss = 0.0
        correct = 0
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)

            y_pred = model(X)
            loss = criterion(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean, so weight it by the batch size
            # to get an exact per-sample average at the end of the epoch.
            mean_loss += loss.item() * len(X)
            # .item() keeps the running count a plain Python int instead of a
            # device tensor.
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            elements += len(X)

        total_loss = mean_loss / elements
        total_acc = correct / elements

        print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')

    return model

In [9]:
# Baseline student: the small CNN trained on the labels alone for 15 epochs.
conv_nn = train_primitive(primitive().to(device), num_epochs=15)

conv_nn.eval()

epoch: 1, loss: 2.133589452896118, accuracy: 0.216279998421669
epoch: 2, loss: 1.8475012383651734, accuracy: 0.3265799880027771
epoch: 3, loss: 1.6950228954696656, accuracy: 0.38492000102996826
epoch: 4, loss: 1.6065207349395751, accuracy: 0.4137199819087982
epoch: 5, loss: 1.5523322343063355, accuracy: 0.4337199926376343
epoch: 6, loss: 1.5062968605422973, accuracy: 0.4536399841308594
epoch: 7, loss: 1.4660519107818604, accuracy: 0.46949997544288635
epoch: 8, loss: 1.4347771736145019, accuracy: 0.48085999488830566
epoch: 9, loss: 1.4084605392837524, accuracy: 0.49069997668266296
epoch: 10, loss: 1.3788303916168212, accuracy: 0.5029799938201904
epoch: 11, loss: 1.3600088082885742, accuracy: 0.5089399814605713
epoch: 12, loss: 1.3338526570510865, accuracy: 0.5203199982643127
epoch: 13, loss: 1.320792894592285, accuracy: 0.5257599949836731
epoch: 14, loss: 1.2988951033782958, accuracy: 0.5342000126838684
epoch: 15, loss: 1.283263264427185, accuracy: 0.5397999882698059


primitive(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [10]:
# Accuracy of the label-only baseline student on the held-out test loader.
res_primitive = test(conv_nn)

print(res_primitive)

0.5243


In [0]:
def train_primitive_with_smart(model, smart, a=0.5, num_epochs=10):
    """Train the student ``model`` with help from the teacher ``smart``.

    The per-batch objective is

        a * CE(student_logits, y)
        + (1 - a) * MSE(softmax(student_logits), softmax(teacher_logits))

    Only the student's parameters are optimised (Adam, lr=1e-2). The teacher
    is treated as a fixed target: it is run in eval mode without gradient
    tracking, and its previous train/eval mode is restored on exit. The
    sample-weighted mean of the combined loss and the student's training
    accuracy are printed after every epoch.

    Args:
        model: student ``nn.Module``; must already be on the global ``device``.
        smart: teacher network called on the same batch; must already be on
            the global ``device``.
        a: weight of the hard-label cross-entropy term; ``1 - a`` weights the
            teacher-matching term.
        num_epochs: number of full passes over the global ``train_loader``.

    Returns:
        The same ``model`` object, updated in place.
    """
    ce = nn.CrossEntropyLoss()
    mse = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    model.train()

    # The teacher must not change while it supervises the student: in train
    # mode its BatchNorm running statistics would be updated by every forward
    # pass. Switch to eval for the duration and restore afterwards.
    teacher_was_training = getattr(smart, "training", False)
    if teacher_was_training:
        smart.eval()

    try:
        for epoch in range(num_epochs):
            elements = 0
            mean_loss = 0.0
            correct = 0
            for X, y in train_loader:
                X = X.to(device)
                y = y.to(device)

                y_pred = model(X)

                # No graph for the teacher: otherwise loss.backward() would
                # backpropagate through it and pile up gradients on its
                # parameters that nothing ever clears or uses.
                with torch.no_grad():
                    smart_probs = F.softmax(smart(X), dim=-1)

                loss = a * ce(y_pred, y) + (1 - a) * mse(F.softmax(y_pred, dim=-1), smart_probs)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                mean_loss += loss.item() * len(X)
                # .item() keeps the running count a plain Python int instead
                # of a device tensor.
                correct += (y_pred.argmax(dim=1) == y).sum().item()
                elements += len(X)

            total_loss = mean_loss / elements
            total_acc = correct / elements

            print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')
    finally:
        if teacher_was_training:
            smart.train()

    return model

In [12]:
# Fresh student trained with the teacher's guidance: 20% hard-label loss,
# 80% teacher-matching loss, 15 epochs.
conv_nn = train_primitive_with_smart(
    primitive().to(device), smart, a=0.2, num_epochs=15
)

conv_nn.eval()

epoch: 1, loss: 0.4655162782382965, accuracy: 0.24966000020503998
epoch: 2, loss: 0.3780401555633545, accuracy: 0.4050399959087372
epoch: 3, loss: 0.3405739925670624, accuracy: 0.4686799943447113
epoch: 4, loss: 0.3182104660606384, accuracy: 0.5098599791526794
epoch: 5, loss: 0.30014724445343016, accuracy: 0.540120005607605
epoch: 6, loss: 0.28692616218566896, accuracy: 0.5619400143623352
epoch: 7, loss: 0.27688231113433837, accuracy: 0.5802599787712097
epoch: 8, loss: 0.2653952394771576, accuracy: 0.5974799990653992
epoch: 9, loss: 0.25659732421875, accuracy: 0.6127600073814392
epoch: 10, loss: 0.25022766593933105, accuracy: 0.6218799948692322
epoch: 11, loss: 0.24178652387619018, accuracy: 0.6378799676895142
epoch: 12, loss: 0.2372308439016342, accuracy: 0.6427199840545654
epoch: 13, loss: 0.23289795170783997, accuracy: 0.6508600115776062
epoch: 14, loss: 0.22570449667453765, accuracy: 0.6628999710083008
epoch: 15, loss: 0.22238100285053253, accuracy: 0.6659799814224243


primitive(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [13]:
# Accuracy of the teacher-guided student on the held-out test loader.
res_primitive_with_smart = test(conv_nn)

print(res_primitive_with_smart)

0.5916


In [14]:
# Side-by-side test accuracies of the teacher, the baseline student and the
# teacher-guided student.
print(f"smart accuracy: {res_smart}")
print(f"primitive accuracy: {res_primitive}")
print(f"primitive with smart accuracy: {res_primitive_with_smart}")

smart accuracy: 0.7579
primitive accruracy: 0.5243
primitive with smart accuracy: 0.5916


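Итого: resnet помог нашей слабой архитектуре повысить accuracy на тесте почти на 0.07 (с 0.5243 до 0.5916) при том же количестве эпох обучения. Оговорка: обучение с учителем запускалось с lr=1e-2, а базовое — с lr=1e-3, поэтому часть прироста может объясняться разницей в шаге обучения.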