In [0]:
import cv2
import matplotlib.pylab as plt
import numpy as np
import random
import math
import pandas as pd
import glob

import torchvision
import torchvision.transforms as transforms

%matplotlib inline
plt.style.use('default')

import warnings
warnings.filterwarnings('ignore')

from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm
from torch import nn
import torch.nn.functional as F

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import plotly.offline

https://habr.com/ru/company/avito/blog/485290/

In [2]:
# Seed NumPy, PyTorch and Python's `random` module with the same value so
# repeated runs start from the same random state.
seed = 228
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

# Convert PIL images to tensors and normalise each RGB channel with a fixed
# mean / std.
# NOTE(review): these constants look like the ImageNet channel statistics that
# torchvision's pretrained models are documented to expect — confirm that this
# is the intent for CIFAR-10 as well.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# CIFAR-10 training split (downloaded into ./data if missing), served in
# shuffled batches of 2048 by two worker processes.
train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=2048,
                                          shuffle=True, num_workers=2)

# CIFAR-10 test split with the same preprocessing; order is kept fixed
# (shuffle=False).
test_set = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=2048,
                                         shuffle=False, num_workers=2)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Files already downloaded and verified
Files already downloaded and verified


In [0]:
def train_smart(model, num_epochs=10):
    """Fine-tune ``model`` on ``train_loader`` with Adam and cross-entropy.

    All parameters of ``model`` are passed to an Adam optimizer (lr=1e-4).
    After every epoch the sample-weighted mean loss and the accuracy over the
    training batches are printed and the current weights are written to the
    file ``'smart'`` in the working directory.

    Args:
        model: classifier that already lives on ``device``; its forward pass
            must return per-class scores indexed as (sample, class).
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, updated in place and left in training mode.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Guarantee training behaviour (batch-norm statistics, dropout) even when
    # the caller previously switched the model to eval mode.
    model.train()

    for epoch in range(num_epochs):
        elements = 0
        mean_loss = 0.0
        correct = 0
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)

            y_pred = model(X)
            loss = criterion(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate plain Python numbers: the loss is re-weighted by the
            # batch size (the last batch may be smaller) and the hit count
            # stays an exact integer instead of a float32 device tensor.
            mean_loss += loss.item() * len(X)
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            elements += len(X)

        total_loss = mean_loss / elements
        total_acc = correct / elements

        print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')
        # Checkpoint after every epoch; the file is overwritten each time.
        torch.save(model.state_dict(), 'smart')

    return model

In [4]:
# Teacher network: torchvision's ResNet-18 initialised with pretrained weights.
# NOTE(review): newer torchvision releases replace `pretrained=True` with a
# `weights=` argument — confirm against the installed version.
smart = torchvision.models.resnet18(pretrained=True)

# Disabled experiment: freeze every existing parameter so that only the
# replacement head created below would be trained.
# def set_parameter_requires_grad(model):
#     for param in model.parameters():
#         param.requires_grad = False

# set_parameter_requires_grad(smart)

# Swap the final fully connected layer for a fresh 10-output head, then move
# the whole model to the selected device.
smart.fc = nn.Linear(smart.fc.in_features, 10)
smart = smart.to(device)

smart = train_smart(smart, num_epochs=10)

# train_smart writes the weights to the file 'smart' after each epoch; reload
# that checkpoint and switch to eval mode for the evaluation / distillation
# cells. Being the last expression, `smart.eval()` also displays the model.
smart.load_state_dict(torch.load("smart"))
smart.eval()

epoch: 1, loss: 1.7121914697647094, accuracy: 0.409060001373291
epoch: 2, loss: 0.9376758997154235, accuracy: 0.6757999658584595
epoch: 3, loss: 0.6782147241783142, accuracy: 0.7659800052642822
epoch: 4, loss: 0.5154062458610534, accuracy: 0.8246999979019165
epoch: 5, loss: 0.3835704894733429, accuracy: 0.8769999742507935
epoch: 6, loss: 0.27326295523643496, accuracy: 0.9203799962997437
epoch: 7, loss: 0.18327909275054932, accuracy: 0.954479992389679
epoch: 8, loss: 0.11672815702915192, accuracy: 0.9774599671363831
epoch: 9, loss: 0.06980964313268662, accuracy: 0.9894399642944336
epoch: 10, loss: 0.04258720884084702, accuracy: 0.9951399564743042


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [0]:
def test(model):
    """Return the classification accuracy of ``model`` on ``test_loader``.

    The model is evaluated in eval mode with gradient tracking disabled, so
    batch-norm / dropout layers behave deterministically and no autograd graph
    is built. The mode the model had on entry is restored before returning.

    Args:
        model: classifier living on ``device`` whose forward pass returns
            per-class scores indexed as (sample, class).

    Returns:
        Fraction of test samples whose arg-max prediction equals the label.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    pred_batches = []
    true_batches = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, y in test_loader:
                output = model(X.to(device))
                pred_batches.append(torch.argmax(output, dim=1).cpu().numpy())
                true_batches.append(y.cpu().numpy())
    finally:
        # Leave the model in whatever mode the caller had chosen.
        model.train(was_training)

    if not pred_batches:
        raise ValueError("test_loader yielded no batches")

    # Concatenate once instead of re-copying the arrays on every batch.
    y_pred = np.concatenate(pred_batches)
    y_true = np.concatenate(true_batches)

    return accuracy_score(y_true, y_pred)

In [6]:
# Test-set accuracy of the fine-tuned ResNet-18 (`smart`), as computed by test().
res_smart = test(smart)

print(res_smart)

0.7613


In [0]:
class primitive(nn.Module):
    """Small CNN: two 5x5 conv + max-pool stages followed by three linear layers.

    The first linear layer is sized for a 16x5x5 feature map, which is what
    the two conv/pool stages produce for 3x32x32 inputs. The network emits 10
    raw scores per sample (no softmax).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Map a batch of 3x32x32 images to a (batch, 10) tensor of scores.

        Raises:
            ValueError: if the feature map after the second pooling stage is
                not 16x5x5, i.e. the input does not have the expected
                channel count / spatial size.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Without this check `view(-1, 400)` would silently fold samples
        # together whenever the total element count happens to be divisible
        # by 400 (e.g. 25 images of size 36x36 would come out as 36 rows).
        if tuple(x.shape[-3:]) != (16, 5, 5):
            raise ValueError(
                "primitive expects 3x32x32 inputs (16x5x5 feature map before "
                f"fc1), got feature map of shape {tuple(x.shape[-3:])}"
            )
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [0]:
def train_primitive(model, num_epochs=10):
    """Train ``model`` on ``train_loader`` with Adam (lr=1e-3) and cross-entropy.

    After every epoch the sample-weighted mean loss and the accuracy over the
    training batches are printed.

    Args:
        model: classifier that already lives on ``device``; its forward pass
            must return per-class scores indexed as (sample, class).
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, updated in place and left in training mode.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Guarantee training behaviour even when the caller previously switched
    # the model to eval mode.
    model.train()

    for epoch in range(num_epochs):
        elements = 0
        mean_loss = 0.0
        correct = 0
        for X, y in train_loader:
            X = X.to(device)
            y = y.to(device)

            y_pred = model(X)
            loss = criterion(y_pred, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate plain Python numbers: the loss is re-weighted by the
            # batch size (the last batch may be smaller) and the hit count
            # stays an exact integer instead of a float32 device tensor.
            mean_loss += loss.item() * len(X)
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            elements += len(X)

        total_loss = mean_loss / elements
        total_acc = correct / elements

        print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')

    return model

In [9]:
# Baseline student: a freshly initialised `primitive` network on `device`,
# trained for 20 epochs with plain cross-entropy (no teacher involved).
conv_nn = primitive().to(device)
conv_nn = train_primitive(conv_nn, num_epochs=20)

# Switch to eval mode for the evaluation cell; as the last expression this
# also displays the module structure.
conv_nn.eval()

epoch: 1, loss: 2.1335972854614256, accuracy: 0.21639999747276306
epoch: 2, loss: 1.847334750289917, accuracy: 0.32651999592781067
epoch: 3, loss: 1.6951861625289917, accuracy: 0.3847399950027466
epoch: 4, loss: 1.6065867197036743, accuracy: 0.4134199917316437
epoch: 5, loss: 1.552321163711548, accuracy: 0.43347999453544617
epoch: 6, loss: 1.5062294190216063, accuracy: 0.45399999618530273
epoch: 7, loss: 1.4655480351638794, accuracy: 0.47005999088287354
epoch: 8, loss: 1.4343947258377074, accuracy: 0.4810599982738495
epoch: 9, loss: 1.4085931601333619, accuracy: 0.48993998765945435
epoch: 10, loss: 1.3787116205596923, accuracy: 0.5030800104141235
epoch: 11, loss: 1.3600501608657838, accuracy: 0.5091999769210815
epoch: 12, loss: 1.3341531991195679, accuracy: 0.5194000005722046
epoch: 13, loss: 1.3219798076629639, accuracy: 0.525119960308075
epoch: 14, loss: 1.3002315266799926, accuracy: 0.5342599749565125
epoch: 15, loss: 1.2838537795257567, accuracy: 0.5397199988365173
epoch: 16, loss:

primitive(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [10]:
# Test-set accuracy of the student trained without the teacher.
res_primitive = test(conv_nn)

print(res_primitive)

0.5486


In [0]:
def train_primitive_with_smart(model, smart, a=0.5, num_epochs=10):
    """Train the student ``model`` with help from the teacher ``smart``.

    The per-batch loss is

        a * CE(student_scores, labels)
        + (1 - a) * MSE(softmax(student_scores), softmax(teacher_scores))

    and only the student's parameters are optimised (Adam, lr=1e-2). The
    teacher is run in eval mode with gradient tracking disabled, so it acts as
    a fixed target: no autograd graph is built through it and no gradients
    accumulate on its parameters. The teacher's original train/eval mode is
    restored before returning.

    Args:
        model: student classifier living on ``device``.
        smart: teacher classifier living on ``device``; must produce the same
            number of class scores per sample as ``model``.
        a: weight of the hard-label cross-entropy term; ``1 - a`` weights the
            teacher-matching term.
        num_epochs: number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, updated in place and left in training mode.
    """
    ce = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    mse = torch.nn.MSELoss()

    teacher_was_training = smart.training
    smart.eval()
    model.train()

    try:
        for epoch in range(num_epochs):
            elements = 0
            mean_loss = 0.0
            correct = 0
            for X, y in train_loader:
                X = X.to(device)
                y = y.to(device)

                y_pred = model(X)
                # Teacher outputs are constants with respect to the student.
                with torch.no_grad():
                    smart_y_pred = smart(X)

                loss = a * ce(y_pred, y) + (1 - a) * mse(
                    F.softmax(y_pred, dim=-1), F.softmax(smart_y_pred, dim=-1)
                )

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Accumulate plain Python numbers so the epoch statistics are
                # exact and do not live on the device.
                mean_loss += loss.item() * len(X)
                correct += (y_pred.argmax(dim=1) == y).sum().item()
                elements += len(X)

            total_loss = mean_loss / elements
            total_acc = correct / elements

            print(f'epoch: {epoch + 1}, loss: {total_loss}, accuracy: {total_acc}')
    finally:
        # Hand the teacher back in the mode the caller had chosen.
        smart.train(teacher_was_training)

    return model

In [12]:
# Distilled student: a freshly initialised `primitive` network on `device`,
# trained for 20 epochs with the teacher `smart`; the hard-label term gets
# weight 0.2 and the teacher-matching term the remaining 0.8.
conv_nn = primitive().to(device)
conv_nn = train_primitive_with_smart(conv_nn, smart, a=0.2, num_epochs=20)

# Switch to eval mode for the evaluation cell; as the last expression this
# also displays the module structure.
conv_nn.eval()

epoch: 1, loss: 0.4748212748336792, accuracy: 0.23517999053001404
epoch: 2, loss: 0.3847342297649384, accuracy: 0.3861599862575531
epoch: 3, loss: 0.3518998817253113, accuracy: 0.44933998584747314
epoch: 4, loss: 0.3247924182319641, accuracy: 0.49347999691963196
epoch: 5, loss: 0.3095993337059021, accuracy: 0.5216000080108643
epoch: 6, loss: 0.29652205717086794, accuracy: 0.5465399622917175
epoch: 7, loss: 0.2868335312271118, accuracy: 0.5622000098228455
epoch: 8, loss: 0.27418550459861757, accuracy: 0.5842800140380859
epoch: 9, loss: 0.26591174990653993, accuracy: 0.5974400043487549
epoch: 10, loss: 0.2585857437038422, accuracy: 0.6093199849128723
epoch: 11, loss: 0.25197030241966245, accuracy: 0.6212199926376343
epoch: 12, loss: 0.2472033537197113, accuracy: 0.6302399635314941
epoch: 13, loss: 0.2398144023990631, accuracy: 0.6400200128555298
epoch: 14, loss: 0.2348843421792984, accuracy: 0.6468799710273743
epoch: 15, loss: 0.2298835883140564, accuracy: 0.6570199728012085
epoch: 16, l

primitive(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [13]:
# Test-set accuracy of the student trained with the teacher's guidance.
res_primitive_with_smart = test(conv_nn)

print(res_primitive_with_smart)

0.6178


In [14]:
# Side-by-side summary of the three test accuracies measured in the cells above.
print(f"smart accuracy: {res_smart}")
print(f"primitive accuracy: {res_primitive}")
print(f"primitive with smart accuracy: {res_primitive_with_smart}")

smart accuracy: 0.7613
primitive accruracy: 0.5486
primitive with smart accuracy: 0.6178
