# Эксперименты с классификатором

## Подготовка данных

In [1]:
import time

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import torchmetrics
import torchvision
from pytorch_lightning import LightningModule
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from torchvision.datasets import CIFAR10

from src.models.autoencoder import MyAutoencoder
from src.utils import grid_plot, vis_confusion

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the training split without any transform so the raw pixel array
# (`data.data`) can be used to derive the normalisation statistics.
data = CIFAR10('../data/', train=True, download=True)

# Reduce over the first three axes and rescale by 255; the printed result has
# three entries, which are later fed to `transforms.Normalize`.
reduce_axes = (0, 1, 2)
mean = data.data.mean(axis=reduce_axes) / 255
std = data.data.std(axis=reduce_axes) / 255

for label, value in (('mean', mean), ('std', std)):
    print(f'{label}: {value}')

Files already downloaded and verified
mean: [0.49139968 0.48215841 0.44653091]
std: [0.24703223 0.24348513 0.26158784]


In [3]:
# Shared preprocessing: convert to a tensor, then normalise with the `mean` /
# `std` statistics computed in the previous cell.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])

# Every dataloader below uses the same batch size.
batch_size = 1000

# Test split.
cifar_test = CIFAR10('../data/', train=False, download=True, transform=transform)
test_dataloader = torch.utils.data.DataLoader(dataset=cifar_test, batch_size=batch_size)

# Full training split, from which a validation subset is held out.
cifar_full = CIFAR10('../data/', train=True, download=True, transform=transform)
val_size = 2000
train_size = len(cifar_full) - val_size

# Seed the global RNG right before splitting so the train/val partition is
# reproducible.
torch.manual_seed(42)
cifar_train, cifar_val = torch.utils.data.random_split(cifar_full, [train_size, val_size])

# Only the training loader is shuffled.
train_dataloader = torch.utils.data.DataLoader(dataset=cifar_train, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(dataset=cifar_val, batch_size=batch_size)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Label names listed in the order of their integer index
# (presumably matching the CIFAR-10 target ids -- verify against the dataset).
class_names = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

# Label -> index lookup; insertion order follows `class_names`.
classes = dict(zip(class_names, range(len(class_names))))

## Модели

### Автоэнкодер

In [5]:
# Path to the pre-trained autoencoder weights.
autoencoder_weights = '../models/autoencoder_without_love.pth'

autoencoder = MyAutoencoder()
# map_location='cpu' remaps every stored tensor to the CPU, so the checkpoint
# loads even if it was saved from a device that is not available here.
autoencoder.load_state_dict(torch.load(autoencoder_weights, map_location='cpu'))

<All keys matched successfully>

### Классификатор

In [6]:
class Classifier(LightningModule):
    """Classification head trained on features from a frozen encoder.

    Training loss and accuracy are written to TensorBoard after every
    training step. Validation and test metrics are accumulated over the whole
    dataloader and written once per evaluation epoch, so every logged point
    (and every confusion matrix) describes the full split rather than a
    single batch.

    Args:
        autoencoder: Module exposing an ``encoder`` attribute. All of its
            parameters are frozen in place via ``requires_grad_(False)``.
        classifier: Module mapping the flattened encoder output to class
            logits (the output is fed to ``F.cross_entropy``).
        classes: Sized collection of class labels. Its length defines the
            number of classes and it is forwarded to ``vis_confusion``.
        lr: Learning rate passed to Adam.
    """

    def __init__(self, autoencoder, classifier, classes, lr):
        super().__init__()

        # Note: this mutates the autoencoder passed in by the caller.
        self._autoencoder = autoencoder
        self._autoencoder.requires_grad_(False)
        self.encoder = self._autoencoder.encoder

        self.clf = classifier

        self.classes = classes
        self.lr = lr

        # Quality metrics. The same objects serve all stages: training uses
        # the per-batch value returned by calling the metric, evaluation
        # resets them at epoch start and reads the accumulated result at
        # epoch end.
        num_classes = len(self.classes)
        self.accuracy = torchmetrics.Accuracy('multiclass', num_classes=num_classes)
        self.conf_matrix = torchmetrics.ConfusionMatrix('multiclass', num_classes=num_classes)

        # Running totals for the sample-weighted mean loss of one evaluation epoch.
        self._eval_loss_sum = 0.0
        self._eval_samples = 0

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        # Keep the encoder in eval mode and out of the autograd graph,
        # whatever mode the enclosing module is currently in.
        self.encoder.eval()
        with torch.no_grad():
            encoded = self.encoder(x).flatten(1)

        return self.clf(encoded)

    def configure_optimizers(self):
        """Create the Adam optimizer with learning rate ``self.lr``."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    # ------------------------------------------------------------------
    # Logging helpers
    # ------------------------------------------------------------------
    def _log_scalar(self, group, name, value):
        """Write ``value`` as series ``name`` of the TensorBoard chart ``group``."""
        self.logger.experiment.add_scalars(group, {name: value}, global_step=self.global_step)

    def _reset_eval_state(self):
        """Clear metric state and loss totals before/after an evaluation epoch."""
        self.accuracy.reset()
        self.conf_matrix.reset()
        self._eval_loss_sum = 0.0
        self._eval_samples = 0

    def _accumulate_eval_batch(self, batch):
        """Run one evaluation batch, update the running statistics, return its loss."""
        x, y = batch
        y_pred = self.forward(x)
        loss = F.cross_entropy(y_pred, y)

        self.accuracy.update(y_pred, y)
        self.conf_matrix.update(y_pred, y)

        # cross_entropy averages over the batch; weight by batch size so the
        # epoch mean stays exact when the last batch is smaller.
        batch_size = y.size(0)
        self._eval_loss_sum += loss.item() * batch_size
        self._eval_samples += batch_size

        return loss

    def _log_eval_epoch(self, stage, with_loss):
        """Log the metrics accumulated for ``stage`` ('val' or 'test') and reset."""
        if self._eval_samples == 0:
            # Nothing was evaluated, so there is nothing meaningful to log.
            return

        if with_loss:
            self._log_scalar('Loss', f'{stage} loss', self._eval_loss_sum / self._eval_samples)

        accuracy = self.accuracy.compute()
        self._log_scalar('Accuracy', f'{stage} accuracy', accuracy)
        # Also report through Lightning so the value shows up in the result of
        # `trainer.validate` / `trainer.test` and is visible to callbacks.
        self.log(f'{stage}_accuracy', accuracy)

        matrix = self.conf_matrix.compute()
        vis_confusion(self.logger.experiment, stage, self.global_step, matrix, self.classes)

        self._reset_eval_state()

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------
    def training_step(self, train_batch, batch_idx):
        """Compute the cross-entropy loss for one batch and log per-step metrics."""
        x, y = train_batch
        y_pred = self.forward(x)
        loss = F.cross_entropy(y_pred, y)

        # Logs (calling the metric returns the accuracy of this batch)
        self._log_scalar('Loss', 'train loss', loss)
        self._log_scalar('Accuracy', 'train accuracy', self.accuracy(y_pred, y))

        return loss

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------
    def on_validation_epoch_start(self):
        """Drop any metric state left over from training steps."""
        self._reset_eval_state()

    def validation_step(self, val_batch, batch_idx):
        """Accumulate statistics for one validation batch and return its loss."""
        return self._accumulate_eval_batch(val_batch)

    def on_validation_epoch_end(self):
        """Log loss, accuracy and confusion matrix for the whole validation set."""
        self._log_eval_epoch('val', with_loss=True)

    # ------------------------------------------------------------------
    # Test
    # ------------------------------------------------------------------
    def on_test_epoch_start(self):
        """Drop any metric state left over from earlier stages."""
        self._reset_eval_state()

    def test_step(self, test_batch, batch_idx):
        """Accumulate statistics for one test batch."""
        self._accumulate_eval_batch(test_batch)

    def on_test_epoch_end(self):
        """Log accuracy and confusion matrix for the whole test set."""
        self._log_eval_epoch('test', with_loss=False)


In [9]:
def train(clf, name, max_epochs=10, lr=1e-3):
    """Fit ``clf`` on top of the frozen autoencoder and return the results.

    Relies on the notebook-level ``autoencoder``, ``classes``,
    ``train_dataloader`` and ``val_dataloader`` defined in earlier cells.

    Args:
        clf: Module used as the classification head of ``Classifier``.
        name: Version label of the TensorBoard run inside the ``runs_clf``
            experiment.
        max_epochs: Number of epochs after which the trainer stops.
        lr: Learning rate forwarded to ``Classifier``. The default keeps the
            previously hard-coded value.

    Returns:
        A ``(net, trainer)`` tuple: the fitted ``Classifier`` and the
        ``Trainer`` that ran it.
    """
    net = Classifier(autoencoder, clf, classes, lr=lr)

    # An empty save_dir makes the log path relative to the working directory.
    logger = TensorBoardLogger('', name='runs_clf', version=name)

    trainer = Trainer(max_epochs=max_epochs, logger=logger)
    trainer.fit(net, train_dataloader, val_dataloader)

    return net, trainer

In [10]:
# Fully connected head: 1024 input features -> 128 -> 256 -> 10 outputs.
# The input width has to match the flattened encoder output that
# `Classifier.forward` feeds into it.
clf = nn.Sequential(
    nn.Linear(1024, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(),

    nn.Linear(128, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),

    nn.Linear(256, 10),
    )

# Use the current Unix time as run label so every execution of this cell logs
# under its own version. `time` is imported in the imports cell at the top.
now = int(time.time())

net, trainer = train(clf, f'run_{now}', 10)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type                      | Params
-----------------------------------------------------------
0 | _autoencoder | MyAutoencoder             | 8.2 K 
1 | encoder      | Sequential                | 4.1 K 
2 | clf          | Sequential                | 167 K 
3 | accuracy     | MulticlassAccuracy        | 0     
4 | conf_matrix  | MulticlassConfusionMatrix | 0     
-----------------------------------------------------------
167 K     Trainable params
8.2 K     Non-trainable params
175 K     Total params
0.703     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00,  1.21it/s]

  ax.set_xticklabels([''] + all_categories, rotation=90)
  ax.set_yticklabels([''] + all_categories)


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 2:  44%|████▍     | 22/50 [02:30<03:10,  6.82s/it, loss=1.44, v_num=8067]
Epoch 9: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, loss=0.97, v_num=8257] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, loss=0.97, v_num=8257]


## Оценка качества

In [None]:
# Pass the model explicitly so the weights currently held by `net` (the ones
# saved in the next cell) are evaluated; without a model argument Lightning
# falls back to restoring a checkpoint from the previous `fit` call.
trainer.test(model=net, dataloaders=test_dataloader)

In [13]:
# Persist the state dict of the trained Lightning module.
clf_weights = '../models/clf_model.pth'
torch.save(net.state_dict(), clf_weights)