## Лабораторная работа №2

### Импорты

In [4]:
import random
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from pytorch_lightning import seed_everything
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

### Константы

In [5]:
# Fix the global random seed through Lightning's helper so runs are repeatable;
# workers=True is passed so DataLoader worker processes are seeded as well
# (per Lightning's seed_everything contract).
seed_everything(42, workers=True)

INFO:lightning_fabric.utilities.seed:Seed set to 42


42

### Загрузка и подготовка данных

In [6]:
def prepare_data():
    """Build the CIFAR-100 train, validation and test datasets.

    Every image is resized to 224x224, converted to a tensor and normalized
    with mean 0.5 and std 0.5 on each of the three channels. The official
    training set is divided 80/20 into train and validation subsets with a
    generator seeded with 42; the official test set is returned as is.

    Returns:
        tuple: ``(train_dataset, val_dataset, test_dataset)``.
    """
    channel_stats = [0.5, 0.5, 0.5]
    preprocessing = transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=channel_stats, std=channel_stats),
        ]
    )

    # Official training split first, then the official test split; both are
    # downloaded into ./data when missing.
    full_train, test_dataset = (
        datasets.CIFAR100(root="./data", train=is_train, download=True, transform=preprocessing)
        for is_train in (True, False)
    )

    total = len(full_train)
    n_train = int(0.8 * total)
    n_val = total - n_train

    # A dedicated, explicitly seeded generator keeps the split reproducible.
    split_rng = torch.Generator().manual_seed(42)
    train_dataset, val_dataset = random_split(full_train, [n_train, n_val], generator=split_rng)

    return train_dataset, val_dataset, test_dataset

# Materialise the three dataset splits once for the rest of the notebook.
train_dataset, val_dataset, test_dataset = prepare_data()

# All loaders share the batch size and worker count; only the training
# loader shuffles its samples.
loader_options = {"batch_size": 64, "num_workers": 4}

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:03<00:00, 45.2MB/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified




### AlexNet

In [7]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional image classifier.

    The network is a five-layer convolutional feature extractor followed by
    a three-layer fully connected classifier. The feature map is adaptively
    average-pooled to 6x6 before flattening, so the classifier's fixed
    ``256 * 6 * 6`` input size no longer restricts the input resolution.
    For 224x224 inputs the feature map is already 6x6 and the pooling is an
    exact identity, so results for that resolution are unchanged. The
    pooling layer has no parameters, so state-dict keys are unchanged too.

    Args:
        num_classes: Number of output logits.

    Shape:
        Input: ``(N, 3, H, W)``. ``H`` and ``W`` must be at least 63 so that
        every max-pooling window still fits its input.
        Output: ``(N, num_classes)``.
    """

    def __init__(self, num_classes=100):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Guarantees the 256 x 6 x 6 layout the first Linear layer expects,
        # whatever spatial size the feature extractor produced.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)`` for an image batch."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial positions.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

### Модуль обучения на PyTorch Lightning


In [8]:
class LitAlexNet(pl.LightningModule):
    """Lightning module that trains and evaluates ``AlexNet`` with cross-entropy.

    Args:
        num_classes: Number of classes forwarded to ``AlexNet``.
        lr: Learning rate given to the Adam optimizer.
    """

    def __init__(self, num_classes=100, lr=1e-3):
        super().__init__()
        self.model = AlexNet(num_classes=num_classes)
        self.loss_fn = nn.CrossEntropyLoss()
        self.lr = lr

    @staticmethod
    def _batch_accuracy(logits, targets):
        """Return the share of samples whose arg-max class equals the target."""
        hits = logits.argmax(dim=1) == targets
        return hits.float().mean()

    def forward(self, x):
        """Run the wrapped network and return its logits."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute the training loss for one batch and log it per step and per epoch."""
        images, targets = batch
        loss = self.loss_fn(self(images), targets)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log epoch-level validation loss and accuracy; return the batch loss."""
        images, targets = batch
        logits = self(images)
        loss = self.loss_fn(logits, targets)
        epoch_metrics = {
            "val_loss": loss,
            "val_acc": self._batch_accuracy(logits, targets),
        }
        for metric_name, metric_value in epoch_metrics.items():
            self.log(metric_name, metric_value, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Log epoch-level accuracy on the test set."""
        images, targets = batch
        accuracy = self._batch_accuracy(self(images), targets)
        self.log("test_acc", accuracy, on_step=False, on_epoch=True, prog_bar=True, logger=True)

    def configure_optimizers(self):
        """Optimize all parameters with Adam at the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

### Обучение модели

In [9]:
class CustomEarlyStopping(EarlyStopping):
    """Early stopping with a target value for the monitored metric.

    Training stops either when the monitored metric has not improved by at
    least ``min_delta`` for ``patience`` checks, or as soon as the metric
    reaches ``stopping_threshold``.

    The threshold is handed to ``EarlyStopping`` itself, which supports a
    ``stopping_threshold`` argument natively. The previous hand-written
    ``on_validation_end`` override duplicated that check while bypassing the
    base class's own guards: it set ``trainer.should_stop`` directly on
    every validation end and always compared with ``<=``, regardless of
    ``mode``. Delegating to the base class keeps the check mode-aware and
    consistent with the rest of the callback's logic.

    Args:
        monitor: Name of the logged metric to watch.
        min_delta: Minimum change that counts as an improvement.
        patience: Number of checks without improvement before stopping.
        verbose: Whether the callback reports its decisions.
        mode: ``"min"`` if lower is better, ``"max"`` if higher is better.
        stopping_threshold: Metric value at which training stops
            immediately.
    """

    def __init__(self, monitor="val_loss", min_delta=0.01, patience=3, verbose=True, mode="min", stopping_threshold=0.2):
        super().__init__(
            monitor=monitor,
            min_delta=min_delta,
            patience=patience,
            verbose=verbose,
            mode=mode,
            stopping_threshold=stopping_threshold,
        )

# Lightning wrapper around AlexNet for the 100 CIFAR-100 classes.
model = LitAlexNet(num_classes=100, lr=1e-3)

# Stop when val_loss has not improved by at least 0.01 for 3 checks, or as
# soon as it reaches the 0.2 target.
early_stopping = CustomEarlyStopping(
    monitor="val_loss",
    min_delta=0.01,
    patience=3,
    verbose=True,
    mode="min",
    stopping_threshold=0.2,  # target value for val_loss
)

use_gpu = torch.cuda.is_available()

trainer = pl.Trainer(
    max_epochs=10,  # upper bound on the number of training epochs
    accelerator="gpu" if use_gpu else "cpu",
    devices=1,
    # `precision=16` is deprecated (Lightning warns and asks for "16-mixed");
    # use the explicit string forms for both the GPU and the CPU case.
    precision="16-mixed" if use_gpu else "32-true",
    log_every_n_steps=10,
    callbacks=[early_stopping],
)

trainer.fit(model, train_loader, val_loader)

/usr/local/lib/python3.11/dist-packages/lightning_fabric/connector.py:572: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | AlexNet          | 57.4 M | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
57.4 M    Trainable params
0         Non-trainable params
57.4 M    Total params
2

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved. New best score: 3.924


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.329 >= min_delta = 0.01. New best score: 3.595


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.201 >= min_delta = 0.01. New best score: 3.394


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.196 >= min_delta = 0.01. New best score: 3.198


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.120 >= min_delta = 0.01. New best score: 3.078


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.168 >= min_delta = 0.01. New best score: 2.910


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.019 >= min_delta = 0.01. New best score: 2.891


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_loss improved by 0.064 >= min_delta = 0.01. New best score: 2.827
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


### Тестирование модели

In [10]:
# Evaluate the trained model on the CIFAR-100 test loader; the result
# contains the epoch-level `test_acc` logged by LitAlexNet.test_step.
trainer.test(model, test_loader)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_acc': 0.30809998512268066}]


## Выводы по модели AlexNet

Достигнутая точность на тестовой выборке составляет 30.81%. Это указывает на то, что модель способна правильно классифицировать около 1/3 изображений из CIFAR-100.

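Модель обучалась с колбэком ранней остановки (patience = 3, min_delta = 0.01, порог val_loss = 0.2), однако он не сработал: обучение завершилось по достижении лимита в 10 эпох (`max_epochs=10`), а лучшее значение функции потерь на валидации составило 2.827, что далеко от порога 0.2. Функция потерь на валидации продолжала снижаться вплоть до последних эпох, поэтому модель, вероятно, недообучена, и увеличение числа эпох может повысить точность.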

CIFAR-100 — это сложный датасет, содержащий 100 классов с разнообразными объектами.

Возможные направления улучшения:

- Применение аугментации данных (случайные обрезки, горизонтальное отражение, изменение цвета) для улучшения обобщающей способности модели.
- Подбор гиперпараметров (например, выбор другого оптимизатора или изменение скорости обучения) для улучшения сходимости модели.