In [1]:
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pytorch_lightning.loggers as loggers
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# LightningDataModule that downloads, splits and serves CIFAR-100
class CIFAR100DataModule(pl.LightningDataModule):
    """Data module serving CIFAR-100 train / validation / test loaders.

    The official training split is divided 80/20 into train and validation
    subsets using a seeded ``train_test_split``. Training samples are
    augmented (random crop, horizontal flip, rotation); validation and test
    samples are only converted to tensors and normalized.

    Args:
        batch_size: Batch size used by all three dataloaders.
        seed: ``random_state`` for the train/validation split.
        data_dir: Directory where torchvision stores / looks up the dataset.
        num_workers: Worker processes per ``DataLoader`` (0 loads in the
            main process, which is the previous behavior).
    """

    def __init__(self, batch_size=16, seed=42, data_dir='./data', num_workers=0):
        super().__init__()
        self.batch_size = batch_size
        self.seed = seed
        self.data_dir = data_dir
        self.num_workers = num_workers

        # NOTE(review): these per-channel statistics look like the commonly
        # quoted CIFAR-10 values — confirm whether CIFAR-100 statistics were
        # intended. Kept unchanged so preprocessing stays identical.
        normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

        # Augmenting pipeline, applied to training samples only.
        self.transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ToTensor(),
            normalize,
        ])
        # Deterministic pipeline for validation and test samples.
        self.transform_test = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

    def prepare_data(self):
        """Download both CIFAR-100 splits into ``data_dir`` if missing."""
        datasets.CIFAR100(root=self.data_dir, train=True, download=True)
        datasets.CIFAR100(root=self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the train, validation and test datasets.

        Two views of the training split are built so that the validation
        subset is read through the deterministic transform; slicing both
        subsets from a single augmented dataset would apply random
        augmentation to validation images as well.
        """
        train_view = datasets.CIFAR100(root=self.data_dir, train=True, transform=self.transform_train)
        eval_view = datasets.CIFAR100(root=self.data_dir, train=True, transform=self.transform_test)

        train_indices, val_indices = train_test_split(
            range(len(train_view)), test_size=0.2, random_state=self.seed
        )
        self.train_dataset = torch.utils.data.Subset(train_view, train_indices)
        self.val_dataset = torch.utils.data.Subset(eval_view, val_indices)
        self.test_dataset = datasets.CIFAR100(root=self.data_dir, train=False, transform=self.transform_test)

    def _loader(self, dataset, shuffle):
        """Build a ``DataLoader`` with the module-wide batch size and workers."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers,
        )

    def train_dataloader(self):
        """Shuffled loader over the training subset."""
        return self._loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Ordered loader over the validation subset."""
        return self._loader(self.val_dataset, shuffle=False)

    def test_dataloader(self):
        """Ordered loader over the official test split."""
        return self._loader(self.test_dataset, shuffle=False)

In [5]:
class AlexNet(pl.LightningModule):
    """AlexNet-style CNN: five conv/BN/ReLU/pool stages and three FC layers.

    Every convolution is 3x3 with stride 1 and padding 1, so spatial size is
    only reduced by the five 2x2 max-poolings. ``fc1`` expects 256 input
    features, i.e. a 1x1 feature map after pooling, which corresponds to
    32x32 input images.

    Args:
        num_classes: Size of the output logit vector.
        lr: Learning rate passed to the Adam optimizer.
    """

    def __init__(self, num_classes=100, lr=0.001):
        super().__init__()
        self.lr = lr

        self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(96)
        self.conv2 = nn.Conv2d(96, 256, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(384)
        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(384)
        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2d(256)

        # Single parameter-free pooling layer reused after every conv stage.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(256 * 1 * 1, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
        )
        for conv, bn in stages:
            x = self.pool(F.relu(bn(conv(x))))

        # Collapse everything except the batch dimension.
        x = torch.flatten(x, 1)

        # Functional dropout must be told the mode explicitly.
        x = F.dropout(F.relu(self.fc1(x)), p=0.5, training=self.training)
        x = F.dropout(F.relu(self.fc2(x)), p=0.5, training=self.training)
        return self.fc3(x)

    def _forward_loss(self, batch):
        """Run the model on ``(images, labels)``; return logits, labels, loss."""
        images, labels = batch
        logits = self(images)
        return logits, labels, F.cross_entropy(logits, labels)

    @staticmethod
    def _accuracy(logits, labels):
        """Fraction of samples whose arg-max logit equals the label."""
        return (logits.argmax(dim=1) == labels).float().mean()

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss; the returned loss is optimized."""
        _, _, loss = self._forward_loss(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log validation loss and accuracy."""
        logits, labels, loss = self._forward_loss(batch)
        self.log('val_loss', loss)
        self.log('val_accuracy', self._accuracy(logits, labels))

    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy."""
        logits, labels, loss = self._forward_loss(batch)
        self.log('test_loss', loss)
        self.log('test_accuracy', self._accuracy(logits, labels))

    def configure_optimizers(self):
        """Adam over all parameters with the configured learning rate."""
        return optim.Adam(self.parameters(), lr=self.lr)


In [6]:
# Seed the global RNGs so that weight initialization, batch shuffling,
# augmentation and dropout are repeatable across runs (the data module's own
# seed only fixes the train/validation split).
SEED = 42
pl.seed_everything(SEED, workers=True)

# Instantiate the data module and the model
data_module = CIFAR100DataModule(batch_size=16, seed=SEED)
model = AlexNet(num_classes=100)

# Configure the trainer; metrics are written by a TensorBoard logger under logs/
trainer = pl.Trainer(max_epochs=10, logger=loggers.TensorBoardLogger("logs/"))

# Train the model
trainer.fit(model, data_module)

# Evaluate on the test split
trainer.test(model, data_module)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [03:31<00:00, 801kB/s]  


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified



   | Name  | Type        | Params | Mode 
-----------------------------------------------
0  | conv1 | Conv2d      | 2.7 K  | train
1  | bn1   | BatchNorm2d | 192    | train
2  | conv2 | Conv2d      | 221 K  | train
3  | bn2   | BatchNorm2d | 512    | train
4  | conv3 | Conv2d      | 885 K  | train
5  | bn3   | BatchNorm2d | 768    | train
6  | conv4 | Conv2d      | 1.3 M  | train
7  | bn4   | BatchNorm2d | 768    | train
8  | conv5 | Conv2d      | 884 K  | train
9  | bn5   | BatchNorm2d | 512    | train
10 | pool  | MaxPool2d   | 0      | train
11 | fc1   | Linear      | 1.1 M  | train
12 | fc2   | Linear      | 16.8 M | train
13 | fc3   | Linear      | 409 K  | train
-----------------------------------------------
21.6 M    Trainable params
0         Non-trainable params
21.6 M    Total params
86.273    Total estimated model params size (MB)
14        Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/Users/emildenikaev/projects/itmo_dl/itmo_dl/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


                                                                           

/Users/emildenikaev/projects/itmo_dl/itmo_dl/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 2500/2500 [02:30<00:00, 16.64it/s, v_num=0]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2500/2500 [02:30<00:00, 16.59it/s, v_num=0]
Files already downloaded and verified
Files already downloaded and verified


/Users/emildenikaev/projects/itmo_dl/itmo_dl/.venv/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 625/625 [00:05<00:00, 120.76it/s]


[{'test_loss': 2.62971830368042, 'test_accuracy': 0.31540000438690186}]