In [9]:
# Use the %pip magic so the packages land in the environment of the running kernel,
# not in whichever `pip` executable happens to be first on the shell PATH.
%pip install -q lightning torchmetrics

In [10]:
import numpy as np
import pandas as pd

import glob
import time
import os
import sys
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader


import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

from lightning import Trainer, LightningModule, LightningDataModule
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

import torchmetrics as tm

In [3]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [4]:
%%capture
# For running in Colab: fetch the Sign-MNIST archives and reference images into ../data.
# The flags keep this cell safe to re-run:
#   mkdir -p  -> no error when the directory already exists
#   wget -nc  -> skip files that are already present instead of saving ".1" duplicates
#   unzip -o  -> overwrite previously extracted files without prompting
!mkdir -p ../data/
!wget -nc -P ../data https://github.com/a-milenkin/ml_instruments/raw/refs/heads/main/data/sign_mnist_train.csv.zip
!wget -nc -P ../data https://github.com/a-milenkin/ml_instruments/raw/refs/heads/main/data/sign_mnist_test.csv.zip
!wget -nc -P ../data https://github.com/a-milenkin/ml_instruments/raw/refs/heads/main/data/amer_sign2.png
!wget -nc -P ../data https://github.com/a-milenkin/ml_instruments/raw/refs/heads/main/data/amer_sign3.png
!wget -nc -P ../data https://github.com/a-milenkin/ml_instruments/raw/refs/heads/main/data/american_sign_language.PNG

!unzip -o ../data/sign_mnist_train.csv.zip -d ../data/
!unzip -o ../data/sign_mnist_test.csv.zip -d ../data/

In [5]:
# Read both CSV splits from the local data directory in a single pass.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/sign_mnist_train.csv', '../data/sign_mnist_test.csv')
)

In [6]:
# Preview the first rows of the training frame to check the label/pixel column layout.
train.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,3,107,118,127,134,139,143,146,150,153,...,207,207,207,207,206,206,206,204,203,202
1,6,155,157,156,156,156,157,156,158,158,...,69,149,128,87,94,163,175,103,135,149
2,2,187,188,188,187,187,186,187,188,187,...,202,201,200,199,198,199,198,195,194,195
3,2,211,211,212,212,211,210,211,210,210,...,235,234,233,231,230,226,225,222,229,163
4,13,164,167,170,172,176,179,180,184,185,...,92,105,105,108,133,163,157,163,164,179


In [7]:
# Same sanity check on the test frame; three rows are enough to compare its layout.
test.head(3)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,6,149,149,150,150,150,151,151,150,151,...,138,148,127,89,82,96,106,112,120,107
1,5,126,128,131,132,133,134,135,135,136,...,47,104,194,183,186,184,184,184,182,180
2,10,85,88,92,96,105,123,135,143,147,...,68,166,242,227,230,227,226,225,224,222


In [11]:
class SignLanguageDataset(Dataset):
    """Map-style dataset over a DataFrame holding one flattened image per row.

    Column 0 of the frame is read as the class label and every remaining column
    as a pixel value; each row becomes a ``(1, height, width)`` float32 tensor.

    Args:
        df: Source frame (label in column 0, pixels in columns 1 and onwards).
        transform: Optional callable applied to each image tensor.
        image_shape: ``(height, width)`` that each pixel row is reshaped to.

    Raises:
        ValueError: If the number of pixel columns does not match ``image_shape``.
    """

    def __init__(self, df, transform=None, image_shape=(28, 28)):
        self.df = df
        self.transform = transform
        self.image_shape = tuple(image_shape)

        # Convert once up front: slicing a pandas row on every __getitem__ call
        # builds a fresh Series per sample, which is far slower than array indexing.
        self._labels = df.iloc[:, 0].to_numpy(dtype=np.int64)
        self._pixels = (
            df.iloc[:, 1:]
            .to_numpy(dtype=np.float32)
            .reshape(len(df), *self.image_shape)
        )

    def __len__(self):
        return len(self._labels)

    def __getitem__(self, index):
        label = self._labels[index]

        # torch.tensor copies the data, so a transform can never modify the cached array.
        img = torch.tensor(self._pixels[index]).unsqueeze(0)
        if self.transform is not None:
            img = self.transform(img)

        return img, label


In [12]:
# Train-time augmentation pipeline.
# NOTE: input normalisation (transforms.Normalize(159, 40)) is currently switched off.
_mirror = transforms.RandomHorizontalFlip(p=0.1)  # mirror roughly 10% of the samples
_rotate = transforms.RandomApply(  # rotate roughly 20% of the samples by any angle
    [transforms.RandomRotation(degrees=(-180, 180))],
    p=0.2,
)
transforms4train = transforms.Compose([_mirror, _rotate])

In [13]:
# Augmentation is applied to the training split only; the test split is served unmodified.
test_dataset = SignLanguageDataset(df=test, transform=None)
train_dataset = SignLanguageDataset(df=train, transform=transforms4train)

In [23]:
class LightningSignLanguageDataset(LightningDataModule):
    """LightningDataModule that serves two pre-built datasets.

    Args:
        train_dataset: Dataset used for optimisation.
        test_dataset: Dataset returned by both the validation and the test loader.
        batch_size: Number of samples per batch for every loader.
    """

    def __init__(self, train_dataset, test_dataset, batch_size=200):
        super().__init__()
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.batch_size = batch_size

    def train_dataloader(self):
        # Reshuffle every epoch so that batch composition does not depend on the
        # row order of the source data.
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        # NOTE(review): validation reuses the test set, so any model selection driven
        # by validation metrics makes the final test score optimistic. Consider
        # carving a validation split out of the training data instead.
        return DataLoader(self.test_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size)

In [24]:
class LightningConvNet(LightningModule):
    """Small two-block CNN classifier for single-channel square images.

    Each block is Conv2d(3x3, padding=1) -> BatchNorm2d -> AvgPool2d(2) -> ReLU.
    The flattened features go through a 100-unit hidden layer (LeakyReLU + dropout)
    and a linear output layer producing ``n_classes`` logits. Multiclass AUROC and
    macro F1 are tracked separately for training and validation.

    Args:
        stride: Stride of both convolutions.
        dilation: Dilation of both convolutions.
        n_classes: Number of output classes (length of the logit vector).
        lr: Learning rate passed to Adam.
        input_size: Height/width of the square input images.

    Raises:
        ValueError: If ``stride``/``dilation``/``input_size`` leave no spatial
            positions after the two conv/pool blocks.
    """

    def __init__(self, stride=1, dilation=1, n_classes=25, lr=1e-3, input_size=28):
        super().__init__()
        self.save_hyperparameters()

        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3,
                      padding=1, stride=stride, dilation=dilation),
            nn.BatchNorm2d(8),
            nn.AvgPool2d(2),
            nn.ReLU()
        )

        self.block2 = nn.Sequential(
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3,
                      padding=1, stride=stride, dilation=dilation),
            nn.BatchNorm2d(16),
            nn.AvgPool2d(2),
            nn.ReLU()
        )

        # Derive the flattened feature size from the actual conv settings instead of
        # hard-coding 16 * 7 * 7, which is only correct for stride=1, dilation=1 and
        # 28x28 inputs.
        side = input_size
        for _ in range(2):
            side = self._block_output_size(side, stride, dilation)
        if side < 1:
            raise ValueError(
                f"stride={stride}, dilation={dilation} and input_size={input_size} "
                "leave no spatial positions after the convolutional blocks"
            )

        self.lin1 = nn.Linear(in_features=16 * side * side, out_features=100)
        self.act1 = nn.LeakyReLU()
        self.drop1 = nn.Dropout(p=0.3)
        self.lin2 = nn.Linear(100, n_classes)

        self.train_roc = tm.AUROC(task="multiclass", num_classes=n_classes)
        self.train_fbeta = tm.FBetaScore(task="multiclass", num_classes=n_classes, beta=1.0, average='macro')

        self.valid_roc = tm.AUROC(task="multiclass", num_classes=n_classes)
        self.valid_fbeta = tm.FBetaScore(task="multiclass", num_classes=n_classes, beta=1.0, average='macro')

    @staticmethod
    def _block_output_size(size, stride, dilation):
        """Spatial size after one 3x3 conv (padding=1) followed by 2x2 average pooling."""
        # Conv2d: floor((H + 2*padding - dilation*(kernel_size - 1) - 1) / stride) + 1
        conv_size = (size + 2 - 2 * dilation - 1) // stride + 1
        # AvgPool2d(2) halves the size, rounding down.
        return conv_size // 2

    def forward(self, x):
        """Return raw class logits of shape ``(batch, n_classes)``."""
        x = self.block1(x)
        x = self.block2(x)
        x = x.flatten(start_dim=1)
        x = self.lin1(x)
        x = self.act1(x)
        x = self.drop1(x)
        x = self.lin2(x)
        return x

    def _shared_step(self, batch):
        """Run the network on one batch; return ``(loss, class probabilities, targets)``."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        # The metrics consume probabilities; the loss above works on raw logits.
        probs = F.softmax(logits, dim=1)
        return loss, probs, y

    def training_step(self, batch, batch_idx):
        loss, preds, y = self._shared_step(batch)

        self.train_roc(preds, y)
        self.train_fbeta(preds, y)

        self.log("train_loss", loss)
        self.log("train/roc", self.train_roc, on_step=True, on_epoch=True, prog_bar=True)
        self.log("train/fbeta", self.train_fbeta, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        loss, preds, y = self._shared_step(batch)

        self.valid_roc(preds, y)
        self.valid_fbeta(preds, y)

        self.log("val_loss", loss, on_epoch=True, prog_bar=True)
        self.log("val/roc", self.valid_roc, on_step=False, on_epoch=True, prog_bar=True)
        self.log("val/fbeta", self.valid_fbeta, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Optimise all parameters with Adam at the configured learning rate."""
        optimizer = optim.Adam(self.parameters(), lr=self.hparams.lr)
        return optimizer

In [25]:
# Wire the prepared datasets into a data module and build the network with its default hyperparameters.
data_module = LightningSignLanguageDataset(
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    batch_size=200,
)
model = LightningConvNet()

In [26]:
# Keep only the single best checkpoint on disk, ranked by the lowest validation loss.
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints",
    filename="best-checkpoint",
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    verbose=True,
)

# Stop training once the validation loss has failed to improve 3 checks in a row.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=3,
    verbose=True,
)

# Train for at most 20 epochs, on the GPU when one is available.
trainer = Trainer(
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    max_epochs=20,
    callbacks=[early_stop_callback, checkpoint_callback],
)
trainer.fit(model, datamodule=data_module)

# Also persist the state reached at the end of training (last epoch, not necessarily the best).
final_checkpoint = "final_model.ckpt"
trainer.save_checkpoint(final_checkpoint)

print("Последняя модель сохранена в:", final_checkpoint)
print("Лучшая модель сохранена в:", checkpoint_callback.best_model_path)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name        | Type                 | Params | Mode 
-------------------------------------------------------------
0 | block1      | Sequential           | 96     | train
1 | block2      | Sequential           | 1.2 K  | train
2 | lin1        | Linear               | 78.5 K | train
3 | act1        | LeakyReLU            | 0      | train
4 | drop1       | Dropout              | 0      | train
5 | lin2        | Linear               | 2.5 K  | train
6 | train_roc   | MulticlassA

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric val_loss improved. New best score: 1.181
INFO:lightning.pytorch.callbacks.early_stopping:Metric val_loss improved. New best score: 1.181
INFO: Epoch 0, global step 138: 'val_loss' reached 1.18100 (best 1.18100), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 0, global step 138: 'val_loss' reached 1.18100 (best 1.18100), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric val_loss improved by 0.520 >= min_delta = 0.0. New best score: 0.661
INFO:lightning.pytorch.callbacks.early_stopping:Metric val_loss improved by 0.520 >= min_delta = 0.0. New best score: 0.661
INFO: Epoch 1, global step 276: 'val_loss' reached 0.66061 (best 0.66061), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 1, global step 276: 'val_loss' reached 0.66061 (best 0.66061), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric val_loss improved by 0.141 >= min_delta = 0.0. New best score: 0.519
INFO:lightning.pytorch.callbacks.early_stopping:Metric val_loss improved by 0.141 >= min_delta = 0.0. New best score: 0.519
INFO: Epoch 2, global step 414: 'val_loss' reached 0.51922 (best 0.51922), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 2, global step 414: 'val_loss' reached 0.51922 (best 0.51922), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric val_loss improved by 0.078 >= min_delta = 0.0. New best score: 0.441
INFO:lightning.pytorch.callbacks.early_stopping:Metric val_loss improved by 0.078 >= min_delta = 0.0. New best score: 0.441
INFO: Epoch 3, global step 552: 'val_loss' reached 0.44074 (best 0.44074), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 3, global step 552: 'val_loss' reached 0.44074 (best 0.44074), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric val_loss improved by 0.062 >= min_delta = 0.0. New best score: 0.379
INFO:lightning.pytorch.callbacks.early_stopping:Metric val_loss improved by 0.062 >= min_delta = 0.0. New best score: 0.379
INFO: Epoch 4, global step 690: 'val_loss' reached 0.37850 (best 0.37850), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 4, global step 690: 'val_loss' reached 0.37850 (best 0.37850), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 5, global step 828: 'val_loss' was not in top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 5, global step 828: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 6, global step 966: 'val_loss' was not in top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 6, global step 966: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Metric val_loss improved by 0.087 >= min_delta = 0.0. New best score: 0.292
INFO:lightning.pytorch.callbacks.early_stopping:Metric val_loss improved by 0.087 >= min_delta = 0.0. New best score: 0.292
INFO: Epoch 7, global step 1104: 'val_loss' reached 0.29187 (best 0.29187), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 7, global step 1104: 'val_loss' reached 0.29187 (best 0.29187), saving model to '/content/checkpoints/best-checkpoint.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 8, global step 1242: 'val_loss' was not in top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 8, global step 1242: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 9, global step 1380: 'val_loss' was not in top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 9, global step 1380: 'val_loss' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

INFO: Monitored metric val_loss did not improve in the last 3 records. Best score: 0.292. Signaling Trainer to stop.
INFO:lightning.pytorch.callbacks.early_stopping:Monitored metric val_loss did not improve in the last 3 records. Best score: 0.292. Signaling Trainer to stop.
INFO: Epoch 10, global step 1518: 'val_loss' was not in top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 10, global step 1518: 'val_loss' was not in top 1


Последняя модель сохранена в: final_model.ckpt
Лучшая модель сохранена в: /content/checkpoints/best-checkpoint.ckpt


In [33]:
# Load the checkpoint with the lowest validation loss recorded by ModelCheckpoint.
# "final_model.ckpt" only holds the weights of the last epoch that ran before early
# stopping, which is not the best-scoring model.
best_model = LightningConvNet.load_from_checkpoint(checkpoint_callback.best_model_path)
best_model.eval()

sample, actual_label = test_dataset[1]
print("Actual label:", actual_label)

# Add the batch dimension and move the input to the device the weights were loaded on;
# a checkpoint written during GPU training may be restored onto CUDA.
sample = sample.unsqueeze(0).to(best_model.device)

with torch.no_grad():
    logits = best_model(sample)
    predicted_label = torch.argmax(logits, dim=1).item()

print("Predicted label:", predicted_label)

Actual label: 5
Predicted label: 5
