# Домашняя работа по регуляризации и оптимизации

Ниже приводится код модели и функции обучения. Далее предлагается улучшить качество модели путем добавления регуляризаций и настройки оптимизатора.

Оценка будет выставляться по итоговой точности (accuracy) на тестовой выборке CIFAR-10:

- 2: < 50%
- 3: 50-60%
- 4: 60-75%
- 5: > 75%

In [None]:
### Installs the required libraries; comment this line out if they are already available.
!pip3 install pytorch_lightning torchmetrics PyYAML

In [3]:
import torch
import pytorch_lightning as pl
import yaml
from pytorch_lightning import seed_everything
from torchmetrics import Accuracy
from torchvision.datasets import CIFAR10
from torchvision.transforms import Compose, ToTensor, Normalize

# Directory passed to the CIFAR10 datasets and to the Lightning trainer as its root.
ROOT = "hwroot"
# File that receives the final validation accuracy for submission.
ANSWER_FILE = "submission.yaml"
# Number of worker processes handed to each DataLoader.
NUM_WORKERS = 4
# Whether PyTorch sees a CUDA device; in the visible notebook it is only printed.
USE_CUDA = torch.cuda.is_available()
print(f"USE CUDA: {USE_CUDA}")

USE CUDA: True


In [4]:
# Convert images to tensors, then normalize with mean 0.5 and std 0.5.
transform = Compose([ToTensor(), Normalize(mean=0.5, std=0.5)])
# Train and test splits of CIFAR-10 stored under ROOT (download requested for both).
trainset = CIFAR10(root=ROOT, train=True, download=True, transform=transform)
testset = CIFAR10(root=ROOT, train=False, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to hwroot/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 28647691.18it/s]


Extracting hwroot/cifar-10-python.tar.gz to hwroot
Files already downloaded and verified


In [None]:
class CNN(torch.nn.Sequential):
    """Plain convolutional classifier: conv/ReLU stages followed by a pooled linear head.

    The first stage maps the 3 input channels to 8. Every third stage uses
    stride 2 and doubles the channel count; all other stages keep it.

    Args:
        num_classes: Number of logits produced by the head.
        num_layers: Number of conv stages placed before the head.
    """

    def __init__(self, num_classes=10, num_layers=10):
        stages = []
        channels = 3
        for index in range(1, num_layers + 1):
            # Stages 3, 6, 9, ... downsample and widen; the rest keep the shape.
            stride = 1 if index % 3 else 2
            width = 8 if index == 1 else channels * stride
            stages.append(self.make_layer(channels, width, stride))
            channels = width
        # Sub-modules are built first because Sequential.__init__ takes them as arguments.
        head = self.make_head(channels, num_classes)
        super().__init__(*stages, head)
        self.num_classes = num_classes

    def make_layer(self, in_channels, out_channels, stride):
        """Build one stage: a 3x3 convolution (padding 1) followed by ReLU."""
        conv = torch.nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1
        )
        return torch.nn.Sequential(conv, torch.nn.ReLU())

    def make_head(self, in_channels, out_channels):
        """Build the classifier head: global max pool, flatten, linear layer."""
        pool = torch.nn.AdaptiveMaxPool2d(output_size=(1, 1))
        flatten = torch.nn.Flatten()
        classifier = torch.nn.Linear(in_channels, out_channels)
        return torch.nn.Sequential(pool, flatten, classifier)


print(CNN(10))

In [6]:
class Module(pl.LightningModule):
    """Lightning wrapper that trains and validates a classifier on the notebook's datasets.

    Args:
        model: Network to optimize; it must expose a ``num_classes`` attribute.
        batch_size: Batch size used by both data loaders.
    """

    def __init__(self, model, batch_size=64):
        super().__init__()
        self.batch_size = batch_size
        self.model = model
        self.criterion = torch.nn.CrossEntropyLoss()
        self.metric = Accuracy(task="multiclass", num_classes=model.num_classes)

    def forward(self, images):
        """Return the wrapped model's output for ``images``."""
        return self.model(images)

    def step(self, stage, batch, batch_idx):
        """Run the model on one ``(images, labels)`` batch.

        ``stage`` and ``batch_idx`` are accepted but not used here.

        Returns:
            Dict with the cross-entropy ``"loss"`` and the raw ``"logits"``.
        """
        images, labels = batch
        logits = self(images)
        return {"loss": self.criterion(logits, labels), "logits": logits}

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log the learning rate and batch accuracy."""
        outputs = self.step("train", batch, batch_idx)
        current_lr = self.trainer.optimizers[0].param_groups[0]["lr"]
        self.log("lr", current_lr, prog_bar=True)
        with torch.no_grad():
            hits = outputs["logits"].argmax(-1) == batch[1]
            self.log("accuracy", hits.float().mean().item(), prog_bar=True)
        return outputs["loss"]

    def validation_step(self, batch, batch_idx):
        """Update the accuracy metric and log the validation loss and accuracy."""
        outputs = self.step("val", batch, batch_idx)
        predictions = outputs["logits"].argmax(dim=-1)
        self.metric.update(predictions, batch[1])
        self.log("val_loss", outputs["loss"], prog_bar=True)
        self.log("val_acc", self.metric, prog_bar=True)

    def on_validation_epoch_end(self, *arg, **kwargs):
        # Keep the accumulated accuracy on the module; train() reads it when dumping.
        self.final_metric = self.metric.compute()
        super().on_validation_epoch_end(*arg, **kwargs)

    def configure_optimizers(self):
        """Return SGD (lr=0.1) with an ExponentialLR schedule (gamma=0.8)."""
        sgd = torch.optim.SGD(self.parameters(), lr=0.1)
        decay = torch.optim.lr_scheduler.ExponentialLR(sgd, gamma=0.8)
        return [sgd], [decay]

    def train_dataloader(self):
        """Shuffled loader over ``trainset`` that drops the last incomplete batch."""
        return torch.utils.data.DataLoader(
            trainset,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=NUM_WORKERS,
        )

    def val_dataloader(self):
        """Loader over ``testset`` with default ordering."""
        return torch.utils.data.DataLoader(
            testset,
            batch_size=self.batch_size,
            num_workers=NUM_WORKERS,
        )

In [7]:
def train(module, dump=None):
    """Fit ``module`` for up to 20 epochs and optionally write its final metric to YAML.

    Args:
        module: Lightning module to train; ``fit`` is called without explicit
            data loaders, so the module supplies them.
        dump: Optional output path. When given, ``module.final_metric`` is
            written there as ``{"tasks": [{"task1": {"answer": <value>}}]}``.
    """
    seed_everything(0)
    trainer = pl.Trainer(max_epochs=20, accelerator="auto", default_root_dir=ROOT)
    trainer.fit(module)
    if dump is None:
        return
    with open(dump, "w") as fp:
        answer = {"tasks": [{"task1": {"answer": module.final_metric.item()}}]}
        yaml.safe_dump(answer, fp)

In [8]:
# Baseline run with the unmodified CNN; no answer file is written (dump is None).
# Seed before building the network: train() calls seed_everything(0) only after
# the module already exists, which would leave weight initialization unseeded.
seed_everything(0)
train(Module(CNN()))

INFO:lightning_fabric.utilities.seed:Global seed set to 0
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type               | Params
-------------------------------------------------
0 | model     | CNN                | 85.8 K
1 | criterion | CrossEntropyLoss   | 0     
2 | metric    | MulticlassAccuracy | 0     
-------------------------------------------------
85.8 K    Trainable params
0         Non-trainable params
85.8 K    Total params
0.343     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


# Ваше решение

In [9]:
# NOTE(review): `Dropout` imported here is not referenced anywhere in the visible
# notebook (BetterCNN uses torch.nn.Dropout) — likely an accidental auto-import.
from torch.nn.quantized.modules import Dropout

In [10]:
class BetterCNN(CNN):
    """CNN variant whose conv stages add BatchNorm2d and Dropout regularization."""

    def make_layer(self, in_channels, out_channels, stride):
        """Build one stage: bias-free 3x3 conv, batch norm, ReLU, then dropout (p=0.2)."""
        # The convolution has no bias term; BatchNorm2d follows it directly.
        conv = torch.nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False
        )
        norm = torch.nn.BatchNorm2d(out_channels)
        return torch.nn.Sequential(conv, norm, torch.nn.ReLU(), torch.nn.Dropout(p=0.2))

    def make_head(self, in_channels, out_channels):
        """Build the classifier head: global max pool, flatten, linear layer."""
        pool = torch.nn.AdaptiveMaxPool2d(output_size=(1, 1))
        flatten = torch.nn.Flatten()
        classifier = torch.nn.Linear(in_channels, out_channels)
        return torch.nn.Sequential(pool, flatten, classifier)

In [11]:
class BetterModule(Module):
    """Module variant that replaces the SGD setup with Adam and a step-wise LR decay."""

    def configure_optimizers(self):
        """Return Adam (lr=0.01) with a StepLR schedule (step_size=2, gamma=0.9)."""
        # Disabled alternative kept from the original notebook:
        # SGD with lr=0.09, momentum=0.7, nesterov=True.
        adam = torch.optim.Adam(self.parameters(), lr=0.01)
        step_decay = torch.optim.lr_scheduler.StepLR(adam, step_size=2, gamma=0.9)
        return [adam], [step_decay]

In [12]:
# Final run that produces the submitted answer file.
# Seed before constructing the network: train() calls seed_everything(0) only
# after the module exists, which would leave weight initialization unseeded
# and make the dumped accuracy depend on the kernel's prior RNG state.
seed_everything(0)
model = BetterModule(BetterCNN())
train(model, dump=ANSWER_FILE)

INFO:lightning_fabric.utilities.seed:Global seed set to 0
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type               | Params
-------------------------------------------------
0 | model     | BetterCNN          | 86.1 K
1 | criterion | CrossEntropyLoss   | 0     
2 | metric    | MulticlassAccuracy | 0     
-------------------------------------------------
86.1 K    Trainable params
0         Non-trainable params
86.1 K    Total params
0.344     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


In [42]:
### Use from Google Colab to download result.
from google.colab import files
files.download(ANSWER_FILE) 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>