<a href="https://colab.research.google.com/github/artyommatveev/Machine_Learning_MIPT/blob/main/Second_semester/Homework_1/task_1_Matveev.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Task 1. Analysis of Convolutional Neural Network

Analyze how well a Convolutional Neural Network (CNN) classifies the [EMNIST-letters](https://pytorch.org/vision/0.8/datasets.html#emnist) dataset as the following parameters are varied:

* Kernel (filter) size.
* The number of layers.
* Type of pooling.
* Batch Normalization.
* Dropout.



## Import libraries

In [None]:
import itertools

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from google.colab import drive
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

# Mount Google Drive at /content/drive; the nbqa and black commands in this
# notebook address the notebook file through this mount point.
drive.mount("/content/drive")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive


## Set Up MLOps environment

Reload the page once or twice after running either command (`!nbqa isort PATH_TO_NOTEBOOK --float-to-top` or `!black PATH_TO_NOTEBOOK`) so that the changes made to the notebook file show up in the editor.

### Install and run `isort`

In [None]:
!python -m pip install -U "nbqa[toolchain]"

In [None]:
!nbqa isort "/content/drive/MyDrive/Colab Notebooks/task_1_Matveev.ipynb" --float-to-top

[1mNo such file or directory: path_to_notebook[0m


### Install and run `black`

In [None]:
!pip install black[jupyter] --quiet

In [None]:
!black "/content/drive/MyDrive/Colab Notebooks/task_1_Matveev.ipynb"

[1mAll done! ✨ 🍰 ✨[0m
[34m1 file [0mleft unchanged.


## Define parameters

In [None]:
# Hyperparameters shared by every training run in this notebook.
batch_size, num_epochs = 64, 10
learning_rate = 1e-3

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)

In [None]:
# Data loading
def _to_zero_based(label):
    """Shift an EMNIST-letters label from the 1..26 range down to 0..25."""
    return label - 1


# ToTensor converts the image to a float tensor; Normalize((0.5,), (0.5,))
# then applies (x - 0.5) / 0.5 to the single channel.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# The "letters" split numbers its classes 1..26, whereas nn.CrossEntropyLoss
# expects class indices in [0, num_classes). Without the shift, label 26 is out
# of range for a 26-output classifier, so every target is moved to 0..25.
train_dataset = datasets.EMNIST(
    root="./data",
    split="letters",
    train=True,
    download=True,
    transform=transform,
    target_transform=_to_zero_based,
)
# Shuffle only the training data; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.EMNIST(
    root="./data",
    split="letters",
    train=False,
    download=True,
    transform=transform,
    target_transform=_to_zero_based,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# CNN model definition
class CNN(nn.Module):
    """Configurable stack of convolutional layers followed by a linear classifier.

    Every layer is ``Conv2d(..., 32, kernel_size, padding=1)``, optionally
    followed by ``BatchNorm2d``, then ``ReLU``, optionally ``MaxPool2d(2, 2)``
    and optionally ``Dropout``.

    Args:
        kernel_size: Kernel size passed to every ``nn.Conv2d``.
        num_layers: Number of convolutional layers to stack.
        use_pooling: Whether to add ``nn.MaxPool2d(2, 2)`` after each ReLU.
        use_batchnorm: Whether to add ``nn.BatchNorm2d`` after each convolution.
        dropout_rate: Dropout probability; no dropout layer is added when it
            is not greater than 0.
        input_shape: ``(channels, height, width)`` of a single input image.
            Used to size the classifier; defaults to a 1x28x28 image.
        num_classes: Number of output logits.
    """

    def __init__(
        self,
        kernel_size,
        num_layers,
        use_pooling,
        use_batchnorm,
        dropout_rate,
        input_shape=(1, 28, 28),
        num_classes=26,
    ):
        super().__init__()
        hidden_channels = 32
        layers = []
        in_channels = input_shape[0]
        for _ in range(num_layers):
            layers.append(
                nn.Conv2d(in_channels, hidden_channels, kernel_size, padding=1)
            )
            if use_batchnorm:
                layers.append(nn.BatchNorm2d(hidden_channels))
            layers.append(nn.ReLU())
            if use_pooling:
                layers.append(nn.MaxPool2d(2, 2))
            if dropout_rate > 0:
                layers.append(nn.Dropout(dropout_rate))
            in_channels = hidden_channels
        self.features = nn.Sequential(*layers)
        # The spatial size of the feature map depends on kernel_size,
        # num_layers and use_pooling, so the classifier width is derived from
        # the feature extractor itself instead of being hard-coded.
        self.fc = nn.Linear(self._flattened_size(input_shape), num_classes)

    def _flattened_size(self, input_shape):
        """Return the number of features produced for one input image."""
        was_training = self.features.training
        # Probe in eval mode so BatchNorm running statistics are not updated
        # and Dropout draws no random numbers.
        self.features.eval()
        with torch.no_grad():
            probe = self.features(torch.zeros(1, *input_shape))
        self.features.train(was_training)
        return probe.numel()

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.features(x)
        x = x.flatten(1)  # keep the batch dimension, flatten the rest
        return self.fc(x)


# Model training function
def train_model(model, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Reads the module-level ``train_loader``, ``device`` and ``writer``. After
    every epoch the batch losses, each weighted by its batch size and divided
    by the dataset length, are printed and logged under ``"Loss/train"``.

    Args:
        model: Network to optimize; it is switched to training mode.
        criterion: Callable mapping ``(outputs, labels)`` to a loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of passes over the training data.
    """

    def weighted_batch_loss(images, targets):
        # One optimization step; returns the batch loss scaled by batch size.
        images = images.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(images), targets)
        batch_loss.backward()
        optimizer.step()
        return batch_loss.item() * images.size(0)

    model.train()
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for images, targets in train_loader:
            loss_sum += weighted_batch_loss(images, targets)
        epoch_loss = loss_sum / len(train_loader.dataset)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
        writer.add_scalar("Loss/train", epoch_loss, epoch)


# Model evaluation function
def test_model(model):
    """Measure the accuracy of ``model`` on ``test_loader``.

    Reads the module-level ``test_loader``, ``device`` and ``writer``. The
    model is put in evaluation mode, predictions are taken as the index of the
    largest output along dimension 1, and the resulting fraction of correct
    predictions is printed and logged under ``"Accuracy/test"``.

    Args:
        model: Network to evaluate.
    """
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, targets in test_loader:
            images = images.to(device)
            targets = targets.to(device)
            predicted = model(images).max(dim=1).indices
            seen += targets.size(0)
            hits += (predicted == targets).sum().item()
    accuracy = hits / seen
    print(f"Accuracy on test set: {accuracy:.4f}")
    writer.add_scalar("Accuracy/test", accuracy)


# Hyperparameter values whose combinations are analyzed
kernel_sizes = [3, 5]
num_layers_list = [2, 3]
use_pooling_list = [True, False]
use_batchnorm_list = [True, False]
dropout_rates = [0.2, 0.5]

# Run the experiments: one freshly initialized model per combination.
# itertools.product yields the combinations in the same order as the
# equivalent nested loops (the last list varies fastest).
experiment_grid = itertools.product(
    kernel_sizes,
    num_layers_list,
    use_pooling_list,
    use_batchnorm_list,
    dropout_rates,
)
for (
    kernel_size,
    num_layers,
    use_pooling,
    use_batchnorm,
    dropout_rate,
) in experiment_grid:
    model = CNN(
        kernel_size,
        num_layers,
        use_pooling,
        use_batchnorm,
        dropout_rate,
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # `writer` must stay a module-level name: train_model and test_model read
    # it as a global. Each combination logs to its own directory.
    writer = SummaryWriter(
        log_dir=f"logs/kernel_{kernel_size}_layers_{num_layers}_pooling_{use_pooling}_batchnorm_{use_batchnorm}_dropout_{dropout_rate}"
    )
    try:
        print(
            f"Experiment with kernel size={kernel_size}, num_layers={num_layers}, pooling={use_pooling}, batchnorm={use_batchnorm}, dropout={dropout_rate}"
        )
        train_model(model, criterion, optimizer, num_epochs)
        test_model(model)
    finally:
        # Close the writer even if training or evaluation raises, so the
        # event file is flushed and its handle is released.
        writer.close()