**Задание: улучши нейросеть**

Сборная Москвы по ИИ | Введение в нейронные сети

---

В основном ноутбуке мы обучили простую полносвязную сеть на FashionMNIST и получили ~87% accuracy.

Твоя задача — улучшить модель. Вот несколько идей:

1. **Добавь больше слоёв** — сделай сеть глубже (например, 4-5 линейных слоёв с ReLU)
2. **Поменяй размеры слоёв** — попробуй больше/меньше нейронов (128, 512, ...)
3. **Добавь Dropout** — `nn.Dropout(p=0.2)` между слоями для регуляризации
4. **Поменяй оптимизатор / lr** — попробуй SGD с momentum, или другой learning rate
5. **Обучи дольше** — увеличь число эпох

Цель: попробуй достичь **> 90%** accuracy на тесте.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
%matplotlib inline

import torch
from torch import nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')

Device: cuda


**Загрузка данных** (такая же, как в основном ноутбуке)

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform
)
test_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True, num_workers=16)
test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False, num_workers=16)

class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'
]

print(f'Train: {len(train_dataset)}, Test: {len(test_dataset)}')

Train: 60000, Test: 10000


**Функции для обучения и оценки** (такие же, как в основном ноутбуке)

In [3]:
def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        logits = model(images)
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        total_loss += loss.item() * images.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += images.size(0)

    return total_loss / total, correct / total


@torch.no_grad()
def evaluate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        logits = model(images)
        loss = criterion(logits, labels)

        total_loss += loss.item() * images.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += images.size(0)

    return total_loss / total, correct / total

---

**Базовая модель (для сравнения)**

Это та же модель из основного ноутбука. Запусти её, чтобы получить baseline.

In [4]:
class SimpleNet(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# baseline = SimpleNet().to(device)
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(baseline.parameters(), lr=1e-3)

# print('Обучаем baseline...')
# for epoch in range(10):
#     train_loss, train_acc = train_epoch(baseline, train_loader, criterion, optimizer, device)
#     test_loss, test_acc = evaluate(baseline, test_loader, criterion, device)
#     print(f'Epoch {epoch+1:2d}/10 | Train acc: {train_acc:.4f} | Test acc: {test_acc:.4f}')

# print(f'\nBaseline Test accuracy: {test_acc:.4f}')

---

**Твоя модель**

Напиши свою улучшенную модель ниже. Не забудь:
- Наследоваться от `nn.Module`
- Определить `__init__` и `forward`
- Входной размер: `28 * 28 = 784` (после Flatten)
- Выходной размер: `10` классов (логиты, без softmax)

In [None]:
class ImprovedNet(nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim, dropout=0.3):
        super(ImprovedNet, self).__init__()

        layers = [
            nn.Flatten()
        ]
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hidden_dim))
            layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            prev_dim = hidden_dim

        layers.append(nn.Linear(prev_dim, output_dim))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

In [None]:
my_model = ImprovedNet(input_dim=28*28, hidden_dims=[256, 128, 64, 32], output_dim=10).to(device)
my_criterion = nn.CrossEntropyLoss()
my_optimizer = torch.optim.Adam(my_model.parameters(), lr=0.005)
my_scheduler = StepLR(my_optimizer, step_size=10, gamma=0.8)

print(f'Параметров: {sum(p.numel() for p in my_model.parameters()):,}')

num_epochs = 100
train_losses_w, val_losses_w = [], []
train_accs_w, val_accs_w = [], []
best_val_loss = float('inf')
patience_w, counter_w = 30, 0

for epoch in tqdm(range(num_epochs)):
    my_model.train()
    train_loss, train_correct, train_total = 0, 0, 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        logits = my_model(X_batch)
        loss = my_criterion(logits, y_batch)
        my_optimizer.zero_grad()
        loss.backward()
        my_optimizer.step()

        train_loss += loss.item()
        _, preds = torch.max(logits, 1)
        train_correct += (preds == y_batch).sum().item()
        train_total += y_batch.size(0)

    train_loss /= len(train_loader)
    train_acc = train_correct / train_total
    train_losses_w.append(train_loss)
    train_accs_w.append(train_acc)

    my_model.eval()
    val_loss, val_correct, val_total = 0, 0, 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            logits = my_model(X_batch)
            
            loss = my_criterion(logits, y_batch)

            val_loss += loss.item()
            _, preds = torch.max(logits, 1)
            val_correct += (preds == y_batch).sum().item()
            val_total += y_batch.size(0)

    val_loss /= len(test_loader)
    val_acc = val_correct / val_total
    val_losses_w.append(val_loss)
    val_accs_w.append(val_acc)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter_w = 0
        torch.save(my_model.state_dict(), './models/best_model.pth')
    else:
        counter_w += 1
        if counter_w >= patience_w:
            print(f"Early stopping на эпохе {epoch+1} (val_loss не уменьшался)")
            break

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1:3d} | Train Loss: {train_loss:.4f} LR: {my_scheduler.get_last_lr()[0]:.4f} | "
              f"Val Loss (LogLoss): {val_loss:.4f} Acc: {val_acc:.4f}")
    
    my_scheduler.step()

print(f"\nЛучший результат (min val_loss): {best_val_loss:.4f}")


Параметров: 245,482


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch  10 | Train Loss: 0.3428 LR: 0.0050 | Val Loss (LogLoss): 0.3472 Acc: 0.8811
Epoch  20 | Train Loss: 0.2692 LR: 0.0040 | Val Loss (LogLoss): 0.3197 Acc: 0.8887
Epoch  30 | Train Loss: 0.2237 LR: 0.0032 | Val Loss (LogLoss): 0.3150 Acc: 0.8980
Epoch  40 | Train Loss: 0.1874 LR: 0.0026 | Val Loss (LogLoss): 0.3221 Acc: 0.9012
Epoch  50 | Train Loss: 0.1652 LR: 0.0020 | Val Loss (LogLoss): 0.3413 Acc: 0.9006
Early stopping на эпохе 55 (val_loss не уменьшался)

Лучший результат (min val_loss): 0.3110


In [7]:
_, test_acc = evaluate(my_model, test_loader, my_criterion, device)
print(f'Точность на тесте: {test_acc:.4f}')

Точность на тесте: 0.9042


**Сравнение результатов**

После обучения сравни accuracy своей модели с baseline (~87%). Удалось ли побить 89%?