## Здесь вам необходимо решить задачу классификации изображений

Решить задачу будет необходимо для 3 различных датасетов:

1) MNIST - датасет изображений цифр от 0 до 9 (уже есть пример решения, оцениваться будут только CIFAR и ImageNet)

2) CIFAR - небольшой датасет с изображением различных объектов - максимум 5 баллов

3) ImageNet - эталонный датасет для оценки моделей классификации изображений - максимум 10 баллов

## Рекомендации

1) Посмотреть на данные, посмотреть сбалансированность классов

2) Написать класс датасета, чтобы данные можно было удобно использовать для обучения

3) Написать функции обучения и валидации

4) Написать/импортировать модель

5) Обучить и получить неплохую метрику)
(больше - лучше)


In [1]:
import torch
from torch import nn
from torchvision.datasets import MNIST, CIFAR10, ImageNet
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import torchvision
import os, shutil
from sklearn.metrics import *
from tqdm.notebook import tqdm

In [None]:
# MNIST preprocessing: tensor conversion followed by normalisation of the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)
# Both splits are stored under ./data; download=True is passed so the files
# can be fetched when they are missing.
train_dataset = MNIST('./data', train=True, transform=transform, download=True)
test_dataset = MNIST('./data', train=False, transform=transform, download=True)

## MNIST

In [3]:
# MNIST preprocessing: tensor conversion followed by normalisation of the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)
train_dataset = MNIST('./data', train=True, transform=transform, download=True)
test_dataset = MNIST('./data', train=False, transform=transform, download=True)

# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
# No incomplete final batch is dropped in either loader.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    drop_last=False,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    drop_last=False,
)

In [4]:
def train_epoch(model, dataloader, loss, optimizer, scheduler, device, n_epoch):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: network being trained; it is switched to training mode here.
        dataloader: iterable yielding ``(data, labels)`` batches.
        loss: callable ``loss(outputs, labels)`` returning a scalar tensor.
        optimizer: optimizer that updates the parameters of ``model``.
        scheduler: optional learning-rate scheduler; when not ``None`` it is
            stepped once, after the whole pass.
        device: device the batches are moved to.
        n_epoch: index of the current epoch (not used by this function).

    Returns:
        ``(mean_loss, accuracy)`` where ``mean_loss`` is the sample-weighted
        average of the batch losses and ``accuracy`` comes from
        ``accuracy_score`` over all predictions of the pass.
    """
    # eval_epoch() puts the model into eval mode, so training mode has to be
    # restored explicitly; otherwise every epoch after the first would train
    # with BatchNorm/Dropout layers in inference behaviour.
    model.train()

    pbar = tqdm(dataloader, total=len(dataloader))
    loss_sum, n_samples = 0, 0
    preds, targets = [], []
    for (data, labels) in pbar:
        data = data.to(device)
        labels = labels.to(device)

        # Clear gradients before the backward pass so that nothing left over
        # from earlier work leaks into this update.
        optimizer.zero_grad()
        outputs = model(data)
        cur_loss = loss(outputs, labels)
        cur_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that a smaller last
        # batch does not skew the running mean.
        batch_size = labels.shape[0]
        loss_sum += cur_loss.item() * batch_size
        n_samples += batch_size
        targets += labels.detach().cpu().numpy().tolist()
        preds += outputs.detach().argmax(dim=1).cpu().numpy().tolist()
        pbar.set_description(f"Train_loss: {loss_sum / n_samples}")

    if scheduler is not None:
        scheduler.step()
    return (loss_sum / n_samples, accuracy_score(targets, preds))

# torch.no_grad is instantiated here: the call form is accepted by every
# PyTorch release, whereas the bare ``@torch.no_grad`` form is not.
@torch.no_grad()
def eval_epoch(model, dataloader, loss, optimizer, scheduler, device, n_epoch):
    """Evaluate ``model`` on every batch of ``dataloader`` without gradients.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        model: network to evaluate.
        dataloader: iterable yielding ``(data, labels)`` batches.
        loss: callable ``loss(outputs, labels)`` returning a scalar tensor.
        optimizer: not used; kept so the signature mirrors ``train_epoch``.
        scheduler: not used; kept so the signature mirrors ``train_epoch``.
        device: device the batches are moved to.
        n_epoch: index of the current epoch (not used by this function).

    Returns:
        ``(mean_loss, accuracy)`` where ``mean_loss`` is the sample-weighted
        average of the batch losses and ``accuracy`` comes from
        ``accuracy_score`` over all predictions of the pass.
    """
    pbar = tqdm(dataloader, total=len(dataloader))
    loss_sum, n_samples = 0, 0
    model.eval()
    preds, targets = [], []
    for (data, labels) in pbar:
        data = data.to(device)
        labels = labels.to(device)
        outputs = model(data)
        cur_loss = loss(outputs, labels)

        # Weight the batch loss by the batch size so that a smaller last
        # batch does not skew the running mean.
        batch_size = labels.shape[0]
        loss_sum += cur_loss.item() * batch_size
        n_samples += batch_size
        targets += labels.detach().cpu().numpy().tolist()
        preds += outputs.detach().argmax(dim=1).cpu().numpy().tolist()
        pbar.set_description(f"Test_loss: {loss_sum / n_samples}")
    return (loss_sum / n_samples, accuracy_score(targets, preds))

def train_model(model, train_loader, test_loader, loss, optimizer, scheduler=None, device='cpu', n_epochs=10):
    """Alternate one training pass and one evaluation pass for ``n_epochs`` epochs.

    The model is moved to ``device`` first. After each epoch the outer
    progress bar shows the train loss, the test loss and the test accuracy.

    Returns:
        The model after the last epoch.
    """
    progress = tqdm(range(n_epochs), total=n_epochs)
    model = model.to(device)
    for epoch in progress:
        # The training accuracy is computed by train_epoch but not reported.
        train_loss, _ = train_epoch(
            model, train_loader, loss, optimizer, scheduler, device, epoch
        )
        test_loss, test_acc = eval_epoch(
            model, test_loader, loss, optimizer, scheduler, device, epoch
        )
        summary = f"Epoch: {epoch}, Train Loss: {train_loss}, Test_loss: {test_loss}, Test Accuracy: {test_acc}"
        progress.set_description(summary)
    return model

In [5]:
class LinearNet(nn.Module):
    """Fully connected classifier with 10 output logits.

    The network is a chain of ``Linear -> BatchNorm1d -> ReLU`` blocks
    followed by a single linear classification layer.

    Args:
        hidden_size: sequence of layer widths. ``hidden_size[0]`` is the
            number of input features after flattening and every consecutive
            pair ``(hidden_size[i], hidden_size[i + 1])`` defines one block.
    """

    def __init__(self, hidden_size=(784, 1024, 2048, 2048, 1024, 512, 256)):
        super().__init__()
        widths = list(hidden_size)
        # nn.ModuleList (instead of a plain Python list) registers the blocks
        # as submodules, so their weights show up in parameters() and
        # state_dict(), follow .to(device) and react to train()/eval().
        self.blocks = nn.ModuleList([
            nn.Sequential(
                nn.Linear(n_in, n_out),
                nn.BatchNorm1d(n_out),
                nn.ReLU(),  # x = max(0, x)
            )
            for n_in, n_out in zip(widths, widths[1:])
        ])
        self.cls = nn.Sequential(
            nn.Linear(widths[-1], 10)
        )

    def forward(self, x):
        """Flatten each sample to a vector and return the class logits."""
        x = x.reshape(x.shape[0], -1)
        for block in self.blocks:
            x = block(x)
        x = self.cls(x)
        return x

# Training setup for the fully connected model: CPU, five epochs, Adam with a
# learning rate of 1e-3 and no learning-rate scheduler.
device = 'cpu'
n_epochs = 5
scheduler = None
model = LinearNet()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [6]:
class ConvNet(nn.Module):
    """Convolutional classifier with 10 output logits.

    The network is a chain of ``Conv2d(kernel 5, padding 2) -> BatchNorm2d ->
    ReLU`` blocks with a residual connection wherever a block keeps the
    tensor shape, followed by a linear layer on the flattened feature map.

    Args:
        hidden_size: sequence of channel counts. ``hidden_size[0]`` is the
            number of input channels and every consecutive pair defines one
            convolutional block.
    """

    def __init__(self, hidden_size=(1, 3, 8, 16, 16, 16, 8, 4)):
        super().__init__()
        channels = list(hidden_size)
        # nn.ModuleList (instead of a plain Python list) registers the blocks
        # as submodules, so their weights show up in parameters() and
        # state_dict(), follow .to(device) and react to train()/eval().
        self.blocks = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(c_in, c_out, kernel_size=5, padding=2),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),  # x = max(0, x)
            )
            for c_in, c_out in zip(channels, channels[1:])
        ])
        # The classifier expects 784 spatial positions per output channel
        # (e.g. 28x28 feature maps).
        self.cls = nn.Sequential(
            nn.Linear(channels[-1] * 784, 10)
        )

    def forward(self, x):
        """Apply the blocks, flatten the feature map and return class logits."""
        for block in self.blocks:
            y = block(x)
            # Skip connection only where input and output shapes coincide.
            if y.shape == x.shape:
                x = y + x
            else:
                x = y
        x = x.reshape(x.shape[0], -1)
        x = self.cls(x)
        return x

# Training setup for the convolutional model: CPU, five epochs, Adam with a
# learning rate of 1e-3 and no learning-rate scheduler.
device = 'cpu'
n_epochs = 5
scheduler = None
model = ConvNet()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# Train and evaluate the currently configured model on the current loaders.
train_model(
    model,
    train_loader,
    test_loader,
    loss,
    optimizer,
    scheduler=scheduler,
    device=device,
    n_epochs=n_epochs,
)

In [8]:
# ResNet-18 adapted to MNIST: a first convolution that takes one input
# channel and a final linear layer with 10 outputs.
model = torchvision.models.resnet18()
model.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
model.fc = nn.Linear(model.fc.in_features, 10)

loss = nn.CrossEntropyLoss()
# The optimizer must be created only after the layers above were swapped in.
# Built earlier, it would hold the parameters of the discarded layers and the
# new conv1/fc weights would never be updated.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = None
device = 'cpu'
n_epochs = 5

In [None]:
# Train and evaluate the currently configured model on the current loaders.
train_model(
    model,
    train_loader,
    test_loader,
    loss,
    optimizer,
    scheduler=scheduler,
    device=device,
    n_epochs=n_epochs,
)

## CIFAR

In [12]:
# Use the GPU whenever CUDA reports itself as available; otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [14]:
# CIFAR-10 preprocessing: tensor conversion followed by normalisation of each
# of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)
# Both splits are stored under ./data; download=True is passed so the files
# can be fetched when they are missing.
train_dataset = CIFAR10('./data', train=True, transform=transform, download=True)
test_dataset = CIFAR10('./data', train=False, transform=transform, download=True)

In [15]:
# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
# No incomplete final batch is dropped in either loader.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    drop_last=False,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=False,
    drop_last=False,
)

In [23]:
class CIFARModel(nn.Module):
    """Small convolutional classifier with 10 output logits.

    Three ``Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2)`` stages widen the
    channels 3 -> 32 -> 64 -> 128, then a two-layer perceptron maps the
    flattened ``128 * 4 * 4`` features to 10 logits. That feature count
    matches 32x32 inputs halved three times by the pooling layers.
    """

    def __init__(self):
        super().__init__()

        def conv_stage(in_channels, out_channels):
            # One feature-extraction stage: convolution, ReLU, 2x2 pooling.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]

        self.features = nn.Sequential(
            *conv_stage(3, 32),
            *conv_stage(32, 64),
            *conv_stage(64, 128),
        )
        head_layers = [
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.classifier(self.features(x))

In [24]:
# The device is chosen first so that this cell does not rely on a value left
# behind by an earlier cell when the model is moved onto it.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = CIFARModel().to(device)
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
n_epochs = 5

In [None]:
# Train the CIFAR model without a learning-rate scheduler and keep the
# returned model.
model = train_model(
    model,
    train_loader,
    test_loader,
    loss,
    optimizer,
    scheduler=None,
    device=device,
    n_epochs=n_epochs,
)

## ImageNet

**Примечание:** я решил использовать TinyImageNet, т.к. не хотел загружать архив размером 155 ГБ с Kaggle. PyTorch (torchvision) в Google Colab не поддерживает TinyImageNet напрямую, так что я загрузил его отдельным архивом.

In [None]:
# -nc: skip the download when the archive already exists in ./data, so that
# re-running the cell does not fetch a second copy.
!wget -nc http://cs231n.stanford.edu/tiny-imagenet-200.zip -P ./data
# -q: no per-file output; -n: never overwrite existing files, so a re-run does
# not stop at an interactive overwrite prompt.
!unzip -q -n ./data/tiny-imagenet-200.zip -d ./data

In [31]:
from torch.utils.data import Dataset
from PIL import Image

In [41]:
class TinyImageNetDataset(Dataset):
    """Dataset over an extracted ``tiny-imagenet-200`` directory.

    Files read, relative to ``<root>/tiny-imagenet-200``:

    * ``wnids.txt`` - one class id per line; the line order defines the
      integer labels.
    * ``train/<class id>/images/*`` - training images of one class.
    * ``val/val_annotations.txt`` - tab-separated lines whose first column is
      an image file name and whose second column is its class id.
    * ``val/images/*`` - validation images.

    Args:
        root: directory that contains the ``tiny-imagenet-200`` folder.
        split: ``'train'`` selects the training images; any other value
            selects the annotated validation images.
        transform: optional callable applied to the RGB PIL image.

    Attributes set by the constructor: ``classes`` (list of class ids),
    ``class_to_idx`` (class id -> integer label) and ``images`` (list of
    ``(image path, class id)`` pairs).
    """

    def __init__(self, root, split='train', transform=None):
        self.root = os.path.join(root, 'tiny-imagenet-200')
        self.transform = transform
        self.split = split

        with open(os.path.join(self.root, 'wnids.txt'), 'r', encoding='utf-8') as f:
            # Blank lines (e.g. a trailing newline) must not become classes.
            self.classes = [line.strip() for line in f if line.strip()]

        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

        if split == 'train':
            self.images = self._collect_train_images()
        else:
            self.images = self._collect_val_images()

    def _collect_train_images(self):
        """List ``(path, class id)`` pairs of the training split."""
        samples = []
        for class_name in self.classes:
            class_dir = os.path.join(self.root, 'train', class_name, 'images')
            # os.listdir returns entries in arbitrary order; sorting makes the
            # sample indices reproducible between runs and machines.
            for img_name in sorted(os.listdir(class_dir)):
                samples.append((os.path.join(class_dir, img_name), class_name))
        return samples

    def _collect_val_images(self):
        """List ``(path, class id)`` pairs from the validation annotations."""
        samples = []
        annotations = os.path.join(self.root, 'val', 'val_annotations.txt')
        with open(annotations, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Only the first two columns are used; the rest is ignored.
                parts = line.split('\t')
                img_name, class_name = parts[0], parts[1]
                img_path = os.path.join(self.root, 'val', 'images', img_name)
                samples.append((img_path, class_name))
        return samples

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        img_path, class_name = self.images[idx]
        # The context manager closes the file handle once the pixel data has
        # been copied into the converted image.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        label = self.class_to_idx[class_name]

        return image, label

In [42]:
# Tensor conversion followed by per-channel normalisation with the mean/std
# triples given below.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

In [43]:
# Training and validation splits of Tiny ImageNet, both read from ./data and
# sharing the same preprocessing.
train_dataset = TinyImageNetDataset('./data', 'train', transform)
val_dataset = TinyImageNetDataset('./data', 'val', transform)

In [44]:
# Batches of 64 for both splits; only the training data is reshuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=False,
)

In [None]:
# ResNet-18 created with pretrained=True; its final linear layer is replaced
# by one with 200 outputs before the model is moved to the selected device.
model = torchvision.models.resnet18(pretrained=True)
model.fc = nn.Linear(
    in_features=model.fc.in_features,
    out_features=200,
)
model = model.to(device)

In [48]:
# Training setup: five epochs, cross-entropy loss and Adam with a learning
# rate of 1e-4.
n_epochs = 5
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001)

In [None]:
# Evaluate on the Tiny ImageNet validation loader. `test_loader` still refers
# to the CIFAR-10 test set built earlier in the notebook and must not be
# used for this model.
model = train_model(model, train_loader, val_loader, loss, optimizer, device=device, n_epochs=n_epochs)