**На каггле TEAM NAME: Dmitry_Babkin_804338584**

### Импорт библиотек

In [1]:
import torch
import pandas as pd
import numpy as np
import pickle
import torch.nn as nn
import torchvision.models as models

from skimage import io
from tqdm import tqdm, tqdm_notebook
from PIL import Image
from pathlib import Path
from sklearn.metrics import f1_score

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import WeightedRandomSampler
from multiprocessing.pool import ThreadPool
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
from collections import Counter
from albumentations.pytorch.transforms import ToTensorV2
from albumentations import (
    Compose, Resize, HorizontalFlip, ShiftScaleRotate, RandomResizedCrop,
    RandomBrightnessContrast, GaussianBlur, ColorJitter, Normalize
)
from multiprocessing import cpu_count
from matplotlib import colors, pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

  check_for_updates()


In [2]:
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

CUDA is available!  Training on GPU ...


###  Преобразование входных данных и настройка аугментации

In [3]:
RESCALE_SIZE = 224

# Используем аугментацию для обучения
train_transform = Compose([
    Resize(RESCALE_SIZE, RESCALE_SIZE),
    HorizontalFlip(p=0.5),
    ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=15, p=0.5),
    ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    GaussianBlur(blur_limit=(3, 5), p=0.3),
    RandomResizedCrop(height=RESCALE_SIZE, width=RESCALE_SIZE, scale=(0.8, 1.0), ratio=(0.75, 1.33)),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

val_transform = Compose([
    Resize(RESCALE_SIZE, RESCALE_SIZE),
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

### Загрузка данных

In [4]:
# Датасеты
train_dataset = ImageFolder('/kaggle/input/journey-springfield/train/simpsons_dataset', 
                            transform=lambda img: train_transform(image=np.array(img))["image"])
test_dataset = ImageFolder('/kaggle/input/journey-springfield/testset/', 
                           transform=lambda img: val_transform(image=np.array(img))["image"])

In [5]:
# Статистика по классам
class_counts = np.bincount([sample[1] for sample in train_dataset.samples])
print(f"Class distribution: {[round(c / sum(class_counts) * 100, 2) for c in class_counts]}%")

Class distribution: [4.36, 0.2, 2.98, 0.51, 6.41, 0.47, 5.7, 4.71, 0.22, 2.24, 0.04, 2.18, 0.13, 0.13, 0.58, 10.73, 2.38, 5.76, 1.48, 0.01, 6.47, 0.61, 6.17, 0.34, 1.18, 5.15, 0.08, 6.94, 6.95, 1.71, 0.15, 0.34, 5.7, 0.31, 0.21, 0.43, 0.49, 4.19, 0.19, 0.26, 0.04, 0.86]%


### Обработка дисбаланса классов в данных

In [6]:
# взвешивание классов и нормализация
class_counts = np.bincount([sample[1] for sample in train_dataset.samples])
class_weights = 1.0 / class_counts
class_weights /= class_weights.sum()  # Нормализация

# присвоение изображению веса на основе его класса
sample_weights = np.array([class_weights[label] for _, label in train_dataset.samples], dtype=np.float32)

# статистика о семплировании
print(f"Sample weights (min, max): {sample_weights.min(), sample_weights.max()}")
print(f"Class weights (min, max): {class_weights.min(), class_weights.max()}")

# выборка с учётом весов
train_sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

train_dataloader = DataLoader(train_dataset, batch_size=128, sampler=train_sampler, shuffle=False, pin_memory=True, num_workers = min(4, cpu_count()))
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True, num_workers = min(4, cpu_count()))

Sample weights (min, max): (0.00044869413, 0.33592236)
Class weights (min, max): (0.0004486941425066419, 0.33592234802330584)


### Реализация модели

In [7]:
class Resnet50FineTune(nn.Module):
    def __init__(self, num_classes=42):
        super(Resnet50FineTune, self).__init__()
        self.resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)

        # Замораживание весов
        for param in self.resnet50.parameters():
            param.requires_grad = False

        # разморозим параметры последнего блока layer4 и полносвязного слоя
        for param in self.resnet50.layer4.parameters():
            param.requires_grad = True
        for param in self.resnet50.fc.parameters():
            param.requires_grad = True
        
        in_features = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.resnet50(x)

### Обучение модели и предсказание на тестовом наборе данных

In [8]:
def training_epoch(model, optimizer, criterion, train_loader, tqdm_desc):
    train_accuracy = 0.0
    model.train()
    for images, labels in tqdm(train_loader, desc=tqdm_desc):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        train_accuracy += (logits.argmax(dim=1) == labels).sum().item()

    train_accuracy /= len(train_loader.dataset)
    return train_accuracy


def train(model, optimizer, criterion, train_loader, num_epochs, scheduler=None):
    for epoch in range(1, num_epochs + 1):
        train_accuracy = training_epoch(
            model, optimizer, criterion, train_loader,
            tqdm_desc=f'Training {epoch}/{num_epochs}'
        )
        if scheduler is not None:
            scheduler.step()
        print(f'Training batch accuracy {train_accuracy}')

        if scheduler is not None:
            scheduler.step()


@torch.no_grad()
def predict(model, test_loader):
    all_preds = []
    all_labels = []
    model.eval()
    for images, labels in tqdm(test_loader, desc="Predicting"):
        images = images.to(device)
        outputs = model(images)
        all_preds.append(outputs.argmax(dim=1).cpu().numpy())
        all_labels.append(labels.numpy())
    
    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)
    return all_preds, all_labels

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_epochs = 10
criterion = torch.nn.CrossEntropyLoss()

model = Resnet50FineTune(num_classes=len(train_dataset.classes)).to(device)
model = nn.DataParallel(model)
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
T_max = num_epochs // 2
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_max)

train(model, optimizer, criterion, train_dataloader, num_epochs, scheduler)

preds, actual_labels = predict(model, train_dataloader)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 207MB/s]
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Training 1/10: 100%|██████████| 164/164 [02:31<00:00,  1.08it/s]


Training batch accuracy 0.8542015000238857


Training 2/10: 100%|██████████| 164/164 [02:20<00:00,  1.17it/s]


Training batch accuracy 0.9706205512826638


Training 3/10: 100%|██████████| 164/164 [02:21<00:00,  1.16it/s]


Training batch accuracy 0.9856685616012993


Training 4/10: 100%|██████████| 164/164 [02:13<00:00,  1.23it/s]


Training batch accuracy 0.9882482205130655


Training 5/10: 100%|██████████| 164/164 [02:17<00:00,  1.20it/s]


Training batch accuracy 0.9804137008551091


Training 6/10: 100%|██████████| 164/164 [02:16<00:00,  1.20it/s]


Training batch accuracy 0.9708116371279798


Training 7/10: 100%|██████████| 164/164 [02:14<00:00,  1.22it/s]


Training batch accuracy 0.9834710743801653


Training 8/10: 100%|██████████| 164/164 [02:15<00:00,  1.21it/s]


Training batch accuracy 0.9917833086514116


Training 9/10: 100%|██████████| 164/164 [02:16<00:00,  1.20it/s]


Training batch accuracy 0.9940285673338748


Training 10/10: 100%|██████████| 164/164 [02:13<00:00,  1.23it/s]


Training batch accuracy 0.9903501648115416


Predicting: 100%|██████████| 164/164 [02:15<00:00,  1.21it/s]


 ### Подготовка предсказаний для тестовых данных

In [10]:
# Вычисление F1-метрики
f1 = f1_score(actual_labels, preds, average='micro')
print(f'F1 Score: {f1:.4f}')

F1 Score: 0.9919


In [11]:
preds, _ = predict(model, test_dataloader)

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
Predicting: 100%|██████████| 31/31 [00:02<00:00, 10.70it/s]


In [12]:
TEST_DIR = Path('/kaggle/input/journey-springfield/testset')
test_files = sorted(list(TEST_DIR.rglob('*.jpg')))
file_names = [file.name for file in test_files]

In [13]:
idx_to_class = {l: n for n, l in train_dataset.class_to_idx.items()}
preds_class_names = [idx_to_class[pred] for pred in preds]

In [14]:
my_submit = pd.DataFrame({'Id': file_names, 'Expected': preds_class_names})
my_submit.head()

Unnamed: 0,Id,Expected
0,img0.jpg,nelson_muntz
1,img1.jpg,bart_simpson
2,img10.jpg,ned_flanders
3,img100.jpg,chief_wiggum
4,img101.jpg,apu_nahasapeemapetilon


In [15]:
my_submit.to_csv('submission.csv', index=False)