# Imports

In [1]:
import random
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from PIL import Image
from tqdm.notebook import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from os import listdir
from os.path import join
from torch.optim import SGD
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from torchinfo import summary

# Seed

In [2]:
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    print(torch.cuda.get_device_name())

NVIDIA GeForce RTX 3060 Laptop GPU


# Constants

In [3]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DATASET_PATH = 'data'

IMG_SIZE = (224, 224)
GLOBAL_MEAN, GLOBAL_STD = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
BATCH_SIZE = 128
VAL_SIZE = .1
EPOCHS = 16

# Dataset

In [4]:
class SportsDataset(Dataset):
    def __init__(self, data_path, label2id, is_train=True, transform=None):
        self.data_path = data_path
        self.is_train = is_train
        self.folder = join(data_path, 'train' if is_train else 'test')
        self.images = listdir(self.folder)
        self.dataframe = pd.read_csv(f'{self.folder}.csv')
        self.label_encoding = encoder
        self.transform = transform

    def __getitem__(self, idx):
        image_name = self.images[idx]
        with Image.open(join(self.folder, image_name)).convert('RGB') as image:
            image.load()

        if self.transform:
            image = self.transform(image)
        if self.is_train:
            y = self.dataframe[self.dataframe.image_id == image_name].label.item()
            return image, torch.from_numpy(self.label_encoding.transform([y])).type(torch.long)
        return image, image_name
        
    def __len__(self):
        return len(self.images)

In [5]:
unique_labels = sorted(set(pd.read_csv(join(DATASET_PATH, 'train.csv')).label))
NUM_CLASSES = len(unique_labels)

encoder = LabelEncoder()
encoder.fit(unique_labels)

In [6]:
transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.AutoAugment(),
    transforms.RandAugment(),
    transforms.ToTensor(),
    transforms.Normalize(GLOBAL_MEAN, GLOBAL_STD)
])

In [7]:
data = SportsDataset(DATASET_PATH, encoder, is_train=True, transform=transform)
train_data, val_data = random_split(data, [1 - VAL_SIZE, VAL_SIZE])

In [8]:
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)

# Training

In [9]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model = models.vit_l_32(weights='DEFAULT')
        
        for param in self.model.parameters():
            param.requires_grad = False

        self.model.heads = nn.Sequential(nn.Linear(1024, NUM_CLASSES))

    def forward(self, x):
        return self.model(x)

In [10]:
model = Model()
model.to(DEVICE)
summary(model)

Layer (type:depth-idx)                                                 Param #
Model                                                                  --
├─VisionTransformer: 1-1                                               1,024
│    └─Conv2d: 2-1                                                     (787,456)
│    └─Encoder: 2-2                                                    201,728
│    │    └─Dropout: 3-1                                               --
│    │    └─Sequential: 3-2                                            (302,309,376)
│    │    └─LayerNorm: 3-3                                             (2,048)
│    └─Sequential: 2-3                                                 --
│    │    └─Linear: 3-4                                                30,750
Total params: 303,332,382
Trainable params: 30,750
Non-trainable params: 303,301,632

In [11]:
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=EPOCHS):
    train_losses, val_losses = [], []
    for epoch in tqdm(range(num_epochs)):
        model.train()
        train_loss = val_loss = .0
        true_labels, pred_labels = [], []
        for X, y in tqdm(train_loader, leave=False):
            X = X.to(DEVICE)
            y = y.to(DEVICE).squeeze()
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()

            optimizer.step()

            train_loss += loss.item()
            
            true_labels.extend(y.cpu())
            pred_labels.extend(outputs.cpu().argmax(1))
            # print(loss.item(), f1_score(true_labels, pred_labels, average='micro'))

        train_loss /= len(train_loader.dataset)
        train_losses.append(train_loss)
        f1_train = f1_score(true_labels, pred_labels, average='micro')
        true_labels, pred_labels = [], []
        
        model.eval()
        with torch.no_grad():
            for X, y in tqdm(val_loader, leave=False):
                X = X.to(DEVICE)
                y = y.to(DEVICE).squeeze()
                outputs = model(X)
                loss = criterion(outputs, y)
                val_loss += loss.item()
                true_labels.extend(y.cpu())
                pred_labels.extend(outputs.cpu().argmax(1))

        val_loss /= len(val_loader.dataset)
        val_losses.append(val_loss)
        f1_val = f1_score(true_labels, pred_labels, average='micro')

        print(f"Epoch: {epoch + 1}, train loss: {train_loss:.4f},  val. loss: {val_loss:.4f},  f1_train: {f1_train},  f1_val: {f1_val}")

    return train_losses, val_losses

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)

In [13]:
train_losses, val_losses = train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=EPOCHS)

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch: 1, train loss: 0.0067,  val. loss: 0.0041,  f1_train: 0.806763639925556,  f1_val: 0.877011240908089


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch: 2, train loss: 0.0038,  val. loss: 0.0038,  f1_train: 0.878685473601724,  f1_val: 0.879876570420983


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch: 3, train loss: 0.0034,  val. loss: 0.0035,  f1_train: 0.8881868939171319,  f1_val: 0.8922195283226801


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch: 4, train loss: 0.0034,  val. loss: 0.0035,  f1_train: 0.8902194142423353,  f1_val: 0.8900154286973772


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

Epoch: 5, train loss: 0.0033,  val. loss: 0.0034,  f1_train: 0.8917621706337545,  f1_val: 0.8904562486224378


  0%|          | 0/320 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(12, 9))
plt.plot(train_losses, label = 'train')
plt.plot(val_losses, label = 'val')
plt.xlabel('epoches')
plt.ylabel('loss')
plt.legend()
plt.show()

NameError: name 'train_losses' is not defined

<Figure size 864x648 with 0 Axes>

# Prediction

In [14]:
test_data = SportsDataset(DATASET_PATH, encoder, is_train=False, transform=transform)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, pin_memory=True)

In [15]:
def get_prediction(model, test_loader, encoder, filename):
    image_ids, labels = [], []
    for X, y in tqdm(test_loader):
        X = X.to(DEVICE)
        image_ids.extend(y)
        preds = model(X).cpu().argmax(1)
        labels.extend(encoder.inverse_transform(preds))

    predictions = pd.DataFrame({"image_id": image_ids, "label": labels})
    predictions.to_csv(filename, index=False)

    return predictions

In [16]:
data = get_prediction(model, test_loader, encoder, 'submission_.csv')

  0%|          | 0/152 [00:00<?, ?it/s]