In [1]:
%load_ext autoreload
%autoreload 2

Imports

In [2]:
import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import random_split
from torch.utils.data import DataLoader, ConcatDataset
import torcheval
from torcheval.metrics import MulticlassF1Score, Mean
import mlflow as mlf
import optuna as opt
import torchvision as tn
import sklearn as sn
from sklearn.metrics import f1_score
import pandas as ps
import numpy as ny
import typing as t
import pathlib as pl
import matplotlib.pyplot as pt
import random as rng
from tqdm import tqdm
import tqdm as tm
from pprint import pprint
from git import Repo

from data import load_data, GenImageDataset
from model import ResCNN

Environment

In [3]:
# Paths
ROOT_PATH = pl.Path('..')
GIT_PATH = ROOT_PATH / '..' / '..'
DATA_PATH = ROOT_PATH / 'data'
CKPT_PATH = ROOT_PATH / 'ckpt'

# Tracking
remote_server = r"http://127.0.0.1:8080"
mlf.set_tracking_uri(remote_server)
mlf_api = mlf.MlflowClient(remote_server)
git_repo = Repo(GIT_PATH)
git_tag = sorted(git_repo.tags, key=lambda x: x.commit.committed_datetime, reverse=True)[-1]

# Hardware
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 32
num_workers = 8
prefetch_factor = 4

# Reproducibility
SEED = 7982
rng.seed(SEED)
ny.random.seed(SEED)
torch.manual_seed(SEED)
gen_numpy = ny.random.default_rng(SEED)
gen_torch = torch.Generator('cpu').manual_seed(SEED)

# Constants
CONST_NUM_CLASS = 100

Load dataset

In [4]:
# Load raw dataset to access internals
train_data_raw = GenImageDataset(DATA_PATH, 'train', False)
valid_data_raw = GenImageDataset(DATA_PATH, 'val', False)
test_data_raw = GenImageDataset(DATA_PATH, 'test', False)

# Load preprocessed dataset
train_data, test_data = load_data(DATA_PATH)
train_data, valid_data = random_split(train_data, [0.8, 0.2], gen_torch)

# Prepare on-memory loading
train_loader = DataLoader(train_data, batch_size, shuffle=True, prefetch_factor=prefetch_factor, num_workers=num_workers, generator=gen_torch)
valid_loader = DataLoader(valid_data, batch_size, shuffle=True, prefetch_factor=prefetch_factor, num_workers=num_workers, generator=gen_torch)
test_loader = DataLoader(test_data, batch_size, shuffle=True, prefetch_factor=prefetch_factor, num_workers=num_workers, generator=gen_torch)

Create Experiment

In [5]:
exp_name = 'Test Experiment'
exp_tags = {
    'submission': git_tag.name.split('_')[1],
    'debug': 'true'
}

if (exp := mlf.get_experiment_by_name(exp_name)) is not None:
    exp_id: str = exp.experiment_id
else:
    exp_id: str = mlf.create_experiment(exp_name, tags=exp_tags)

exp = mlf.set_experiment(experiment_id=exp_id)

Create Experiment Run

In [6]:
# Training Settings
epochs = 25
loss_fn = nn.CrossEntropyLoss()

# HyperParameters

# Model
model = ResCNN().to(DEVICE)
optim = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=3e-5)

# Metrics
metric_train_f1_score = MulticlassF1Score(num_classes=CONST_NUM_CLASS, average='macro', device=DEVICE)
metric_train_loss = Mean(device=DEVICE)
metric_valid_f1_score = MulticlassF1Score(num_classes=CONST_NUM_CLASS, average='macro', device=DEVICE)
metric_valid_loss = Mean(device=DEVICE)
metric_test_f1_score = MulticlassF1Score(num_classes=CONST_NUM_CLASS, average='macro', device=DEVICE)

In [15]:
for e in tm.trange(epochs, desc='epoch', position=0):
    model = model.train(True).requires_grad_(True)
    with mlf.start_run(run_name=f'Training-Epoch-{e}'):
        with tm.tqdm(desc='train_batch', total=len(train_loader), position=1) as batch:
            for X, y in train_loader:
                # Send data to GPU
                # TODO: KORNIA
                X: Tensor = X.to(DEVICE)
                y_true: Tensor = y.to(DEVICE)

                # Train
                optim.zero_grad()
                logits: Tensor = model(X)
                loss: Tensor = loss_fn(logits, y_true)
                loss.backward()
                optim.step()

                # Track metrics
                metric_train_f1_score.update(logits.detach(), y_true)
                metric_train_loss.update(loss.detach())
                batch.update(1)

        mlf.log_metric('train_loss', metric_train_loss.compute().item(), step=e)
        mlf.log_metric('train_f1_score', metric_train_f1_score.compute().item(), step=e)
        metric_train_f1_score.reset()
        metric_train_loss.reset()
        model = model.eval().requires_grad_(False)

        with tm.tqdm(desc='valid_batch', total=len(train_loader), position=2) as batch:
            for X, y in valid_loader:
                # Send data to GPU
                # TODO: KORNIA
                X: Tensor = X.to(DEVICE)
                y_true: Tensor = y.to(DEVICE)

                # Infer
                with torch.no_grad():
                    logits: Tensor = model(X)

                # Track metrics
                loss: Tensor = loss_fn(logits, y_true)
                metric_valid_f1_score.update(logits, y_true)
                metric_valid_loss.update(loss)
                batch.update(1)

        mlf.log_metric('valid_loss', metric_valid_loss.compute().item(), step=e)
        mlf.log_metric('valid_f1_score', metric_valid_f1_score.compute().item(), step=e)
        metric_valid_f1_score.reset()
        metric_valid_loss.reset()

epoch:   0%|          | 0/5 [00:00<?, ?it/s]

train_batch: 100%|██████████| 375/375 [00:02<00:00, 171.65it/s]

[A
[A
[A
valid_batch:  25%|██▌       | 94/375 [00:00<00:01, 253.40it/s]
train_batch: 100%|██████████| 375/375 [00:02<00:00, 170.04it/s]

[A
[A
[A
valid_batch:  25%|██▌       | 94/375 [00:00<00:01, 253.40it/s]
train_batch: 100%|██████████| 375/375 [00:02<00:00, 173.51it/s]

[A
[A
[A
valid_batch:  25%|██▌       | 94/375 [00:00<00:01, 259.18it/s]
train_batch: 100%|██████████| 375/375 [00:02<00:00, 172.10it/s]

[A
[A
[A
valid_batch:  25%|██▌       | 94/375 [00:00<00:01, 247.65it/s]
train_batch: 100%|██████████| 375/375 [00:02<00:00, 172.05it/s]

[A
[A
[A
valid_batch:  25%|██▌       | 94/375 [00:00<00:01, 256.43it/s]
epoch: 100%|██████████| 5/5 [00:12<00:00,  2.57s/it]


In [7]:
# Load preprocessed dataset
train_data, test_data = load_data(DATA_PATH)

# Prepare on-memory loading
train_loader = DataLoader(train_data, batch_size, shuffle=True, prefetch_factor=prefetch_factor, num_workers=num_workers, generator=gen_torch)
test_loader = DataLoader(test_data, batch_size, shuffle=True, prefetch_factor=prefetch_factor, num_workers=num_workers, generator=gen_torch)

In [8]:
for e in tm.trange(epochs, desc='epoch'):
    model = model.train(True).requires_grad_(True)
    with mlf.start_run(run_name=f'Training-Epoch-{e}'):
        with tm.tqdm(desc='train_batch', total=len(train_loader)) as batch:
            for X, y in train_loader:
                # Send data to GPU
                # TODO: KORNIA
                X: Tensor = X.to(DEVICE)
                y_true: Tensor = y.to(DEVICE)

                # Train
                optim.zero_grad()
                logits: Tensor = model(X)
                loss: Tensor = loss_fn(logits, y_true)
                loss.backward()
                optim.step()

                # Track metrics
                metric_train_f1_score.update(logits.detach(), y_true)
                metric_train_loss.update(loss.detach())
                batch.update(1)

        mlf.log_metric('train_loss', metric_train_loss.compute().item(), step=e)
        mlf.log_metric('train_f1_score', metric_train_f1_score.compute().item(), step=e)
        metric_train_f1_score.reset()
        metric_train_loss.reset()

epoch:   0%|          | 0/25 [00:00<?, ?it/s]

train_batch: 100%|██████████| 469/469 [00:07<00:00, 62.82it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 66.31it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 65.71it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 65.33it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 64.82it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 66.15it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 64.86it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 65.58it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 64.92it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 65.40it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 64.65it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 64.97it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 66.15it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 66.18it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 65.01it/s]
train_batch: 100%|██████████| 469/469 [00:07<00:00, 63.

In [9]:
test_loader = DataLoader(test_data, batch_size, shuffle=False, prefetch_factor=4, num_workers=8, generator=gen_torch)

model.requires_grad_(False)
model = model.eval()

preds = []

for X in test_loader:
    X = X.to(DEVICE)
    pred = torch.argmax(model(X), dim=-1)
    preds.extend(pred.cpu().tolist())

In [10]:
preds

[67,
 91,
 40,
 13,
 69,
 45,
 64,
 54,
 26,
 98,
 40,
 74,
 68,
 95,
 35,
 94,
 43,
 57,
 44,
 67,
 33,
 74,
 18,
 38,
 96,
 96,
 91,
 84,
 99,
 84,
 73,
 17,
 86,
 45,
 97,
 9,
 48,
 23,
 94,
 56,
 29,
 6,
 51,
 32,
 64,
 17,
 58,
 62,
 0,
 97,
 28,
 13,
 46,
 32,
 38,
 39,
 43,
 27,
 13,
 66,
 80,
 45,
 85,
 76,
 80,
 51,
 53,
 23,
 34,
 8,
 27,
 68,
 25,
 18,
 36,
 88,
 77,
 4,
 44,
 59,
 8,
 95,
 63,
 42,
 43,
 37,
 0,
 61,
 2,
 2,
 26,
 86,
 24,
 50,
 80,
 68,
 52,
 73,
 75,
 67,
 41,
 80,
 85,
 14,
 35,
 79,
 26,
 19,
 97,
 42,
 67,
 87,
 85,
 81,
 58,
 65,
 43,
 39,
 3,
 10,
 95,
 64,
 0,
 60,
 11,
 84,
 61,
 68,
 35,
 17,
 11,
 73,
 70,
 29,
 71,
 57,
 60,
 49,
 2,
 96,
 20,
 31,
 36,
 94,
 31,
 77,
 44,
 86,
 6,
 67,
 72,
 41,
 75,
 86,
 59,
 98,
 30,
 73,
 94,
 80,
 77,
 24,
 92,
 38,
 41,
 62,
 35,
 31,
 46,
 85,
 34,
 76,
 84,
 80,
 24,
 76,
 78,
 0,
 54,
 44,
 50,
 74,
 98,
 99,
 96,
 77,
 41,
 88,
 40,
 67,
 86,
 33,
 29,
 79,
 73,
 9,
 75,
 91,
 59,
 9,
 75,
 3,
 40,
 3

In [11]:
dataf = {'Image': [], 'Class': []}
dataf['Class'] = preds

In [12]:
test_data._GenImageDataset__data['Class'] = preds

In [13]:
test_data._GenImageDataset__data.to_csv('./submission3.csv', index=False)

In [14]:
test_data.data_

AttributeError: 'GenImageDataset' object has no attribute 'data_'