In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
import wandb

import torch
from torch import nn
from torchvision import models
import torch.optim as optim

from dataset import get_dataloaders, get_datasets
from utils import seed_everything
from trainer import Trainer

# Params
# Raise PIL's decompression-bomb threshold so very large images can be opened.
Image.MAX_IMAGE_PIXELS = 1e11
CFG = {
    'seed': 42,
    'base_model': 'resnet34',   # resnet18/34/50, efficientnet_v2_s/m/l
    'img_size': 1024,
    'batch_size': 4,
    'freeze_epochs': 1,
    'epochs': 10,
    'base_lr': 1e-3,
    'affine_degrees': 10,
    'affine_translate': (0.1, 0.2),
    'affine_scale': (0.8, 1.2),
    'cv_fold': 5,
}

# Wandb
# SECURITY: never commit an API key to a notebook. The key is taken from the
# WANDB_API_KEY environment variable; when it is unset, key=None lets
# wandb.login() fall back to its own credential lookup (e.g. ~/.netrc or prompt).
# Any key that was previously hardcoded here should be revoked.
wandb.login(key=os.environ.get('WANDB_API_KEY'))
run = wandb.init(project='kaggle-ubc-ocean', config=CFG, tags=['torch', 'baseline'])

# Label encoder/decoder
encode = {'HGSC': 0, 'LGSC': 1, 'EC': 2, 'CC': 3, 'MC': 4}
decode = {v: k for k, v in encode.items()}

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Paths
# The project root can be overridden with UBC_OCEAN_ROOT so the notebook is not
# tied to one machine; the default keeps the original location.
root = os.environ.get('UBC_OCEAN_ROOT', '/media/latlab/MR/projects/kaggle-ubc-ocean')
data_dir = os.path.join(root, 'data')
results_dir = os.path.join(root, 'results')
train_csv = 'train.csv'
train_image_dir = os.path.join(data_dir, 'train_images')
train_thumbnail_dir = os.path.join(data_dir, 'train_thumbnails')

# Seed
seed_everything(CFG['seed'])

# Load data
df = pd.read_csv(os.path.join(data_dir, train_csv))
# Encode string labels as integer class ids. Series.map() turns any label that
# is missing from `encode` into NaN, so fail loudly instead of training on it.
label_codes = df['label'].map(encode)
if label_codes.isna().any():
    unknown_labels = sorted(df.loc[label_codes.isna(), 'label'].astype(str).unique())
    raise ValueError(f'Labels missing from the encoder: {unknown_labels}')
df['label'] = label_codes

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnaraiadam88[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/latlab/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112648900598288, max=1.0…

In [2]:
def train_model(CFG, train_image_dir, train_thumbnail_dir, df_train, df_validation, encode, wandb_log=False):
    """Fine-tune a pretrained torchvision classifier on one train/validation split.

    The backbone is first frozen and only the new classification head is
    trained for ``CFG['freeze_epochs']`` epochs; then ``trainer.unfreeze()`` is
    called and training continues for ``CFG['epochs']`` epochs.

    Args:
        CFG: Configuration dict; this function reads ``'base_model'``,
            ``'base_lr'``, ``'freeze_epochs'`` and ``'epochs'`` and forwards the
            whole dict to ``get_datasets`` / ``get_dataloaders``.
        train_image_dir: Directory of full-size training images.
        train_thumbnail_dir: Directory of training thumbnails.
        df_train: DataFrame with the training rows.
        df_validation: DataFrame with the validation rows.
        encode: Mapping from label name to class index; its length defines the
            number of output classes.
        wandb_log: Whether the trainer should log to Weights & Biases. Logging
            is only enabled while a wandb run is active.

    Returns:
        Tuple ``(model, best_balanced_acc)`` as returned by the second
        ``trainer.train_epochs`` call (presumably the trained model and the best
        validation balanced accuracy; verify against ``Trainer``).

    Raises:
        ValueError: If the chosen architecture exposes neither an ``fc`` layer
            nor a ``classifier`` Sequential ending in a Linear layer.
    """
    # Data loaders
    datasets = get_datasets(CFG, train_image_dir, train_thumbnail_dir, df_train, df_validation)
    dataloaders = get_dataloaders(CFG, datasets)

    # Model definition: pretrained backbone with all existing weights frozen.
    model = models.get_model(CFG['base_model'], weights='DEFAULT').to(device)
    for param in model.parameters():
        param.requires_grad = False

    # Replace the classification head; the new layer is created after the
    # freeze loop, so it is trainable by default.
    num_classes = len(encode)
    classifier = getattr(model, 'classifier', None)
    if hasattr(model, 'fc'):
        # ResNet-style models expose the head as `fc`.
        model.fc = nn.Linear(model.fc.in_features, num_classes).to(device)
    elif isinstance(classifier, nn.Sequential) and isinstance(classifier[-1], nn.Linear):
        # EfficientNet-style models keep the head as the last layer of `classifier`.
        classifier[-1] = nn.Linear(classifier[-1].in_features, num_classes).to(device)
    else:
        raise ValueError(f"Unsupported base_model head layout: {CFG['base_model']}")

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=CFG['base_lr'], momentum=0.9)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # Honour the caller's wandb_log flag (it used to be ignored), but never ask
    # the trainer to log when no wandb run is active (wandb.run is None then).
    log_to_wandb = bool(wandb_log) and wandb.run is not None

    # Training
    trainer = Trainer(model, dataloaders, loss_fn, optimizer, exp_lr_scheduler, device, metric='balanced_accuracy', wandb_log=log_to_wandb)
    model, _ = trainer.train_epochs(num_epochs=CFG['freeze_epochs'])
    trainer.unfreeze()
    model, best_balanced_acc = trainer.train_epochs(num_epochs=CFG['epochs'])
    return model, best_balanced_acc

In [3]:
# Stratified K-fold cross-validation: train one model per fold, save its
# weights and log the balanced accuracy returned by train_model.
skf = StratifiedKFold(n_splits=CFG['cv_fold'], random_state=CFG['seed'], shuffle=True)

# Create the checkpoint directory up front so torch.save cannot fail on a
# missing folder after a whole fold has already been trained.
models_dir = os.path.join(results_dir, 'models')
os.makedirs(models_dir, exist_ok=True)

balanced_acc_list = []
lb = df.label
# Only the labels matter for stratification, so a dummy feature array is passed.
for cv, (train_index, valid_index) in enumerate(skf.split(np.zeros(len(lb)), lb)):
    print(f"Cross-validation fold {cv+1}/{CFG['cv_fold']}")
    df_train = df.iloc[train_index]
    df_validation = df.iloc[valid_index]
    run_name = f'{run.name}-cv{cv+1}'
    model, balanced_acc = train_model(CFG, train_image_dir, train_thumbnail_dir, df_train, df_validation, encode)
    balanced_acc_list.append(balanced_acc)
    torch.save(model.state_dict(), os.path.join(models_dir, f'ubc-ocean_{run_name}.pt'))
    wandb.log({f'best_balanced_acc_cv{cv+1}': balanced_acc})

# Summary metric across all folds, then close the wandb run.
wandb.log({'mean_best_balanced_acc': float(np.mean(balanced_acc_list))})
wandb.finish()

Cross-validation fold 1/5
Epoch 1/1
----------


100%|██████████| 108/108 [00:06<00:00, 16.07it/s]


train loss: 1.6161, test loss: 1.3718, balanced_accuracy: 0.4012

Training complete in 0m 10s
Final balanced_accuracy: 0.401235

Epoch 1/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.86it/s]


train loss: 1.4610, test loss: 1.4233, balanced_accuracy: 0.4043

Epoch 2/10
----------


100%|██████████| 108/108 [00:11<00:00,  9.18it/s]


train loss: 1.3297, test loss: 1.7024, balanced_accuracy: 0.4290

Epoch 3/10
----------


100%|██████████| 108/108 [00:11<00:00,  9.13it/s]


train loss: 1.1995, test loss: 1.3529, balanced_accuracy: 0.5926

Epoch 4/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.95it/s]


train loss: 1.2600, test loss: 1.4452, balanced_accuracy: 0.5370

Epoch 5/10
----------


100%|██████████| 108/108 [00:11<00:00,  9.32it/s]


train loss: 1.1831, test loss: 1.7590, balanced_accuracy: 0.4506

Epoch 6/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.70it/s]


train loss: 1.1216, test loss: 1.1616, balanced_accuracy: 0.6049

Epoch 7/10
----------


100%|██████████| 108/108 [00:11<00:00,  9.05it/s]


train loss: 0.7889, test loss: 0.9612, balanced_accuracy: 0.6574

Epoch 8/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.95it/s]


train loss: 0.8258, test loss: 0.9841, balanced_accuracy: 0.6451

Epoch 9/10
----------


100%|██████████| 108/108 [00:11<00:00,  9.13it/s]


train loss: 0.8203, test loss: 0.9607, balanced_accuracy: 0.6698

Epoch 10/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.84it/s]


train loss: 0.7456, test loss: 0.9928, balanced_accuracy: 0.6327

Training complete in 2m 35s
Final balanced_accuracy: 0.632716

Cross-validation fold 2/5
Epoch 1/1
----------


100%|██████████| 108/108 [00:05<00:00, 18.75it/s]


train loss: 1.7091, test loss: 1.6258, balanced_accuracy: 0.2901

Training complete in 0m 9s
Final balanced_accuracy: 0.290123

Epoch 1/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.98it/s]


train loss: 1.4492, test loss: 1.2627, balanced_accuracy: 0.4753

Epoch 2/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.84it/s]


train loss: 1.3089, test loss: 1.1774, balanced_accuracy: 0.5432

Epoch 3/10
----------


100%|██████████| 108/108 [00:11<00:00,  9.03it/s]


train loss: 1.1793, test loss: 1.2040, balanced_accuracy: 0.5586

Epoch 4/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.85it/s]


train loss: 1.1961, test loss: 1.3144, balanced_accuracy: 0.5586

Epoch 5/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.88it/s]


train loss: 1.1989, test loss: 0.9742, balanced_accuracy: 0.6235

Epoch 6/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.88it/s]


train loss: 1.1759, test loss: 1.1285, balanced_accuracy: 0.6451

Epoch 7/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.74it/s]


train loss: 0.8820, test loss: 0.7914, balanced_accuracy: 0.7006

Epoch 8/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.77it/s]


train loss: 0.8435, test loss: 0.7284, balanced_accuracy: 0.7407

Epoch 9/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.85it/s]


train loss: 0.7484, test loss: 0.7725, balanced_accuracy: 0.7377

Epoch 10/10
----------


100%|██████████| 108/108 [00:11<00:00,  9.00it/s]


train loss: 0.7624, test loss: 0.7100, balanced_accuracy: 0.7222

Training complete in 2m 36s
Final balanced_accuracy: 0.722222

Cross-validation fold 3/5
Epoch 1/1
----------


100%|██████████| 108/108 [00:05<00:00, 18.75it/s]


train loss: 1.5937, test loss: 1.4658, balanced_accuracy: 0.2809

Training complete in 0m 10s
Final balanced_accuracy: 0.280864

Epoch 1/10
----------


100%|██████████| 108/108 [00:16<00:00,  6.54it/s]


train loss: 1.5453, test loss: 1.4022, balanced_accuracy: 0.3488

Epoch 2/10
----------


100%|██████████| 108/108 [00:17<00:00,  6.12it/s]


train loss: 1.3411, test loss: 1.2215, balanced_accuracy: 0.6389

Epoch 3/10
----------


100%|██████████| 108/108 [00:18<00:00,  5.74it/s]


train loss: 1.2579, test loss: 1.0858, balanced_accuracy: 0.6204

Epoch 4/10
----------


100%|██████████| 108/108 [00:19<00:00,  5.42it/s]


train loss: 1.3061, test loss: 1.3812, balanced_accuracy: 0.5401

Epoch 5/10
----------


100%|██████████| 108/108 [00:19<00:00,  5.54it/s]


train loss: 1.0207, test loss: 1.1677, balanced_accuracy: 0.5710

Epoch 6/10
----------


100%|██████████| 108/108 [00:19<00:00,  5.62it/s]


train loss: 0.9987, test loss: 1.3499, balanced_accuracy: 0.5216

Epoch 7/10
----------


100%|██████████| 108/108 [00:20<00:00,  5.31it/s]


train loss: 0.8170, test loss: 0.8342, balanced_accuracy: 0.7160

Epoch 8/10
----------


100%|██████████| 108/108 [00:19<00:00,  5.58it/s]


train loss: 0.7419, test loss: 0.9548, balanced_accuracy: 0.7160

Epoch 9/10
----------


100%|██████████| 108/108 [00:16<00:00,  6.51it/s]


train loss: 0.7877, test loss: 0.8903, balanced_accuracy: 0.7531

Epoch 10/10
----------


100%|██████████| 108/108 [00:15<00:00,  7.17it/s]


train loss: 0.6980, test loss: 0.9017, balanced_accuracy: 0.7562

Training complete in 3m 50s
Final balanced_accuracy: 0.756173

Cross-validation fold 4/5
Epoch 1/1
----------


100%|██████████| 108/108 [00:05<00:00, 18.76it/s]


train loss: 1.5861, test loss: 1.4506, balanced_accuracy: 0.4259

Training complete in 0m 10s
Final balanced_accuracy: 0.425926

Epoch 1/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.87it/s]


train loss: 1.4112, test loss: 1.3579, balanced_accuracy: 0.4722

Epoch 2/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.98it/s]


train loss: 1.4289, test loss: 1.2806, balanced_accuracy: 0.5154

Epoch 3/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.90it/s]


train loss: 1.2402, test loss: 1.5361, balanced_accuracy: 0.4660

Epoch 4/10
----------


100%|██████████| 108/108 [00:12<00:00,  8.83it/s]


train loss: 1.1861, test loss: 1.0757, balanced_accuracy: 0.5710

Epoch 5/10
----------


100%|██████████| 108/108 [00:11<00:00,  9.02it/s]


In [None]:
# Final training on all data
# NOTE: the full DataFrame is passed as both the training and the validation
# split, so the metric reported during this run is measured on training data.
# NOTE: the cross-validation cell ends with wandb.finish(), so no wandb run is
# active here unless a new one is started before running this cell.
# Use a dedicated name instead of the `run_name` left over from the
# cross-validation loop; reusing it would overwrite the last fold's checkpoint.
final_run_name = f'{run.name}-final'
final_models_dir = os.path.join(results_dir, 'models')
os.makedirs(final_models_dir, exist_ok=True)
model, _ = train_model(CFG, train_image_dir, train_thumbnail_dir, df, df, encode, wandb_log=True)
torch.save(model.state_dict(), os.path.join(final_models_dir, f'ubc-ocean_{final_run_name}.pt'))