In [18]:
from os.path import join, splitext, basename
from os import listdir
import torch
from torch.optim.swa_utils import AveragedModel, update_bn
import numpy as np
import librosa
import torch.nn.functional as F
from tqdm import tqdm
from scipy.stats import mode
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
from natsort import natsorted
from sklearn.metrics import confusion_matrix, roc_auc_score
from coreml.utils.io import read_yml
from coreml.config import Config
from coreml.data.dataloader import get_dataloader
from coreml.models import factory as model_factory
from coreml.utils.logger import set_logger

In [2]:
config_name = 'competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384'

In [3]:
whole_train_2020 = pd.read_csv('/data/siim-isic-melanoma/raw/2020/train.csv')
whole_train_2019 = pd.read_csv('/data/siim-isic-melanoma/raw/2019/train.csv')
test_2020 = pd.read_csv('/data/siim-isic-melanoma/raw/2020/test.csv')

In [4]:
train = whole_train_2019[['image_name', 'target']].append(whole_train_2020[['image_name', 'target']])

In [5]:
train.head()

Unnamed: 0,image_name,target
0,ISIC_0000000,0
1,ISIC_0000001,0
2,ISIC_0000002,1
3,ISIC_0000003,0
4,ISIC_0000004,1


In [6]:
data_config_path = f'/data/siim-isic-melanoma/processed/versions/v3.0.0.yml'
print(f'Reading data config: {data_config_path}')
data_config = read_yml(data_config_path)

Reading data config: /data/siim-isic-melanoma/processed/versions/v3.0.0.yml


In [7]:
prediction_val = pd.read_csv(join('/output', config_name, 'logs/evaluation/val.csv'))

In [8]:
val = pd.DataFrame(data_config['val'])
val['image_name'] = val['file'].apply(lambda x: splitext(basename(x))[0])
val['label'] = val['label'].apply(lambda x: x['classification'])
val = val.drop(columns=['file'])

print('Shapes:')
print(len(prediction_val), len(val))
print()
    
val = pd.merge(prediction_val, val)
    
print('Performance without using SWA')
val_preds = val['target'].values
val_labels = val['label'].values
roc = roc_auc_score(val_labels, val_preds)
print(roc)

Shapes:
6536 6536

Performance without using SWA
0.9181316467934257


In [9]:
config = Config(join('/workspace/coreml', config_name + '.yml'))

In [19]:
set_logger(join(config.log_dir, 'debug.log'))

In [10]:
val_dataloader, _ = get_dataloader(
        config.data, 'val',
        config.model['batch_size'],
        num_workers=10,
        shuffle=False,
        drop_last=False)

[33m=> Loading dataset version file: [siim-isic-melanoma, v3.0.0, val][0m


Loading items: 100%|██████████| 6536/6536 [00:00<00:00, 355997.86it/s]


In [16]:
# set epoch
config.model['load']['version'] = config_name
config.model['load']['load_best'] = True

In [39]:
config.checkpoint_dir = '/output/' + config_name + '/checkpoints'

In [40]:
model = model_factory.create(config.model['name'], **{'config': config})

INFO:root:[33mBuilding the network[0m
INFO:root:[33mSetting up the optimizer ...[0m
INFO:root:[33m=> Loading model weights from /output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/best_ckpt.pth.tar[0m
INFO:root:[33mFreezing specified layers[0m
INFO:root:[33mUsing loss functions:[0m
INFO:root:{'train': {'name': 'binary-cross-entropy', 'params': {'reduction': 'none'}}, 'val': {'name': 'binary-cross-entropy', 'params': {'reduction': 'none'}}, 'test': {'name': 'binary-cross-entropy', 'params': {'reduction': 'none'}}}


# SWA

In [21]:
swa_model = AveragedModel(model.network)

In [41]:
# all checkpoints available
available_ckpts = natsorted(glob(join(config.checkpoint_dir, '*')))[::-1]

In [42]:
available_ckpts

['/output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/best_ckpt.pth.tar',
 '/output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/19_ckpt.pth.tar',
 '/output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/17_ckpt.pth.tar',
 '/output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/15_ckpt.pth.tar',
 '/output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/13_ckpt.pth.tar',
 '/output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/11_ckpt.pth.tar',
 '/output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/9_ckpt.pth.tar',
 '/output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/7_ckpt.pth.tar',
 '/output/competitions/2020/melanoma-classificat

In [45]:
swa_epochs = np.arange(5, 15)

for epoch in tqdm(swa_epochs):
    config.model['load']['epoch'] = epoch
    model = model_factory.create(config.model['name'], **{'config': config})
    swa_model.update_parameters(model.network)

  0%|          | 0/10 [00:00<?, ?it/s]INFO:root:[33mBuilding the network[0m
INFO:root:[33mSetting up the optimizer ...[0m
INFO:root:[33m=> Loading model weights from /output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/best_ckpt.pth.tar[0m
INFO:root:[33mFreezing specified layers[0m
INFO:root:[33mUsing loss functions:[0m
INFO:root:{'train': {'name': 'binary-cross-entropy', 'params': {'reduction': 'none'}}, 'val': {'name': 'binary-cross-entropy', 'params': {'reduction': 'none'}}, 'test': {'name': 'binary-cross-entropy', 'params': {'reduction': 'none'}}}
 10%|█         | 1/10 [00:01<00:11,  1.28s/it]INFO:root:[33mBuilding the network[0m
INFO:root:[33mSetting up the optimizer ...[0m
INFO:root:[33m=> Loading model weights from /output/competitions/2020/melanoma-classification/configs/effb5/best-1cycle-wd4e-1-384/checkpoints/best_ckpt.pth.tar[0m
INFO:root:[33mFreezing specified layers[0m
INFO:root:[33mUsing loss functions:[0m


In [46]:
# load the train data loader for doing a forward pass on the model
train_dataloader, _ = get_dataloader(
        config.data, 'train',
        config.model['batch_size'],
        num_workers=10,
        shuffle=False,
        drop_last=False)

INFO:root:[34mCreating train DataLoader[0m


[33m=> Loading dataset version file: [siim-isic-melanoma, v3.0.0, train][0m


Loading items: 100%|██████████| 39015/39015 [00:00<00:00, 668216.07it/s]


In [47]:
# update batch norm params
for batch in tqdm(train_dataloader):
    swa_model(batch['signals'].cuda())

  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
100%|██████████| 133/133 [01:03<00:00,  2.09it/s]


In [49]:
# set the SWA model as the network
model.network = swa_model

In [None]:
# compute the new results
results = model.process_epoch(val_dataloader, mode='val', use_wandb=False)

INFO:root:[34mSetting network training mode:[0m: [33m[35meval[0m[0m
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()
  image = torch.from_numpy(image).float()


In [None]:
# results['auc-roc']