In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ../

/mnt/NVME1TB/Projects/severstal


In [3]:
TRAIN_IMAGES = '/home/denilv/Projects/severstal/data/train_images/'
TRAIN_CSV = '/mnt/NVME1TB/Projects/severstal/data/cls_df/train.csv'
VALID_CSV = '/mnt/NVME1TB/Projects/severstal/data/cls_df/valid.csv'
TEST_IMAGES = '/home/denilv/Projects/severstal/data/test_images/'

In [4]:
EPOCHS = 30
LR = 1e-3
BATCH_SIZE = 32
CROP_SIZE = None

CUDA_VISIBLE_DEVICES = '0'

In [16]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = CUDA_VISIBLE_DEVICES
import torch
import numpy as np
import pandas as pd
import segmentation_models_pytorch as smp

from albumentations.augmentations.functional import normalize
from tqdm.auto import tqdm
from modules.comp_tools import ClsDataset, AUGMENTATIONS_TRAIN, get_model, predict_cls, ModelAgg
from modules.common import rle_decode
from catalyst.dl.runner import SupervisedRunner
from catalyst.dl.callbacks import F1ScoreCallback, AccuracyCallback
from torch.utils.data import DataLoader as BaseDataLoader
from torch.utils.data import Dataset as BaseDataset

In [6]:
def to_tensor(x, **kwargs):
    return x.transpose(2, 0, 1).astype('float32')

In [7]:
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

In [8]:
preprocessing_fn = lambda x: to_tensor(normalize(x, MEAN, STD))

In [9]:
ENCODER = 'resnet50'
ENCODER_WEIGHTS = 'imagenet'
ACTIVATION = 'sigmoid'

CONTINUE = '/mnt/NVME1TB/Projects/severstal/logs/cls_resnet50_new_wave/checkpoints/best.pth'

model = get_model(ENCODER, 2, ENCODER_WEIGHTS, load_weights=CONTINUE)
model = model.cuda()
model = model.eval()

resnet50 = model

Loading /mnt/NVME1TB/Projects/severstal/logs/cls_resnet50_new_wave/checkpoints/best.pth
<All keys matched successfully>


In [10]:
ENCODER = 'resnet18'
ENCODER_WEIGHTS = 'imagenet'
ACTIVATION = 'sigmoid'

CONTINUE = '/mnt/NVME1TB/Projects/severstal/logs/cls_resnet18_new_wave/checkpoints/best.pth'

model = get_model(ENCODER, 2, ENCODER_WEIGHTS, load_weights=CONTINUE)
model = model.cuda()
model = model.eval()

resnet18 = model

Loading /mnt/NVME1TB/Projects/severstal/logs/cls_resnet18_new_wave/checkpoints/best.pth
<All keys matched successfully>


In [29]:
train_df = pd.read_csv(TRAIN_CSV).fillna('')
valid_df = pd.read_csv(VALID_CSV).fillna('')

valid_df.loc[valid_df.has_defect == 0, 'class'] = -1
train_df.loc[train_df.has_defect == 0, 'class'] = -1

train_dataset = ClsDataset(
    train_df,
    img_prefix=TRAIN_IMAGES, 
    augmentations=AUGMENTATIONS_TRAIN, 
    preprocess_img=preprocessing_fn,
)
valid_dataset = ClsDataset(
    valid_df,
    img_prefix=TRAIN_IMAGES, 
    augmentations=None, #AUGMENTATIONS_TRAIN, 
    preprocess_img=preprocessing_fn,
)
train_dl = BaseDataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
valid_dl = BaseDataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [15]:
from sklearn.metrics import f1_score as sk_f1_score
from sklearn.metrics import accuracy_score as sk_accuracy_score
from sklearn.metrics import classification_report

from tqdm.auto import tqdm

from catalyst.dl.utils.criterion import f1_score, accuracy

import ttach as tta

In [37]:
def score(model, th=0.5):
    pr, tr = predict_cls(model, valid_dl)
    pred_probs = pr[:, 1].numpy()
    gt = tr[:, 1]
    
    pred_labels = pred_probs > th
    acc = sk_accuracy_score(gt, pred_labels)
    fscore = sk_f1_score(gt, pred_labels)
    print('Acc', acc)
    print('F1', fscore)

In [32]:
score(resnet50)

HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Acc 0.9613853503184714
F1 0.9634926608957471


In [35]:
ens_model = ModelAgg([resnet18, resnet50])
tta_ens_model = tta.ClassificationTTAWrapper(ens_model, tta.aliases.hflip_transform())
tta_resnet50 = tta.ClassificationTTAWrapper(resnet50, tta.aliases.hflip_transform())
tta_resnet18 = tta.ClassificationTTAWrapper(resnet18, tta.aliases.hflip_transform())

models = [
    ('resnet18', resnet18),
    ('resnet50', resnet50),
    ('ens_model', ens_model),
    ('tta_resnet18', tta_resnet18),
    ('tta_resnet50', tta_resnet50),
    ('tta_ens_model', tta_ens_model),
]

In [38]:
for model_name, model in models:
    print(model_name)
    score(model)
    

resnet18


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Acc 0.9617834394904459
F1 0.9634146341463413
resnet50


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Acc 0.9613853503184714
F1 0.9634926608957471
ens_model


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Acc 0.9669585987261147
F1 0.9687146626460611
tta_resnet18


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Acc 0.9605891719745223
F1 0.9623430962343095
tta_resnet50


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Acc 0.9633757961783439
F1 0.9654654654654654
tta_ens_model


HBox(children=(IntProgress(value=0, max=79), HTML(value='')))


Acc 0.9673566878980892
F1 0.9690799396681749


In [33]:
# True Positive (TP): we predict a label of 1 (positive), and the true label is 1.
TP = np.sum(np.logical_and(pred_labels == 1, true_labels == 1))
 
# True Negative (TN): we predict a label of 0 (negative), and the true label is 0.
TN = np.sum(np.logical_and(pred_labels == 0, true_labels == 0))
 
# False Positive (FP): we predict a label of 1 (positive), but the true label is 0.
FP = np.sum(np.logical_and(pred_labels == 1, true_labels == 0))
 
# False Negative (FN): we predict a label of 0 (negative), but the true label is 1.
FN = np.sum(np.logical_and(pred_labels == 0, true_labels == 1))
 
print('TP: %i, FP: %i, TN: %i, FN: %i' % (TP,FP,TN,FN))

TP: 1292, FP: 54, TN: 1125, FN: 41
