In [10]:
%load_ext autoreload
%autoreload 2

In [8]:
import numpy as np
import pandas as pd
import cv2
from pathlib import Path
import skimage.io as io
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
import os
import pickle
import pydicom
from metrics import Pfbeta

In [9]:
# Root folder of the competition data, relative to the notebook's working directory.
DATA_DIR = Path('input/rsna-breast-cancer-detection/')

# Load train.csv from DATA_DIR; the cells visible in this section do not read `train`.
train = pd.read_csv(DATA_DIR.joinpath('train.csv'))

In [2]:
from configs import *
from kuma_utils.utils import sigmoid
from sklearn.metrics import roc_auc_score
from metrics import *

In [3]:
# Instantiate the experiment configs so the cell output lists them
# (the class names come from the star-import of `configs`).
[
    Aug07(),
    Aug07pl2aug2(),
    Res02Aux0(),
    Res02pl0pr0(),
    AuxLoss03(),
    Aug07mod1(),
]

[<configs.Aug07 at 0x7fdfd0589d30>,
 <configs.Aug07pl2aug2 at 0x7fdfd0589fd0>,
 <configs.Res02Aux0 at 0x7fdfd0589f70>,
 <configs.Res02pl0pr0 at 0x7fdfd0589f40>,
 <configs.AuxLoss03 at 0x7fdfd0589c40>]

In [16]:
# Stage definitions consumed by the evaluation loop further down.
#   'cfgs'        - configs whose stored predictions are blended in at this stage
#   'weights'     - one blending weight per entry of 'cfgs'
#   'percentiles' - [lower, upper] percentile cut-offs applied to the ensemble score;
#                   an upper value of None disables the upper cut-off.
configs_first = {'cfgs': [Aug07()], 'weights': [1], 'percentiles': [0, None]}
configs_second = {'cfgs': [AuxLoss03()], 'weights': [1], 'percentiles': [0, None]}
# The last stage carries no 'percentiles': the loop only blends its models in.
configs_third = {
    'cfgs': [
        Aug07pl2aug2(),
        Res02Aux0(),
        Res02pl0pr0(),
        Aug07lr0(),
        Aug07mod1(),
    ],
    'weights': [1] * 5,
}

In [17]:
def predict_test(cfg, fold):
    """Return the stored predictions (and targets, if any) of one fold.

    Marked as a dummy by its author: nothing is inferred here, the values are
    read from ``results/<cfg.name>/predictions.pickle``.

    Args:
        cfg: config object; only ``cfg.name`` is used, to locate the pickle.
        fold: index into the per-fold entries stored in the pickle.

    Returns:
        A pair ``(probabilities, target)`` where ``probabilities`` is
        ``sigmoid`` applied to the ``'outoffolds'`` entry of ``fold`` and
        ``target`` is the ``'targets'`` entry of ``fold``, or ``None`` when
        the pickle has no ``'targets'`` key.
    """
    pickle_path = f'results/{cfg.name}/predictions.pickle'
    # NOTE(review): pickle.load executes arbitrary code; only point this at
    # result files produced by trusted runs.
    with open(pickle_path, 'rb') as fh:
        stored = pickle.load(fh)
    fold_target = stored['targets'][fold] if 'targets' in stored.keys() else None
    return sigmoid(stored['outoffolds'][fold]), fold_target


def weighted_average(df, weights):
    """Return the row-wise weighted mean of the columns of ``df``.

    Column ``i`` is scaled by ``weights[i]`` and the result is divided by the
    sum of all weights, so the weights do not have to sum to one.

    Args:
        df: DataFrame with one column per weight.
        weights: sequence of numeric weights, one per column of ``df``.

    Returns:
        1-D float numpy array with ``df.shape[0]`` entries.

    Raises:
        AssertionError: if the number of weights differs from the number of
            columns.
        ValueError: if a non-empty ``weights`` sums to zero (the normalised
            average would otherwise be silently filled with inf/NaN).
    """
    assert df.shape[1] == len(weights), (
        f'expected {df.shape[1]} weights (one per column), got {len(weights)}')
    # The normaliser is loop-invariant: compute it once.
    total = sum(weights)
    if len(weights) and total == 0:
        raise ValueError('weights must not sum to zero')
    res = np.zeros(df.shape[0])
    for i, w in enumerate(weights):
        # Same operation order as before (scale, then normalise) so results
        # stay bit-identical for valid inputs.
        res += df.iloc[:, i].values * w / total
    return res

In [18]:
# Two instances of the project metric class, built with different options.
# Judging by the printed results further down, `pf1` yields a single score and
# `pf1_bin` a (score, threshold) pair - TODO confirm against metrics.Pfbeta.
pf1, pf1_bin = Pfbeta(binarize=False), Pfbeta(return_thres=True)


In [19]:
N_FOLDS = 4  # number of folds evaluated from each predictions.pickle


def _add_stage(preds, preds_cols, weights, stage, fold):
    """Blend the models of one stage into ``preds`` and refresh 'ensemble'.

    For every config in ``stage['cfgs']`` the fold predictions are loaded with
    ``predict_test`` and stored in a column named ``cfg.name``. ``preds_cols``
    and ``weights`` are extended in place, so the ensemble is always the
    weighted average over every model added so far, across stages.

    Returns:
        The target returned by ``predict_test`` for the last config of the
        stage (``None`` if the stage lists no configs).
    """
    target = None
    for cfg, w in zip(stage['cfgs'], stage['weights']):
        pred, target = predict_test(cfg, fold)
        preds[cfg.name] = pred.reshape(-1)
        preds_cols.append(cfg.name)
        weights.append(w)
    preds['ensemble'] = weighted_average(preds[preds_cols], weights)
    return target


def _apply_cutoffs(preds, percentiles):
    """Settle undecided rows whose ensemble score is beyond a percentile cut-off.

    ``preds['mask']`` is -1 for undecided rows, 0 for rows settled as negative
    and 1 for rows settled as positive. Only undecided rows are modified.

    Args:
        preds: DataFrame with 'ensemble' and 'mask' columns; 'mask' is updated
            in place.
        percentiles: ``[lower, upper]``; ``upper`` may be ``None`` to skip the
            upper cut-off.
    """
    lower_pct, upper_pct = percentiles
    undecided = preds['mask'] == -1
    if not undecided.any():
        # Nothing left to decide; avoid taking a percentile of an empty pool.
        return
    scores = preds['ensemble']
    # Both thresholds come from the same snapshot of undecided rows, so the
    # upper cut-off does not depend on how many rows the lower one removed.
    pool = scores[undecided]
    # NOTE(review): the comparison is inclusive, so even a lower percentile of
    # 0 settles the row(s) holding the minimum score (the recorded run drops
    # one row at stage 2). Kept as in the original - confirm this is intended.
    is_low = undecided & (scores <= np.percentile(pool, lower_pct))
    preds.loc[is_low, 'mask'] = 0
    if upper_pct is not None:
        # A row already settled as negative is not flipped to positive.
        is_high = undecided & ~is_low & (scores >= np.percentile(pool, upper_pct))
        preds.loc[is_high, 'mask'] = 1


for fold in range(N_FOLDS):
    preds = pd.DataFrame()
    preds_cols = []
    weights = []

    # Stage 1: first model(s); every row starts out undecided.
    target = _add_stage(preds, preds_cols, weights, configs_first, fold)
    preds['mask'] = -1
    _apply_cutoffs(preds, configs_first['percentiles'])
    print('Num passed stage 1: ', (preds['mask'] == -1).sum())

    # Stage 2: add the second model(s) and cut again among the undecided rows.
    _add_stage(preds, preds_cols, weights, configs_second, fold)
    _apply_cutoffs(preds, configs_second['percentiles'])
    print('Num passed stage 2: ', (preds['mask'] == -1).sum())

    # Stage 3: add the remaining models; no cut-off, only scoring.
    _add_stage(preds, preds_cols, weights, configs_third, fold)
    preds['target'] = target  # targets as returned while loading stage 1
    # Reference score: plain ensemble of all models, ignoring the mask.
    print(fold, 'ref', pf1_bin(preds['ensemble'], preds['target']))
    # Rows settled in stage 1/2 are forced to their decided label.
    preds.loc[preds['mask'] == 0, 'ensemble'] = 0
    preds.loc[preds['mask'] == 1, 'ensemble'] = 1
    print(fold, pf1(preds['ensemble'], preds['target']), pf1_bin(preds['ensemble'], preds['target']))

Num passed stage 1:  5955
Num passed stage 2:  5954
0 ref (0.5551020408163265, 0.28)
0 0.3769341991655249 (0.5551020408163265, 0.28)
Num passed stage 1:  5957
Num passed stage 2:  5956
1 ref (0.5473684210526315, 0.35000000000000003)
1 0.3419637282421752 (0.5473684210526315, 0.35000000000000003)
Num passed stage 1:  5955
Num passed stage 2:  5954
2 ref (0.5051546391752577, 0.22)
2 0.3339334075119696 (0.5051546391752577, 0.22)
Num passed stage 1:  5955
Num passed stage 2:  5954
3 ref (0.48936170212765956, 0.36)
3 0.3343699566463442 (0.48936170212765956, 0.36)
