In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import cv2
from pathlib import Path
import skimage.io as io
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
import os
import pickle
import pydicom
from metrics import Pfbeta

In [3]:
# Dataset folder, resolved relative to the notebook's working directory.
DATA_DIR = Path('input/rsna-breast-cancer-detection/')
# Load the train.csv table stored inside DATA_DIR.
train = pd.read_csv(DATA_DIR.joinpath('train.csv'))

In [10]:
from configs import *
from kuma_utils.utils import sigmoid
from sklearn.metrics import roc_auc_score
from metrics import *

In [11]:
# Cascade definition consumed by the evaluation loop further down.
#   cfgs        - config objects whose cached predictions join the ensemble at this stage
#   weights     - one blending weight per entry in `cfgs`; weights of earlier stages
#                 stay in the ensemble when later stages are added
#   percentiles - [low, high] percentiles of the ensemble score; rows at or below `low`
#                 are masked 0, rows at or above `high` are masked 1 (None disables `high`)
configs_first = dict(
    cfgs=[Baseline4mod0()],
    weights=[1],
    percentiles=[60, None],
)
configs_second = dict(
    cfgs=[Aug07()],
    weights=[2],
    percentiles=[50, None],
)
# The last stage carries no 'percentiles': the loop applies no cut after it.
configs_third = dict(
    cfgs=[AuxLoss02v0(), Res02Aux0()],
    weights=[2, 2],
)


In [12]:
def predict_test(cfg, fold):
    """Stand-in ("dummy") for test-time inference: serve cached predictions for one fold.

    Reads ``results/{cfg.name}/predictions.pickle`` and looks up ``fold`` in its
    ``'outoffolds'`` entry and, when the key exists, in its ``'targets'`` entry.

    Args:
        cfg: Config object; only ``cfg.name`` is used, to locate the pickle file.
        fold: Index into the per-fold entries stored in the pickle.

    Returns:
        ``(pred, target)`` where ``pred`` is ``sigmoid`` of the stored out-of-fold
        values (presumably raw logits - confirm against the training code) and
        ``target`` is the stored target for that fold, or ``None`` when the pickle
        has no ``'targets'`` key.
    """
    pickle_path = f'results/{cfg.name}/predictions.pickle'
    with open(pickle_path, 'rb') as handle:
        stored = pickle.load(handle)
    # Targets are optional in the pickle; look them up before the predictions.
    fold_target = stored['targets'][fold] if 'targets' in stored else None
    fold_pred = sigmoid(stored['outoffolds'][fold])
    return fold_pred, fold_target


def weighted_average(df, weights):
    """Blend the columns of ``df`` into one score per row using normalised weights.

    Column ``i`` contributes ``df.iloc[:, i] * weights[i] / sum(weights)``, so the
    weights do not need to sum to one.

    Args:
        df: DataFrame with one numeric column per ensemble member.
        weights: Sequence with one weight per column of ``df`` (positional match).

    Returns:
        1-D float ``numpy.ndarray`` of length ``df.shape[0]``; all zeros when
        ``df`` has no columns.

    Raises:
        AssertionError: If the number of columns and weights differ.
        ValueError: If the weights sum to zero, which would otherwise silently
            produce NaN/inf scores.
    """
    assert df.shape[1] == len(weights)
    res = np.zeros(df.shape[0])
    if len(weights) == 0:
        return res
    # The normaliser is loop-invariant: compute it once instead of per column.
    total = sum(weights)
    if total == 0:
        raise ValueError('weights must not sum to zero')
    for i, w in enumerate(weights):
        res += df.iloc[:, i].values * w / total
    return res

In [13]:
# Two Pfbeta metric objects (class imported from the project's `metrics` module),
# both called as metric(predictions, targets) by the evaluation loop.
# pf1: built with binarize=False - presumably scores the raw probabilities; confirm in metrics.Pfbeta.
pf1 = Pfbeta(binarize=False)
# pf1_bin: built with return_thres=True - the recorded cell output shows it returning a
# (score, threshold) pair.
pf1_bin = Pfbeta(return_thres=True)


In [14]:
N_FOLDS = 4  # folds evaluated below; assumes every predictions.pickle holds at least this many - TODO confirm


def _load_stage(stage, fold, preds, preds_cols, weights):
    """Add the cached predictions of every model in one cascade stage.

    ``preds`` may be a dict or a DataFrame (both accept ``preds[name] = array``).
    ``preds_cols`` and ``weights`` are extended in place, so they keep
    accumulating across stages. Returns the target handed back for the last
    model loaded (``None`` when the stage has no models).
    """
    target = None
    for cfg, w in zip(stage['cfgs'], stage['weights']):
        pred, target = predict_test(cfg, fold)
        preds[cfg.name] = pred.reshape(-1)
        preds_cols.append(cfg.name)
        weights.append(w)
    return target


def _apply_percentile_cut(preds, percentiles):
    """Settle undecided rows (mask == -1) whose ensemble score is extreme.

    ``percentiles`` is ``[low, high]``. Undecided rows scoring at or below the
    ``low`` percentile get mask 0; those at or above the ``high`` percentile get
    mask 1. Either bound may be ``None`` to skip that cut. Both thresholds are
    taken from the same snapshot of undecided rows, so the upper cut does not
    depend on how many rows the lower cut has just removed.
    """
    low, high = percentiles[0], percentiles[1]
    pending = preds['mask'] == -1
    if not pending.any():
        return  # nothing left to decide; a percentile of an empty set is undefined
    pending_scores = preds.loc[pending, 'ensemble']
    low_thr = None if low is None else np.percentile(pending_scores, low)
    high_thr = None if high is None else np.percentile(pending_scores, high)
    if low_thr is not None:
        preds.loc[pending & (preds['ensemble'] <= low_thr), 'mask'] = 0
    if high_thr is not None:
        preds.loc[pending & (preds['ensemble'] >= high_thr), 'mask'] = 1


# Three-stage cascade, evaluated per fold on cached out-of-fold predictions.
# mask: -1 = still undecided, 0 = settled (final score forced to 0),
#        1 = settled (final score forced to 1).
for fold in range(N_FOLDS):
    preds_cols = []
    weights = []

    # Stage 1: first model(s) score every row; the lowest-scoring share is settled.
    stage_one = {}
    target = _load_stage(configs_first, fold, stage_one, preds_cols, weights)
    preds = pd.DataFrame(stage_one)
    preds['mask'] = -1
    preds['ensemble'] = weighted_average(preds[preds_cols], weights)
    _apply_percentile_cut(preds, configs_first['percentiles'])
    print('Num passed stage 1: ', (preds['mask'] == -1).sum())

    # Stage 2: add more models, re-blend, and cut again among the undecided rows only.
    _load_stage(configs_second, fold, preds, preds_cols, weights)
    preds['ensemble'] = weighted_average(preds[preds_cols], weights)
    _apply_percentile_cut(preds, configs_second['percentiles'])
    print('Num passed stage 2: ', (preds['mask'] == -1).sum())

    # Stage 3: the full ensemble scores whatever is still undecided.
    _load_stage(configs_third, fold, preds, preds_cols, weights)
    preds['target'] = target  # targets come from the stage-1 pickle
    preds['ensemble'] = weighted_average(preds[preds_cols], weights)

    # Reference: full ensemble on every row, ignoring the cascade decisions.
    print(fold, 'ref', pf1_bin(preds['ensemble'], preds['target']))

    # Cascade result: rows settled in earlier stages get their hard decision.
    preds.loc[preds['mask'] == 0, 'ensemble'] = 0
    preds.loc[preds['mask'] == 1, 'ensemble'] = 1
    print(fold, pf1(preds['ensemble'], preds['target']), pf1_bin(preds['ensemble'], preds['target']))

Num passed stage 1:  2382
Num passed stage 2:  1191
0 ref (0.5164319248826291, 0.41000000000000003)
0 0.39920842128473494 (0.5164319248826291, 0.41000000000000003)
Num passed stage 1:  2383
Num passed stage 2:  1191
1 ref (0.5302325581395348, 0.25)
1 0.3730773884604484 (0.5327102803738318, 0.25)
Num passed stage 1:  2382
Num passed stage 2:  1191
2 ref (0.5000000000000001, 0.3)
2 0.3662673609233155 (0.5000000000000001, 0.3)
Num passed stage 1:  2382
Num passed stage 2:  1191
3 ref (0.4827586206896552, 0.26)
3 0.33532665345876567 (0.4827586206896552, 0.26)
