In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. the local `datasets` / `transforms` modules imported below) are
# re-imported before each cell execution instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import pickle
from pathlib import Path
from kuma_utils.utils import sigmoid
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import Ridge

from datasets import *
from transforms import *
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [3]:
def _read_oof_and_pred(oof_csv, pred_csv, **oof_read_kwargs):
    """Read one external model's two CSV files into a {'oof', 'pred'} dict.

    `oof_csv` is read first (with any extra `pd.read_csv` keyword arguments),
    then `pred_csv` with default options.
    """
    oof_frame = pd.read_csv(oof_csv, **oof_read_kwargs)
    pred_frame = pd.read_csv(pred_csv)
    return {'oof': oof_frame, 'pred': pred_frame}


# Competition files: the submission template and the validation table.
test = pd.read_csv('input/g2net-detecting-continuous-gravitational-waves/sample_submission.csv')
valid = pd.read_csv('input/g2net-detecting-continuous-gravitational-waves/v18v.csv')
public748 = pd.read_csv('input/public748.csv')

# External models: each entry holds an 'oof' frame and a 'pred' frame.
iafoss_v5160 = _read_oof_and_pred(
    'input/predictions/val_model_v5_1_6_0.csv',
    'input/predictions/submission_model_v5_1_6_0.csv')
iafoss_v5210 = _read_oof_and_pred(
    'input/predictions/val_model_v5_2_1_0.csv',
    'input/predictions/submission_model_v5_2_1_0.csv')
iafoss_v7000 = _read_oof_and_pred(
    'input/predictions/val_model_v7_0_0_0.csv',
    'input/predictions/submission_model_v7_0_0_0.csv')
chris_v16 = _read_oof_and_pred(
    'input/predictions/valset_16_0.7036.csv',
    'input/predictions/sub_16_bce_mse.csv')
chris_v18 = _read_oof_and_pred(
    'input/predictions/valset_18_0.6997.csv',
    'input/predictions/sub_18_double_model.csv')
# Only the 'id' and 'pred' columns of this evaluation file are loaded.
drhb_exp200 = _read_oof_and_pred(
    'input/predictions/EXP_200_BASELINE_CASHE_V4_EVAL.csv',
    'input/predictions/EXP_200_BASELINE_CASHE_V4.csv',
    usecols=['id', 'pred'])

In [4]:
# Result directories of the in-house models to include. The loading cell below
# reads `predictions_tta.npy` and `outoffolds_tta.npy` from each directory and
# uses the directory name (`Path.stem`) as the column name.
# Commented entries are earlier experiments that are currently excluded.
model_paths = [
    # Path('results/ds_09_val0'),
    # Path('results/ds_09_val1'),
    # Path('results/aug_04'),
    # # Path('results/ds_14'),
    # # Path('results/ds_15'),
    # # Path('results/ds_16'),
    # # Path('results/model_02'),
    # Path('results/ds_14_prep2'),
    Path('results/ds_17_prep1'),
]


In [5]:
def _flatten_test_pred(pred, name):
    """Reduce a loaded test-prediction array to a 1-D vector of scores.

    Parameters
    ----------
    pred : np.ndarray
        Sigmoid-transformed contents of a model's ``predictions_tta.npy``.
    name : str
        Model name, used only to make the error message actionable.

    Returns
    -------
    np.ndarray
        1-D array of scores.

    Raises
    ------
    ValueError
        If the array has none of the recognised layouts. Previously such an
        array was passed on unchanged and only failed later, inside the
        ``pd.DataFrame`` constructor, with a less helpful message.
    """
    if pred.ndim == 1:
        # Already flat. Probing shape[1] on a 1-D array raises IndexError,
        # so this case has to be handled before the checks below.
        return pred
    if pred.shape[1] == 1:
        return pred.reshape(-1)
    if pred.shape[0] == 5:
        # NOTE(review): only the first of the five leading slices is kept
        # (no averaging) — presumably deliberate; confirm.
        return pred[0].reshape(-1)
    raise ValueError(
        f'Unexpected test prediction shape {pred.shape} for model {name!r}')


# Column name (model directory stem) -> 1-D score array.
prediction_df = {}
oof_df = {}
for p in model_paths:
    # Stored arrays are passed through `sigmoid` before use.
    pred = sigmoid(np.load(p/'predictions_tta.npy'))
    oof = sigmoid(np.load(p/'outoffolds_tta.npy'))

    pred = _flatten_test_pred(pred, p.stem)
    prediction_df[p.stem] = pred
    oof_df[p.stem] = oof.reshape(-1)
# Turn the per-model dict into a frame and attach the test ids.
prediction_df = pd.DataFrame(prediction_df)
prediction_df['id'] = test['id']

# External test predictions, in the column order they are appended.
external_test_sources = (
    ('iafoss_v5160', iafoss_v5160),
    ('iafoss_v5210', iafoss_v5210),
    ('iafoss_v7000', iafoss_v7000),
    ('drhb_exp200', drhb_exp200),
    ('chris_v16', chris_v16),
    ('chris_v18', chris_v18),
)
for column, source in external_test_sources:
    # The right-merge on 'id' reorders the external file to follow the ids
    # already in `prediction_df`; its 'target' column becomes the new feature.
    aligned = source['pred'].merge(prediction_df[['id']], on='id', how='right')
    prediction_df[column] = aligned['target']
# Turn the per-model OOF dict into a frame and attach the validation ids.
oof_df = pd.DataFrame(oof_df)
oof_df['id'] = valid['id']

# (new column, source dict, score column inside that source's 'oof' frame)
external_oof_sources = (
    ('iafoss_v5160', iafoss_v5160, 'target'),
    ('iafoss_v5210', iafoss_v5210, 'target'),
    ('iafoss_v7000', iafoss_v7000, 'target'),
    ('drhb_exp200', drhb_exp200, 'pred'),
    ('chris_v16', chris_v16, 'target'),
    ('chris_v18', chris_v18, 'target'),
)
for column, source, score_col in external_oof_sources:
    # NOTE(review): these are attached by row index, not merged on 'id' —
    # this assumes every OOF file is in the same row order as `valid`; confirm.
    oof_df[column] = source['oof'][score_col]

In [6]:
# Attach the validation labels, then drop every row that has a missing value
# in any column (id, any model score, or the label).
oof_df = oof_df.assign(target=valid['target']).dropna()
oof_df

Unnamed: 0,ds_17_prep1,id,iafoss_v5160,iafoss_v5210,iafoss_v7000,drhb_exp200,chris_v16,chris_v18,target
0,1.000000,3c451363c_negative,0.521484,0.571289,0.519531,0.270715,0.246893,0.255523,0
1,0.474567,d036cb94b_negative,0.471680,0.505859,0.490967,0.308455,0.288972,0.292009,0
2,0.397195,ef4bfb410_negative,0.513184,0.550781,0.540039,0.272621,0.351056,0.263222,0
3,0.444880,c374ae4ef_negative,0.532715,0.551758,0.555176,0.320555,0.274788,0.264604,0
4,0.999986,48355bd69_negative,0.597656,0.585938,0.646973,0.808276,0.342514,0.278388,0
...,...,...,...,...,...,...,...,...,...
7970,0.725813,ffbce04ef_weak,0.470703,0.521973,0.489746,0.248373,0.298534,0.250466,1
7971,0.401875,ffc2d976b_weak,0.556641,0.575195,0.619141,0.678827,0.276752,0.250920,1
7972,0.423313,ffc905909_weak,0.998535,0.990234,0.999023,0.926371,0.364422,0.277566,1
7973,1.000000,ffe276f3e_weak,0.531738,0.555664,0.547363,0.315376,0.697399,0.349287,1


In [7]:
# Stack the base models with a ridge regression whose coefficients are
# constrained to be non-negative (`positive=True`).
model = Ridge(positive=True)
X = oof_df.drop(['target', 'id'], axis=1)
y = oof_df['target']
# Select the test features by name, in the training column order, instead of
# relying on both frames having been built in the same order. A missing model
# column now fails here with a KeyError naming it.
X_test = prediction_df[X.columns]
model.fit(X, y)
y_pred = model.predict(X)
y_stack = model.predict(X_test)
# NOTE: `y_pred` is predicted on the same rows the model was fitted on, so the
# score printed as "stacking cv" is an in-sample AUC, not a held-out estimate.
print('stacking cv', roc_auc_score(y, y_pred))
# Coefficients are listed in the order of `X.columns`.
print('weight', model.coef_)

stacking cv 0.7496910838259352
weight [0.00221885 0.31179457 0.35329353 0.30456682 0.05912307 0.01997932
 0.        ]


In [8]:
# Show the feature columns passed to the stacker for the test set.
X_test.columns

Index(['ds_17_prep1', 'iafoss_v5160', 'iafoss_v5210', 'iafoss_v7000',
       'drhb_exp200', 'chris_v16', 'chris_v18'],
      dtype='object')

In [9]:
# Inspect the stacked predictions for the test set (output of model.predict(X_test)).
y_stack

array([0.49505544, 0.94399915, 0.42097785, ..., 0.46043555, 0.48345743,
       0.46926832])

In [10]:
# Choose which scores are written to the submission. Exactly one assignment
# is active; the commented lines are alternative choices (hand-weighted blends
# of other columns, or the stacked prediction `y_stack`) kept for reference.
# test['target'] = prediction_df['aug_04'] * 0.2 + prediction_df['ds_05_aug2'] * 0.2 + prediction_df['public748'] * 0.6
# test['target'] = prediction_df['ds_09'] * 0.33 + prediction_df['ds_13'] * 0.33 + prediction_df['public748'] * 0.33
# Active choice: the single-model predictions from results/ds_17_prep1.
test['target'] = prediction_df['ds_17_prep1']
# test['target'] = y_stack

In [11]:
# Display the submission frame (id, target) before it is written to disk.
test

Unnamed: 0,id,target
0,00054c878,0.471808
1,0007285a3,0.922880
2,00076c5a6,0.460156
3,001349290,0.701877
4,001a52e92,0.950150
...,...,...
7970,ffbce04ef,0.999891
7971,ffc2d976b,0.461674
7972,ffc905909,0.409193
7973,ffe276f3e,0.423735


In [12]:
# test.to_csv('results/stacking_submission_iafossv5160_drhbexp200_ds19val1_aug04_ds14_ds15.csv', index=False)
# Write the submission next to the model's other result files, without the
# frame's index column.
submission_path = Path('results/ds_17_prep1') / 'submission.csv'
test.to_csv(submission_path, index=False)

# Traditional blending (weighted average of rank-normalized predictions)

In [58]:
# Candidate submissions to blend, one column each.
# NOTE(review): the two CSV-backed columns are taken in file order, with no
# merge on 'id' — assumes both files list rows in the same order as
# `prediction_df`; confirm.
blend_sources = dict(
    COMB7=pd.read_csv('input/predictions/COMB_7.csv')['target'], # LB 0.771
    # stacking_res=y_stack, # stack with only my model cv 0.739
    # drhb_exp200=prediction_df['drhb_exp200'], # LB 0.754 (add because it has no CV/LB corr)
    iafoss_5160=prediction_df['iafoss_v5160'], # LB 0.758
    iafoss_5210=prediction_df['iafoss_v5210'], # LB 0.757
    public761=pd.read_csv('input/public761.csv')['target']
)
# Replace each column's scores by their ranks within that column ...
blend_ranks = pd.DataFrame(blend_sources).rank(axis=0)
# ... and divide every column by its own largest rank, so each column tops out at 1.
blend_df = blend_ranks.div(blend_ranks.max(axis=0), axis='columns')

In [59]:
# Pairwise correlation between the blend columns. `blend_df` holds
# rank-normalized scores, so this measures agreement in ordering between the
# sources rather than agreement in raw score values.
blend_df.corr()

Unnamed: 0,COMB7,iafoss_5160,iafoss_5210,public761
COMB7,1.0,0.69075,0.679152,0.84002
iafoss_5160,0.69075,1.0,0.966798,0.678528
iafoss_5210,0.679152,0.966798,1.0,0.669006
public761,0.84002,0.678528,0.669006,1.0


In [62]:
# Blend weights per source column. They total 0.99 rather than 1; a uniform
# scale factor does not change the ordering of the blended scores.
blend_weights = {
    'COMB7': 0.66,
    'iafoss_5160': 0.11,
    'iafoss_5210': 0.11,
    'public761': 0.11,
}
weighted_terms = [blend_df[name] * weight for name, weight in blend_weights.items()]
# Accumulate left to right, in the order the weights are listed.
blended = weighted_terms[0]
for term in weighted_terms[1:]:
    blended = blended + term
blend_df['target'] = blended

In [63]:
# Use the blended score as the submission target and write it out without
# the frame's index column.
blend_submission_path = Path('results/blend_comb7_iafoss5160_iafoss_5210_public761.csv')
test['target'] = blend_df['target']
test.to_csv(blend_submission_path, index=False)