In [1]:
import numpy as np 
import pandas as pd

In [2]:
def create_train_test_groups():
    """Build per-sample signal-group labels for the train and test signals.

    Every group id is associated with one or more half-open index ranges
    ``[start, stop)``; each sample inside a range is labelled with that id.
    Any position not covered by a range keeps the sentinel value ``-1``.

    Returns:
        tuple: ``(train_signal_group, test_df_signal_group)`` - integer NumPy
        arrays of length 5,000,000 (group ids 0-4) and 2,000,000
        (group ids 0-5) respectively.
    """

    def _label_segments(n_samples, segments_by_group):
        # Start from the "unassigned" sentinel, then paint each range with its id.
        labels = np.full(n_samples, -1)
        for group_id, segments in segments_by_group.items():
            for start, stop in segments:
                labels[start:stop] = group_id
        return labels

    train_segments = {
        0: [(0, 1000000)],
        1: [(1500000, 2000000), (3500000, 4000000)],
        2: [(2500000, 3000000), (4000000, 4500000)],
        3: [(1000000, 1500000), (3000000, 3500000)],
        4: [(2000000, 2500000), (4500000, 5000000)],
    }
    test_segments = {
        0: [(0, 100000)],
        1: [(100000, 200000), (900000, 1000000)],
        2: [(200000, 300000), (600000, 700000)],
        3: [(400000, 500000)],
        4: [(500000, 600000), (700000, 800000)],
        5: [(300000, 400000), (800000, 900000), (1000000, 2000000)],
    }

    return (
        _label_segments(5000000, train_segments),
        _label_segments(2000000, test_segments),
    )

# Module-level copies of the group labels; train_signal_group is indexed
# directly by later cells to select the group-4 rows of y_true.
train_signal_group, test_df_signal_group = create_train_test_groups()

In [3]:
def save_submission(y_test, name):
    """Write a submission CSV from per-class scores.

    Args:
        y_test: 2-D array-like of per-class scores, one row per sample; the
            predicted class of a row is the index of its largest value.
        name: Output file stem; the file is written as ``{name}.csv`` in the
            current working directory.

    The sample submission file is used as a template: its ``open_channels``
    column is replaced by the predicted classes (as int32) and the frame is
    written without the index, with float columns formatted to four decimals.
    """
    predicted_channels = np.argmax(y_test, axis=1).astype(np.int32)

    template_path = "../input/liverpool-ion-switching/sample_submission.csv"
    submission = pd.read_csv(template_path)
    submission["open_channels"] = predicted_channels

    output_path = f"{name}.csv"
    submission.to_csv(output_path, index=False, float_format="%.4f")

In [4]:
# Out-of-fold and test outputs saved by the wavenet-lstm-v2-keras run; later
# cells take argmax over axis 1 to turn them into class predictions.
# NOTE(review): presumably per-class probabilities with 11 columns — confirm.
oof_preds = np.load('../wavenet-lstm-v2-keras/oof_wavenet_probs.npy')
test_preds = np.load('../wavenet-lstm-v2-keras/test_wavenet_probs.npy')

In [5]:
# Outputs of a second model, loaded from the working directory.
# NOTE(review): presumably trained on signal group 4 only — the row counts in
# the recorded output (1,000,000 / 200,000) match that group's size; confirm.
g4_oof_preds = np.load('oof_wavenet_probs.npy')
g4_test_preds = np.load('test_wavenet_probs.npy')

print(g4_oof_preds.shape, g4_test_preds.shape)

# Prepend an all-zero column to each array (10 -> 11 columns in the recorded
# output), so that index k of the padded array corresponds to column k-1 of
# the loaded one and index 0 carries a score of zero.
g4_oof_preds = np.concatenate(
    [np.zeros((len(g4_oof_preds),1)), g4_oof_preds], axis=1)
g4_test_preds = np.concatenate(
    [np.zeros((len(g4_test_preds),1)), g4_test_preds], axis=1)

print(g4_oof_preds.shape, g4_test_preds.shape)

(1000000, 10) (200000, 10)
(1000000, 11) (200000, 11)


In [6]:
# Start from the first model's predictions and overwrite only the rows of
# signal group 4 with the zero-padded predictions of the second model.
oof_pred_mixed = oof_preds.copy()
test_pred_mixed = test_preds.copy()

# Select the group-4 rows through the shared group labels rather than
# repeating the raw index ranges here. A boolean mask picks rows in ascending
# index order (train: 2.0M-2.5M then 4.5M-5.0M; test: 500k-600k then
# 700k-800k), which is the order in which the g4 arrays are laid out.
oof_pred_mixed[train_signal_group == 4] = g4_oof_preds
test_pred_mixed[test_df_signal_group == 4] = g4_test_preds

In [7]:
from sklearn.metrics import f1_score
# Ground-truth open_channels column of the training set, as a NumPy array.
y_true = pd.read_csv("../input/liverpool-ion-switching/train.csv").open_channels.values

In [11]:
def groups_f1(y_true, y_pred):
    """Print the macro-averaged F1 score for each train signal group and overall.

    For group 4 a second score, computed with the label set restricted to
    1..10, is printed in parentheses after the unrestricted score.

    Args:
        y_true: NumPy array of true labels covering the whole train signal.
        y_pred: NumPy array of predicted labels, aligned with ``y_true``.
    """
    # Only the train labels are needed; the test labels are discarded.
    group_of_sample, _ = create_train_test_groups()

    for group_id in range(5):
        in_group = group_of_sample == group_id
        true_in_group, pred_in_group = y_true[in_group], y_pred[in_group]
        score = f1_score(true_in_group, pred_in_group, average='macro')

        if group_id != 4:
            print(f'Group {group_id} {score}')
            continue

        # Group 4 only: also score with the macro average limited to labels 1..10.
        score_labels_1_to_10 = f1_score(
            true_in_group, pred_in_group, average='macro', labels=np.arange(1, 11))
        print(f'Group {group_id} {score} ({score_labels_1_to_10})')

    print('Total', f1_score(y_true, y_pred, average='macro'))

In [12]:
#Best model
# Per-group and overall macro F1 of the class predictions obtained by taking
# argmax over axis 1 of oof_preds.
# NOTE(review): the recorded output below uses a "Group i, score" layout that
# differs from the current print format in groups_f1 — it looks like it was
# produced by an earlier version of the function; re-run to refresh.
groups_f1(y_true, oof_preds.argmax(axis=1))

Group 0, 0.6648954577448117
Group 1, 0.6498282485115017
Group 2, 0.9731941791223476
Group 3, 0.9970441134441017
Group 4, 0.7855970443669061, (0.8641567488035967)
Total 0.9407024419353109


In [13]:
# Baseline for comparison: score a constant prediction of class 0 for every
# one of the 5,000,000 train samples.
groups_f1(y_true, np.full(5000000, 0))

Group 0, 0.49104130912318633
Group 1, 0.011829351162627217
Group 2, 0.0006397697228230114
Group 3, 0.19948703128965042
Group 4, 3.6363563636509083e-07, (0.0)
Total 0.03613408484571879


In [68]:
# Overall macro F1 of the oof_preds class predictions — the same quantity as
# the "Total" line printed by groups_f1.
f1_score(y_true, oof_preds.argmax(axis=1), average='macro')

0.9407024419353109

In [45]:
print('Only 10 chanels oof on group 4')

# Macro F1 of the padded g4 predictions against the group-4 rows of y_true,
# with the average restricted to labels 1..10.
# Assumes g4_oof_preds rows are ordered like the group-4 rows of the train
# signal, consistent with how they are spliced into oof_pred_mixed.
f1_score(y_true[train_signal_group==4], 
         g4_oof_preds.argmax(axis=1), average='macro', labels=np.arange(1, 11))

Only 10 chanels oof on group 4


0.8533231100602989

In [47]:
print('Only 10 chanels oof on group 4')
# Boolean-mask indexing returns a copy, so the in-place edit below does not
# modify y_true itself.
a = y_true[train_signal_group==4]
# Relabel true class 0 as class 1 before scoring against labels 1..10.
# NOTE(review): presumably because the g4 model has no class-0 output — confirm.
a[a==0]=1
f1_score(a, 
         g4_oof_preds.argmax(axis=1), average='macro', labels=np.arange(1, 11))

Only 10 chanels oof on group 4


0.8543389830761721

In [None]:
# Submit blend_new_wavenets.csv to the competition through the Kaggle CLI.
# NOTE(review): no visible cell writes blend_new_wavenets.csv (save_submission
# is never called here) — presumably produced elsewhere; confirm before running.
!kaggle competitions submit -c liverpool-ion-switching -f blend_new_wavenets.csv -m "np.mean([new_wavenet_and_feats, new_wavenet], axis=0)"

In [29]:
def count_by_groups(y_true, y_pred, test_pred):
    """Print per-group class histograms for true, predicted and test labels.

    For each signal group 0..4 the group id is printed, followed by three
    lines with the number of occurrences of every class 0..10 (at least 11
    bins): the true train labels, the predicted train labels and the
    predicted test labels of that group. Test group 5 is not reported.

    Args:
        y_true: NumPy array of non-negative integer true labels for the train signal.
        y_pred: NumPy array of non-negative integer predicted labels for the train signal.
        test_pred: NumPy array of non-negative integer predicted labels for the test signal.
    """
    train_groups, test_groups = create_train_test_groups()

    # (printed tag, label array, group labels used to slice that array)
    histogram_rows = (
        ('y_true', y_true, train_groups),
        ('y_pred', y_pred, train_groups),
        ('test_p', test_pred, test_groups),
    )

    for group_id in range(5):
        print(group_id)
        for tag, labels, groups in histogram_rows:
            class_counts = np.bincount(labels[groups == group_id], minlength=11)
            print(tag, list(class_counts))
    
    
# Compare class frequencies of the true labels with those of the argmax
# predictions from oof_preds (train) and test_preds (test), group by group.
count_by_groups(y_true,oof_preds.argmax(axis=1), test_preds.argmax(axis=1))

0
y_true [964796, 35204, 0, 0, 0, 0, 0, 0, 0, 0, 0]
y_pred [964778, 35221, 0, 1, 0, 0, 0, 0, 0, 0, 0]
test_p [92669, 7330, 0, 1, 0, 0, 0, 0, 0, 0, 0]
1
y_true [24232, 176123, 433542, 366103, 0, 0, 0, 0, 0, 0, 0]
y_pred [23873, 175539, 435033, 365553, 1, 0, 0, 0, 0, 0, 1]
test_p [4241, 34086, 86966, 74706, 0, 1, 0, 0, 0, 0, 0]
2
y_true [1923, 23611, 119295, 295567, 373428, 186176, 0, 0, 0, 0, 0]
y_pred [1902, 23484, 119208, 295412, 373649, 186345, 0, 0, 0, 0, 0]
test_p [409, 5149, 24084, 58538, 74036, 37750, 14, 18, 1, 1, 0]
3
y_true [249199, 750801, 0, 0, 0, 0, 0, 0, 0, 0, 0]
y_pred [249141, 750859, 0, 0, 0, 0, 0, 0, 0, 0, 0]
test_p [25998, 74001, 0, 1, 0, 0, 0, 0, 0, 0, 0]
4
y_true [2, 126, 1087, 6939, 29982, 91701, 188112, 265015, 245183, 136120, 35733]
y_pred [0, 101, 1083, 6651, 29629, 90171, 188306, 266875, 246245, 136047, 34892]
test_p [0, 21, 239, 1475, 6390, 18185, 37205, 52866, 49412, 27099, 7108]


In [32]:
# Distinct open_channels values in order of first appearance. Reuses y_true
# (the same column, already loaded from train.csv) instead of parsing the
# whole CSV a second time.
pd.unique(y_true)

array([ 0,  1,  3,  2, 10,  9,  8,  7,  6,  5,  4])