# Thanks to https://www.kaggle.com/siavrez/wavenet-keras, learned a lot.

In [11]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

import tensorflow as tf
from tensorflow.keras.layers import Conv1D, Input, Dense, Add, Multiply
import pandas as pd
import numpy as np
import random
from tensorflow.keras.callbacks import Callback, LearningRateScheduler
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras import losses, models, optimizers
import tensorflow_addons as tfa
import gc

from sklearn.model_selection import GroupKFold
from sklearn.metrics import f1_score

import warnings
# Silence every warning for the rest of the run (both calls install an 'ignore' filter).
warnings.simplefilter('ignore')
warnings.filterwarnings('ignore')
# Raise pandas' display limits so wide / long frames are not truncated when printed.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 500)

import os
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [12]:
# configurations and main hyperparameters
EPOCHS = 110  # number of epochs passed to model.fit for each fold
NNBATCHSIZE = 16  # batch_size passed to model.fit
GROUP_BATCH_SIZE = 4000  # number of consecutive rows that form one 'group' in batching()
SEED = 321  # value handed to seed_everything()
LR = 0.001  # base learning rate used by the Adam optimizer and by lr_schedule()
SPLITS = 5  # number of cross-validation folds; also the divisor when averaging test predictions

def seed_everything(seed):
    """Seed every random number source this script touches with ``seed``.

    Exports ``PYTHONHASHSEED`` to the environment and seeds Python's
    ``random`` module, NumPy's global generator and TensorFlow's global
    generator with the same value.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)

In [13]:
# read data
def read_data():
    """Load the train, test and sample-submission CSV files from ``./data``.

    ``time`` and ``signal`` are read as float32 and the train-only
    ``open_channels`` column as int32.

    Returns:
        Tuple ``(train, test, sub)`` of DataFrames.
    """
    signal_dtypes = {'time': np.float32, 'signal': np.float32}
    train_dtypes = dict(signal_dtypes)
    train_dtypes['open_channels'] = np.int32

    train = pd.read_csv('./data/train_clean_kalman.csv', dtype=train_dtypes)
    test = pd.read_csv('./data/test_clean_kalman.csv', dtype=signal_dtypes)
    sub = pd.read_csv('./data/sample_submission.csv', dtype={'time': np.float32})
    return train, test, sub

# create batches of 4000 observations
def batching(df, batch_size):
    """Label each row with the id of the fixed-size batch it belongs to.

    Rows are bucketed by ``df.index // batch_size``; the buckets are then
    numbered 0, 1, 2, ... in order of first appearance and the number is
    written to a new ``group`` column.

    Args:
        df: DataFrame with a numeric index. It is modified in place.
        batch_size: number of consecutive index values per batch.

    Returns:
        The same ``df`` object, with a ``group`` column of dtype uint16.

    Note:
        The uint16 cast can only represent 65536 distinct group ids.
    """
    # GroupBy.ngroup() is the documented way to get one group number per row;
    # sort=False numbers the groups in the order they are first seen.
    group_ids = df.groupby(df.index // batch_size, sort=False).ngroup()
    # Assign the raw values so the result is positional and 1-D.
    df['group'] = group_ids.values.astype(np.uint16)
    return df

# normalize the data (standard scaler). We can also try other scalers for a better score!
def normalize(train, test):
    """Standardize the ``signal`` column of both frames with train statistics.

    The mean and standard deviation are computed on ``train.signal`` only and
    then applied to both frames, which are modified in place.

    Returns:
        Tuple ``(train, test)`` - the same objects that were passed in.
    """
    center = train.signal.mean()
    scale = train.signal.std()
    for frame in (train, test):
        frame['signal'] = (frame.signal - center) / scale
    return train, test

# get lead and lags features
def lag_with_pct_change(df, windows):
    """Add lagged and leading copies of ``signal`` computed within each group.

    For every ``w`` in ``windows`` two columns are added to ``df`` in place:
    ``signal_shift_pos_<w>`` (signal shifted by ``w`` rows) and
    ``signal_shift_neg_<w>`` (signal shifted by ``-w`` rows). Shifts never
    cross a ``group`` boundary; the gaps they leave are filled with 0.
    Despite the function name, no percentage change is computed.

    Returns:
        The same ``df`` object with the new columns appended.
    """
    for step in windows:
        shifts = (('signal_shift_pos_', step), ('signal_shift_neg_', -1 * step))
        for prefix, offset in shifts:
            shifted = df.groupby('group')['signal'].shift(offset)
            df[prefix + str(step)] = shifted.fillna(0)
    return df

# main function to run feature engineering. Here you may want to try adding other features and check whether your score improves :).
def run_feat_engineering(df, batch_size):
    """Run the feature-engineering steps on one frame.

    Steps, in order: assign ``group`` ids with ``batching``, add lag/lead
    columns for windows 1, 2 and 3 (six columns), then add ``signal_2``,
    the squared signal.

    Args:
        df: DataFrame containing a ``signal`` column.
        batch_size: number of rows per group, forwarded to ``batching``.

    Returns:
        The DataFrame with the extra columns.
    """
    lag_windows = [1, 2, 3]
    grouped = batching(df, batch_size = batch_size)
    df = lag_with_pct_change(grouped, lag_windows)
    # squared signal as an additional feature
    df['signal_2'] = df['signal'].pow(2)
    return df

# fillna with the mean and select features for training
def feature_selection(train, test):
    """Pick the model feature columns and fill their missing values.

    Every column of ``train`` other than ``index``, ``group``,
    ``open_channels`` and ``time`` is treated as a feature. Infinite values
    are turned into NaN, and NaNs in each feature are replaced by that
    feature's mean over train and test combined. The input frames are not
    modified; cleaned copies are returned.

    Returns:
        Tuple ``(train, test, features)`` where ``features`` is the list of
        selected column names.
    """
    excluded = ('index', 'group', 'open_channels', 'time')
    features = [name for name in train.columns if name not in excluded]

    infinities = [np.inf, -np.inf]
    train = train.replace(infinities, np.nan)
    test = test.replace(infinities, np.nan)

    for name in features:
        pooled = pd.concat([train[name], test[name]], axis = 0)
        fill_value = pooled.mean()
        for frame in (train, test):
            frame[name] = frame[name].fillna(fill_value)
    return train, test, features

# model function (very important: you can try different architectures to get a better score. I believe the top of the public leaderboard uses a 1D Conv + RNN style model)
def Classifier(shape_):
    """Build and compile the WaveNet-style per-timestep classifier.

    The network stacks four ``wave_block`` stages (16, 32, 64 and 128
    filters) and ends with an 11-way softmax ``Dense`` layer named ``out``.
    It is compiled with categorical cross-entropy, an ``accuracy`` metric and
    an Adam optimizer (learning rate ``LR``) wrapped in
    ``tfa.optimizers.SWA``.

    Args:
        shape_: input shape without the batch dimension, passed to ``Input``.

    Returns:
        The compiled Keras model.
    """

    def wave_block(x, filters, kernel_size, n):
        """Stack ``n`` gated dilated convolutions with dilation 1, 2, ..., 2**(n-1).

        A 1x1 convolution first projects ``x`` to ``filters`` channels. Each
        step multiplies a tanh branch by a sigmoid branch, applies another
        1x1 convolution, and adds the result to a running sum that is
        returned at the end.
        """
        dilation_rates = [2**i for i in range(n)]
        x = Conv1D(filters = filters,
                   kernel_size = 1,
                   padding = 'same')(x)
        res_x = x
        for dilation_rate in dilation_rates:
            tanh_out = Conv1D(filters = filters,
                              kernel_size = kernel_size,
                              padding = 'same',
                              activation = 'tanh',
                              dilation_rate = dilation_rate)(x)
            sigm_out = Conv1D(filters = filters,
                              kernel_size = kernel_size,
                              padding = 'same',
                              activation = 'sigmoid',
                              dilation_rate = dilation_rate)(x)
            # gate: the sigmoid branch scales the tanh branch element-wise
            x = Multiply()([tanh_out, sigm_out])
            x = Conv1D(filters = filters,
                       kernel_size = 1,
                       padding = 'same')(x)
            # the next step consumes x; res_x accumulates every step's output
            res_x = Add()([res_x, x])
        return res_x

    inp = Input(shape = (shape_))

    x = wave_block(inp, 16, 3, 12)
    x = wave_block(x, 32, 3, 8)
    x = wave_block(x, 64, 3, 4)
    x = wave_block(x, 128, 3, 1)

    # one softmax over the 11 classes for every timestep
    out = Dense(11, activation = 'softmax', name = 'out')(x)

    model = models.Model(inputs = inp, outputs = out)

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    opt = Adam(learning_rate = LR)
    # NOTE(review): the SWA wrapper keeps averaged weights but only applies them
    # when assign_average_vars() is called, and nothing visible in this file
    # calls it - confirm whether the averaged weights are meant to be used.
    opt = tfa.optimizers.SWA(opt)
    model.compile(loss = losses.CategoricalCrossentropy(), optimizer = opt, metrics = ['accuracy'])
    return model

# function that decreases the learning rate as epochs increase (I also changed this part of the code)
def lr_schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    The base rate ``LR`` is used for epochs below 30. From epoch 30 to 89 it
    is divided by 3, 5, 7, 9, 11 and 13 for each successive block of ten
    epochs, and from epoch 90 onwards it is ``LR / 100``.
    """
    if epoch < 30:
        return LR
    # (exclusive upper epoch bound, divisor applied to LR)
    decay_steps = ((40, 3), (50, 5), (60, 7), (70, 9), (80, 11), (90, 13))
    for upper_bound, divisor in decay_steps:
        if epoch < upper_bound:
            return LR / divisor
    return LR / 100

# class to get the macro f1 score. This is not strictly necessary, but it's fun to check the f1 score after each epoch (be careful: if you use this callback, the early stopping callback will not work)
class MacroF1(Callback):
    """Keras callback that prints the macro F1 score after every epoch.

    Args:
        model: model used to predict on ``inputs``.
        inputs: array passed to ``model.predict``.
        targets: one-hot targets; the class axis is axis 2. They are
            converted once to a flat vector of class indices.
    """

    def __init__(self, model, inputs, targets):
        # Let the base class initialise its own state before adding ours.
        super().__init__()
        self.model = model
        self.inputs = inputs
        self.targets = np.argmax(targets, axis = 2).reshape(-1)

    def on_epoch_end(self, epoch, logs = None):
        """Predict on the stored inputs and print the macro F1 score."""
        # The most probable class per timestep, flattened to match self.targets.
        pred = np.argmax(self.model.predict(self.inputs), axis = 2).reshape(-1)
        score = f1_score(self.targets, pred, average = 'macro')
        print(f'F1 Macro Score: {score:.5f}')

# main function to perform GroupKFold cross validation (the data is 1250 sequences of 4000 rows and 8 features; each fold trains on 1000 of them and validates on 250). The sequences are split into 5 folds.
def run_cv_model_by_batch(train, test, splits, batch_col, feats, sample_submission, nn_epochs, nn_batch_size):
    """Train one model per GroupKFold fold and write the averaged test predictions.

    Rows are regrouped into one sequence per ``batch_col`` value. For each
    fold a fresh ``Classifier`` is fitted on the training sequences, its
    validation predictions are stored in an out-of-fold matrix, and its test
    predictions are added to a running average. The overall out-of-fold macro
    F1 is printed at the end and the arg-max test predictions are written to
    ``submission_wavenet.csv``.

    Args:
        train: training DataFrame with ``open_channels``, ``batch_col`` and
            the ``feats`` columns.
        test: test DataFrame with ``batch_col`` and the ``feats`` columns.
        splits: number of GroupKFold folds.
        batch_col: name of the column holding the sequence (group) id.
        feats: list of feature column names fed to the model.
        sample_submission: DataFrame whose ``open_channels`` column is
            overwritten with the predictions before it is saved.
        nn_epochs: epochs per fold.
        nn_batch_size: batch size passed to ``model.fit``.

    Returns:
        None.

    Note:
        Group ids are used directly as positions into the per-group arrays,
        so they are assumed to run 0..G-1 as produced by ``batching``, with
        every group the same length. The one-hot step assumes all 11 classes
        occur in ``train.open_channels``.
    """

    seed_everything(SEED)
    K.clear_session()
    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    oof_ = np.zeros((len(train), 11)) # out-of-fold matrix with 11 columns, one per target class (0 to 10)
    preds_ = np.zeros((len(test), 11))
    target = ['open_channels']
    group = train[batch_col]
    # honour the requested number of folds instead of a hard-coded 5
    kf = GroupKFold(n_splits=splits)
    folds = list(kf.split(train, train[target], group))

    # kf.split yields positional row indices, so index the raw values rather
    # than the Series (which would look the integers up as labels).
    group_ids = group.values
    new_splits = []
    for tr_rows, val_rows in folds:
        # per fold: training group ids, validation group ids, validation row positions
        new_splits.append([np.unique(group_ids[tr_rows]), np.unique(group_ids[val_rows]), val_rows])
    n_folds = len(new_splits)

    # pivot the target into one-hot columns so the net is trained as a multiclass classifier (a single vector with a sparse categorical cross-entropy loss would also work)
    tr = pd.concat([pd.get_dummies(train.open_channels), train[[batch_col]]], axis=1)

    tr.columns = ['target_'+str(i) for i in range(11)] + [batch_col]
    target_cols = ['target_'+str(i) for i in range(11)]
    # one array slice per group: (n_groups, rows_per_group, n_columns)
    train_tr = np.array(list(tr.groupby(batch_col).apply(lambda x: x[target_cols].values))).astype(np.float32)
    train = np.array(list(train.groupby(batch_col).apply(lambda x: x[feats].values)))
    test = np.array(list(test.groupby(batch_col).apply(lambda x: x[feats].values)))

    for n_fold, (tr_idx, val_idx, val_orig_idx) in enumerate(new_splits):
        train_x, train_y = train[tr_idx], train_tr[tr_idx]
        valid_x, valid_y = train[val_idx], train_tr[val_idx]
        print(f'Our training dataset shape is {train_x.shape}')
        print(f'Our validation dataset shape is {valid_x.shape}')

        gc.collect()
        shape_ = (None, train_x.shape[2]) # variable sequence length; last dim is the number of features
        model = Classifier(shape_)
        # using our lr_schedule function
        cb_lr_schedule = LearningRateScheduler(lr_schedule)
        model.fit(train_x,train_y,
                  epochs = nn_epochs,
                  callbacks = [cb_lr_schedule, MacroF1(model, valid_x, valid_y)], # adding custom evaluation metric for each epoch
                  batch_size = nn_batch_size,verbose = 2,
                  validation_data = (valid_x,valid_y))
        preds_f = model.predict(valid_x)
        f1_score_ = f1_score(np.argmax(valid_y, axis=2).reshape(-1),  np.argmax(preds_f, axis=2).reshape(-1), average = 'macro') # need to get the class with the biggest probability
        print(f'Training fold {n_fold + 1} completed. macro f1 score : {f1_score_ :1.5f}')
        # flatten (groups, rows, classes) to (rows, classes) to line up with the original row positions
        preds_f = preds_f.reshape(-1, preds_f.shape[-1])
        oof_[val_orig_idx,:] += preds_f
        te_preds = model.predict(test)
        te_preds = te_preds.reshape(-1, te_preds.shape[-1])
        # average over the folds actually run, not the module-level SPLITS
        preds_ += te_preds / n_folds
    # calculate the oof macro f1_score
    f1_score_ = f1_score(np.argmax(train_tr, axis = 2).reshape(-1),  np.argmax(oof_, axis = 1), average = 'macro') # axis 2 for the 3-dimensional array and axis 1 for the 2-dimensional array (extracting the best class)
    print(f'Training completed. oof macro f1 score : {f1_score_:1.5f}')
    sample_submission['open_channels'] = np.argmax(preds_, axis = 1).astype(int)
    sample_submission.to_csv('submission_wavenet.csv', index=False, float_format='%.4f')
    
# this function run our entire program
def run_everything():
    """Run the whole pipeline: load, normalize, build features, cross-validate.

    Progress messages are printed before and after each stage. The last
    stage, ``run_cv_model_by_batch``, trains the folds and writes the
    submission file.
    """
    print('Reading Data Started...')
    train, test, sample_submission = read_data()
    train, test = normalize(train, test)
    print('Reading and Normalizing Data Completed')

    for message in ('Creating Features', 'Feature Engineering Started...'):
        print(message)
    # same feature pipeline for both frames, train first and then test
    train, test = (
        run_feat_engineering(frame, batch_size = GROUP_BATCH_SIZE)
        for frame in (train, test)
    )
    train, test, features = feature_selection(train, test)
    print('Feature Engineering Completed...')

    print(f'Training Wavenet model with {SPLITS} folds of GroupKFold Started...')
    run_cv_model_by_batch(
        train, test, SPLITS, 'group', features,
        sample_submission, EPOCHS, NNBATCHSIZE,
    )
    print('Training completed...')
        
# Run the whole pipeline when this cell is executed.
run_everything()

Reading Data Started...
Reading and Normalizing Data Completed
Creating Features
Feature Engineering Started...
Feature Engineering Completed...
Training Wavenet model with 5 folds of GroupKFold Started...
Our training dataset shape is (1000, 4000, 8)
Our validation dataset shape is (250, 4000, 8)
Train on 1000 samples, validate on 250 samples
Epoch 1/110
F1 Macro Score: 0.78211
1000/1000 - 126s - loss: 0.7133 - accuracy: 0.7399 - val_loss: 0.3365 - val_accuracy: 0.8890
Epoch 2/110
F1 Macro Score: 0.90934
1000/1000 - 110s - loss: 0.2105 - accuracy: 0.9309 - val_loss: 0.1557 - val_accuracy: 0.9490
Epoch 3/110
F1 Macro Score: 0.92837
1000/1000 - 111s - loss: 0.1199 - accuracy: 0.9592 - val_loss: 0.1128 - val_accuracy: 0.9603
Epoch 4/110
F1 Macro Score: 0.93271
1000/1000 - 114s - loss: 0.1037 - accuracy: 0.9629 - val_loss: 0.1009 - val_accuracy: 0.9632
Epoch 5/110
F1 Macro Score: 0.93374
1000/1000 - 113s - loss: 0.1025 - accuracy: 0.9621 - val_loss: 0.0993 - val_accuracy: 0.9634
Epoch 6/1

Epoch 62/110
F1 Macro Score: 0.93778
1000/1000 - 112s - loss: 0.0801 - accuracy: 0.9690 - val_loss: 0.0851 - val_accuracy: 0.9668
Epoch 63/110
F1 Macro Score: 0.93814
1000/1000 - 113s - loss: 0.0801 - accuracy: 0.9690 - val_loss: 0.0847 - val_accuracy: 0.9669
Epoch 64/110
F1 Macro Score: 0.93823
1000/1000 - 112s - loss: 0.0800 - accuracy: 0.9690 - val_loss: 0.0848 - val_accuracy: 0.9669
Epoch 65/110
F1 Macro Score: 0.93797
1000/1000 - 113s - loss: 0.0801 - accuracy: 0.9690 - val_loss: 0.0850 - val_accuracy: 0.9668
Epoch 66/110
F1 Macro Score: 0.93815
1000/1000 - 113s - loss: 0.0800 - accuracy: 0.9690 - val_loss: 0.0848 - val_accuracy: 0.9668
Epoch 67/110
F1 Macro Score: 0.93817
1000/1000 - 113s - loss: 0.0801 - accuracy: 0.9690 - val_loss: 0.0850 - val_accuracy: 0.9668
Epoch 68/110
F1 Macro Score: 0.93824
1000/1000 - 111s - loss: 0.0800 - accuracy: 0.9690 - val_loss: 0.0849 - val_accuracy: 0.9668
Epoch 69/110
F1 Macro Score: 0.93837
1000/1000 - 111s - loss: 0.0800 - accuracy: 0.9690 - 

Epoch 14/110
F1 Macro Score: 0.93822
1000/1000 - 209s - loss: 0.0867 - accuracy: 0.9667 - val_loss: 0.0824 - val_accuracy: 0.9685
Epoch 15/110
F1 Macro Score: 0.93697
1000/1000 - 185s - loss: 0.0913 - accuracy: 0.9649 - val_loss: 0.0841 - val_accuracy: 0.9678
Epoch 16/110
F1 Macro Score: 0.93754
1000/1000 - 187s - loss: 0.0906 - accuracy: 0.9652 - val_loss: 0.0818 - val_accuracy: 0.9686
Epoch 17/110
F1 Macro Score: 0.93792
1000/1000 - 188s - loss: 0.0877 - accuracy: 0.9662 - val_loss: 0.0823 - val_accuracy: 0.9687
Epoch 18/110
F1 Macro Score: 0.93848
1000/1000 - 216s - loss: 0.0872 - accuracy: 0.9664 - val_loss: 0.0812 - val_accuracy: 0.9688
Epoch 19/110
F1 Macro Score: 0.93849
1000/1000 - 187s - loss: 0.0852 - accuracy: 0.9671 - val_loss: 0.0812 - val_accuracy: 0.9687
Epoch 20/110
F1 Macro Score: 0.88291
1000/1000 - 196s - loss: 0.1914 - accuracy: 0.9379 - val_loss: 0.1735 - val_accuracy: 0.9381
Epoch 21/110
F1 Macro Score: 0.93530
1000/1000 - 207s - loss: 0.1112 - accuracy: 0.9588 - 

F1 Macro Score: 0.93933
1000/1000 - 203s - loss: 0.0817 - accuracy: 0.9684 - val_loss: 0.0789 - val_accuracy: 0.9696
Epoch 78/110
F1 Macro Score: 0.93936
1000/1000 - 185s - loss: 0.0816 - accuracy: 0.9684 - val_loss: 0.0788 - val_accuracy: 0.9696
Epoch 79/110
F1 Macro Score: 0.93940
1000/1000 - 194s - loss: 0.0816 - accuracy: 0.9683 - val_loss: 0.0789 - val_accuracy: 0.9696
Epoch 80/110
F1 Macro Score: 0.93949
1000/1000 - 251s - loss: 0.0816 - accuracy: 0.9684 - val_loss: 0.0789 - val_accuracy: 0.9696
Epoch 81/110
F1 Macro Score: 0.93947
1000/1000 - 238s - loss: 0.0815 - accuracy: 0.9684 - val_loss: 0.0788 - val_accuracy: 0.9696
Epoch 82/110
F1 Macro Score: 0.93952
1000/1000 - 181s - loss: 0.0816 - accuracy: 0.9684 - val_loss: 0.0788 - val_accuracy: 0.9697
Epoch 83/110
F1 Macro Score: 0.93952
1000/1000 - 191s - loss: 0.0815 - accuracy: 0.9684 - val_loss: 0.0787 - val_accuracy: 0.9697
Epoch 84/110
F1 Macro Score: 0.93954
1000/1000 - 203s - loss: 0.0815 - accuracy: 0.9684 - val_loss: 0.0

Epoch 29/110
F1 Macro Score: 0.93599
1000/1000 - 117s - loss: 0.0832 - accuracy: 0.9678 - val_loss: 0.0832 - val_accuracy: 0.9680
Epoch 30/110
F1 Macro Score: 0.93665
1000/1000 - 114s - loss: 0.0837 - accuracy: 0.9676 - val_loss: 0.0828 - val_accuracy: 0.9682
Epoch 31/110
F1 Macro Score: 0.93714
1000/1000 - 116s - loss: 0.0822 - accuracy: 0.9682 - val_loss: 0.0821 - val_accuracy: 0.9684
Epoch 32/110
F1 Macro Score: 0.93712
1000/1000 - 115s - loss: 0.0821 - accuracy: 0.9682 - val_loss: 0.0820 - val_accuracy: 0.9684
Epoch 33/110
F1 Macro Score: 0.93713
1000/1000 - 117s - loss: 0.0820 - accuracy: 0.9683 - val_loss: 0.0820 - val_accuracy: 0.9684
Epoch 34/110
F1 Macro Score: 0.93697
1000/1000 - 116s - loss: 0.0819 - accuracy: 0.9683 - val_loss: 0.0819 - val_accuracy: 0.9683
Epoch 35/110
F1 Macro Score: 0.93693
1000/1000 - 116s - loss: 0.0819 - accuracy: 0.9683 - val_loss: 0.0820 - val_accuracy: 0.9684
Epoch 36/110
F1 Macro Score: 0.93697
1000/1000 - 6754s - loss: 0.0819 - accuracy: 0.9683 -

F1 Macro Score: 0.93775
1000/1000 - 116s - loss: 0.0798 - accuracy: 0.9691 - val_loss: 0.0806 - val_accuracy: 0.9689
Epoch 93/110
F1 Macro Score: 0.93769
1000/1000 - 121s - loss: 0.0798 - accuracy: 0.9691 - val_loss: 0.0806 - val_accuracy: 0.9689
Epoch 94/110
F1 Macro Score: 0.93768
1000/1000 - 115s - loss: 0.0798 - accuracy: 0.9691 - val_loss: 0.0806 - val_accuracy: 0.9688
Epoch 95/110
F1 Macro Score: 0.93772
1000/1000 - 117s - loss: 0.0798 - accuracy: 0.9691 - val_loss: 0.0806 - val_accuracy: 0.9688
Epoch 96/110
F1 Macro Score: 0.93765
1000/1000 - 118s - loss: 0.0798 - accuracy: 0.9691 - val_loss: 0.0806 - val_accuracy: 0.9688
Epoch 97/110
F1 Macro Score: 0.93777
1000/1000 - 115s - loss: 0.0798 - accuracy: 0.9691 - val_loss: 0.0806 - val_accuracy: 0.9689
Epoch 98/110
F1 Macro Score: 0.93767
1000/1000 - 115s - loss: 0.0798 - accuracy: 0.9691 - val_loss: 0.0806 - val_accuracy: 0.9688
Epoch 99/110
F1 Macro Score: 0.93777
1000/1000 - 115s - loss: 0.0798 - accuracy: 0.9691 - val_loss: 0.0

Epoch 44/110
F1 Macro Score: 0.93793
1000/1000 - 120s - loss: 0.0808 - accuracy: 0.9687 - val_loss: 0.0836 - val_accuracy: 0.9678
Epoch 45/110
F1 Macro Score: 0.93791
1000/1000 - 116s - loss: 0.0806 - accuracy: 0.9687 - val_loss: 0.0837 - val_accuracy: 0.9678
Epoch 46/110
F1 Macro Score: 0.93781
1000/1000 - 116s - loss: 0.0807 - accuracy: 0.9687 - val_loss: 0.0836 - val_accuracy: 0.9677
Epoch 47/110
F1 Macro Score: 0.93770
1000/1000 - 114s - loss: 0.0806 - accuracy: 0.9687 - val_loss: 0.0835 - val_accuracy: 0.9677
Epoch 48/110
F1 Macro Score: 0.93771
1000/1000 - 120s - loss: 0.0806 - accuracy: 0.9687 - val_loss: 0.0836 - val_accuracy: 0.9677
Epoch 49/110
F1 Macro Score: 0.93802
1000/1000 - 121s - loss: 0.0806 - accuracy: 0.9687 - val_loss: 0.0836 - val_accuracy: 0.9678
Epoch 50/110
F1 Macro Score: 0.93754
1000/1000 - 121s - loss: 0.0805 - accuracy: 0.9688 - val_loss: 0.0837 - val_accuracy: 0.9677
Epoch 51/110
F1 Macro Score: 0.93770
1000/1000 - 114s - loss: 0.0804 - accuracy: 0.9688 - 

Epoch 107/110
F1 Macro Score: 0.93818
1000/1000 - 124s - loss: 0.0792 - accuracy: 0.9692 - val_loss: 0.0829 - val_accuracy: 0.9680
Epoch 108/110
F1 Macro Score: 0.93808
1000/1000 - 122s - loss: 0.0792 - accuracy: 0.9692 - val_loss: 0.0829 - val_accuracy: 0.9680
Epoch 109/110
F1 Macro Score: 0.93806
1000/1000 - 117s - loss: 0.0792 - accuracy: 0.9692 - val_loss: 0.0829 - val_accuracy: 0.9679
Epoch 110/110
F1 Macro Score: 0.93811
1000/1000 - 127s - loss: 0.0792 - accuracy: 0.9692 - val_loss: 0.0829 - val_accuracy: 0.9680
Training fold 4 completed. macro f1 score : 0.93811
Our training dataset shape is (1000, 4000, 8)
Our validation dataset shape is (250, 4000, 8)
Train on 1000 samples, validate on 250 samples
Epoch 1/110
F1 Macro Score: 0.80326
1000/1000 - 137s - loss: 0.7719 - accuracy: 0.7034 - val_loss: 0.3715 - val_accuracy: 0.8759
Epoch 2/110
F1 Macro Score: 0.91653
1000/1000 - 117s - loss: 0.2444 - accuracy: 0.9229 - val_loss: 0.1540 - val_accuracy: 0.9527
Epoch 3/110
F1 Macro Score

Epoch 59/110
F1 Macro Score: 0.93834
1000/1000 - 116s - loss: 0.0806 - accuracy: 0.9688 - val_loss: 0.0824 - val_accuracy: 0.9680
Epoch 60/110
F1 Macro Score: 0.93773
1000/1000 - 116s - loss: 0.0805 - accuracy: 0.9688 - val_loss: 0.0824 - val_accuracy: 0.9679
Epoch 61/110
F1 Macro Score: 0.93835
1000/1000 - 115s - loss: 0.0805 - accuracy: 0.9688 - val_loss: 0.0824 - val_accuracy: 0.9681
Epoch 62/110
F1 Macro Score: 0.93835
1000/1000 - 116s - loss: 0.0804 - accuracy: 0.9689 - val_loss: 0.0823 - val_accuracy: 0.9681
Epoch 63/110
F1 Macro Score: 0.93824
1000/1000 - 117s - loss: 0.0803 - accuracy: 0.9689 - val_loss: 0.0822 - val_accuracy: 0.9681
Epoch 64/110
F1 Macro Score: 0.93838
1000/1000 - 115s - loss: 0.0803 - accuracy: 0.9689 - val_loss: 0.0822 - val_accuracy: 0.9681
Epoch 65/110
F1 Macro Score: 0.93834
1000/1000 - 116s - loss: 0.0803 - accuracy: 0.9689 - val_loss: 0.0825 - val_accuracy: 0.9680
Epoch 66/110
F1 Macro Score: 0.93807
1000/1000 - 117s - loss: 0.0803 - accuracy: 0.9689 - 