In [1]:
import numpy as np 
import pandas as pd 
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.utils import class_weight
from sklearn.utils import shuffle
from keras.utils.np_utils import to_categorical

In [2]:
# Load the PTBD train and test CSV files into DataFrames.
PTBD_train_data, PTBD_test_data = (
    pd.read_csv('/kaggle/input/ptbd-data/PTBD_train.csv'),
    pd.read_csv('/kaggle/input/ptbd-data/PTBD_test.csv'),
)

In [3]:
def pre_process(train, label_col='187', num_classes=None):
    """Split a labelled frame into features, raw labels and one-hot labels.

    Arguments:
        train {pandas.DataFrame} -- frame holding the feature columns plus
            one label column.

    Keyword Arguments:
        label_col {str} -- name of the label column (default: {'187'})
        num_classes {int or None} -- width of the one-hot encoding. With
            None the width is inferred by ``to_categorical`` from the labels
            present in ``train``; pass it explicitly when a split may not
            contain every class, so that all splits are encoded with the
            same number of columns (default: {None})

    Returns:
        [tuple] -- ``(X, y, y_cat)``: every column except ``label_col`` as
            an array, the ``label_col`` values as an array, and those labels
            one-hot encoded.
    """
    X_train = np.asarray(train.drop(columns=[label_col]))
    y_train = np.asarray(train[label_col])
    y_train_cat = to_categorical(y_train, num_classes=num_classes)
    return X_train, y_train, y_train_cat

In [4]:
# Split each PTBD frame into features / raw labels / one-hot labels, then add
# a trailing channel axis so every sample has the (187, 1) shape the model's
# first Conv1D layer declares as its input.
X_train_ptbd, labels_ptbd, y_train_ptbd = pre_process(PTBD_train_data)
X_train_new_ptbd = np.reshape(X_train_ptbd, (len(X_train_ptbd), 187, 1))

X_test_ptbd, test_labels_ptbd, y_test_ptbd = pre_process(PTBD_test_data)
X_test_new_ptbd = np.reshape(X_test_ptbd, (len(X_test_ptbd), 187, 1))

In [5]:

from keras import layers
from keras.layers import Dense, Dropout, BatchNormalization, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, AveragePooling1D
from keras.models import Sequential,save_model,load_model
from keras import optimizers
from keras.callbacks import LearningRateScheduler
import tensorflow as tf

In [6]:
def build_model(input_shape=(187, 1), n_classes=2):
    """Build the (uncompiled) 1D-CNN classifier.

    Two convolutional blocks, each made of two Conv1D layers (256 filters,
    kernel size 2, ReLU) followed by batch normalisation and 50% dropout,
    feed a global average pooling layer, a 128-unit ReLU dense layer and a
    softmax output layer.

    Keyword Arguments:
        input_shape {tuple} -- shape of one input sample, passed to the
            first Conv1D layer (default: {(187, 1)})
        n_classes {int} -- number of units in the softmax output layer
            (default: {2})

    Returns:
        [Sequential] -- the assembled model; compiling is left to the caller.
    """
    model = Sequential()

    # Convolutional block 1 (its first layer also declares the input shape)
    model.add(Conv1D(256, 2, strides=1, activation='relu', input_shape=input_shape))
    model.add(Conv1D(256, 2, strides=1, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # Convolutional block 2
    model.add(Conv1D(256, 2, strides=1, activation='relu'))
    model.add(Conv1D(256, 2, strides=1, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # Collapse the time axis, then classify
    model.add(GlobalAveragePooling1D())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))

    return model


In [7]:
# Fine-tunes training by lowering the learning rate after the 80th epoch
def lr_decay(epoch):
    """Return the learning rate for the given epoch index.

    Indices below 80 map to 0.001; index 80 and above map to 0.0001.
    """
    return 0.001 if epoch < 80 else 0.0001

In [8]:
""" Define focal loss function as per 
    https://www.dlology.com/blog/
    multi-class-classification-with-focal-loss-for-imbalanced-datasets/""" 
def focal_loss_fnc(gamma,alpha):
    """Create a focal-loss function with ``gamma`` and ``alpha`` baked in.

    Focal Loss for Dense Object Detection, https://arxiv.org/abs/1708.02002
        FL(p_t) = -alpha * (1 - p_t)^gamma * ln(p_t)

    Arguments:
        gamma -- exponent of the (1 - p_t) term; converted with float().
        alpha -- constant factor applied to the loss; converted with float().

    Returns:
        [callable] -- ``focal_loss_fixed(y_true, y_pred)`` returning a scalar
            loss tensor.
    """
    focusing = float(gamma)
    scale = float(alpha)

    def focal_loss_fixed(y_true, y_pred):
        """Focal loss averaged over the batch.

        Arguments:
            y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls]
            y_pred {tensor} -- model's output, shape of [batch_size, num_cls];
                expected to already be probabilities (after softmax).

        Returns:
            [tensor] -- mean over the batch of each row's largest per-class
                focal term.
        """
        eps = 1.e-9  # keeps log() away from an exact zero probability
        targets = tf.convert_to_tensor(y_true, tf.float32)
        probs = tf.convert_to_tensor(y_pred, tf.float32) + eps

        # Both factors are masked by the targets, so only classes with a
        # non-zero target entry contribute.
        cross_entropy = targets * -tf.math.log(probs)
        modulator = targets * tf.pow(1. - probs, focusing)
        per_class = scale * (modulator * cross_entropy)

        # One value per sample (max across classes), then the batch mean.
        per_sample = tf.reduce_max(per_class, axis=1)
        return tf.reduce_mean(per_sample)

    return focal_loss_fixed


In [9]:
def train_model(model,X_train,y_train,X_valid,y_valid,filename,n_epochs=100,batch_size=400,
                learn_rate=0.001,focal_loss=True,gamma=2,alpha=0.25):
    """Compile, fit, evaluate on the training data, and save ``model``.

    Arguments:
        model -- uncompiled Keras model; it is compiled here with Adam and
            an accuracy metric.
        X_train, y_train -- training inputs and targets passed to ``fit``.
        X_valid, y_valid -- data passed to ``fit`` as ``validation_data``.
        filename -- path handed to ``save_model``.

    Keyword Arguments:
        n_epochs {int} -- number of training epochs (default: {100})
        batch_size {int} -- mini-batch size (default: {400})
        learn_rate {float} -- initial learning rate. The ``lr_decay``
            schedule is rescaled to start from this value, so the rate
            drops by the same factor at the same epoch as in ``lr_decay``
            (default: {0.001})
        focal_loss {bool} -- use the focal loss built by ``focal_loss_fnc``
            instead of categorical cross-entropy (default: {True})
        gamma -- focal-loss exponent, used only when ``focal_loss`` is True
            (default: {2})
        alpha -- focal-loss factor, used only when ``focal_loss`` is True
            (default: {0.25})

    Returns:
        [tuple] -- ``(model, history)``: the trained model and the object
            returned by ``fit``.
    """
    if focal_loss:
        loss_fn = focal_loss_fnc(gamma, alpha)
    else:
        loss_fn = 'categorical_crossentropy'

    adam = optimizers.Adam(learning_rate=learn_rate)

    # may want to change metrics later
    model.compile(optimizer=adam, loss=loss_fn, metrics=['accuracy'])

    def _scaled_lr_decay(epoch):
        """Apply the shape of ``lr_decay`` to the requested ``learn_rate``."""
        # The scheduler overrides the optimizer's rate at the start of every
        # epoch, so the schedule itself has to honour ``learn_rate``;
        # otherwise that argument would have no effect.
        return learn_rate * (lr_decay(epoch) / lr_decay(0))

    # fit() documents ``callbacks`` as a list of callback instances.
    callbacks = [LearningRateScheduler(_scaled_lr_decay)]

    history = model.fit(X_train, y_train,
                        validation_data=(X_valid, y_valid),
                        epochs=n_epochs,
                        batch_size=batch_size,
                        callbacks=callbacks)

    print("\nEvaluating...", flush=True)
    print('Training data:', flush=True)
    # Separate names so the loss function chosen above is not shadowed.
    train_loss, train_acc = model.evaluate(X_train, y_train, verbose=1)
    print("  Training : loss %.3f - acc %.3f" % (train_loss, train_acc))

    print('Saving model...', flush=True)
    save_model(model, filename, overwrite=True, include_optimizer=True)

    return model, history

In [10]:
# Baseline run: train with categorical cross-entropy instead of focal loss.
# NOTE(review): the test split is passed as the validation data here.
filename = "model_ptbd_wout_focal_loss"
model_wout_focal_loss, history_wout_focal_loss = train_model(
    model=build_model(),
    X_train=X_train_new_ptbd,
    y_train=y_train_ptbd,
    X_valid=X_test_new_ptbd,
    y_valid=y_test_ptbd,
    filename=filename,
    focal_loss=False,
)

# Persist the metrics recorded in the fit history as a CSV file.
hist_df_wout_focal_loss = pd.DataFrame(data=history_wout_focal_loss.history)
hist_df_wout_focal_loss.to_csv("history_wout_focal_loss.csv", index=False)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78