In [12]:
#import libraries
import tensorflow as tf
from tensorflow import keras
import numpy as np
import keras 
import pydot
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.layers import Input, Dense, Activation,Dropout
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
from imblearn.over_sampling import SMOTENC
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold
import os
import shap

In [13]:
def init_nn_model(dimensions):
    """Build the (uncompiled) feed-forward binary classifier.

    Layer stack: Dense(64, relu, L2=0.001) -> Dropout(0.2) ->
    Dense(8, relu, L2=0.001) -> Dropout(0.1) -> Dense(1, sigmoid).

    Args:
        dimensions: Forwarded to the first Dense layer as ``input_dim``.

    Returns:
        The freshly built ``Sequential`` model. A new model is created on
        every call, so no weights are shared between calls.
    """
    nn_model = Sequential()
    nn_model.add(Dense(64, kernel_regularizer=tf.keras.regularizers.l2(0.001), input_dim=dimensions, activation='relu'))
    nn_model.add(Dropout(rate=0.2))
    nn_model.add(Dense(8, kernel_regularizer=tf.keras.regularizers.l2(0.001), activation='relu'))
    nn_model.add(Dropout(rate=0.1))
    nn_model.add(Dense(1, activation='sigmoid'))
    # Return the model: it used to live only in a local variable, so callers
    # could never reach it.
    return nn_model


def init_lr_schedule(x_train):
    """Create the inverse-time-decay learning-rate schedule.

    Args:
        x_train: Training features; only ``x_train.shape[0]`` is read.

    Returns:
        An ``InverseTimeDecay`` schedule with initial rate 0.001,
        ``decay_rate=1`` and ``decay_steps = (x_train.shape[0] / 32) * 50``.
    """
    # NOTE(review): the divisor 32 looks like an assumed batch size, but
    # train_model fits with batch_size=10 -- confirm which one is intended.
    decay_steps = (x_train.shape[0] / 32) * 50
    return tf.keras.optimizers.schedules.InverseTimeDecay(
        0.001, decay_steps=decay_steps, decay_rate=1, staircase=False
    )


def get_optimizer(learning_rate=0.001):
    """Return an Adam optimizer.

    Args:
        learning_rate: A float or a learning-rate schedule object handed
            straight to ``Adam``. Defaults to 0.001, the same initial rate
            that ``init_lr_schedule`` uses, so ``get_optimizer()`` with no
            arguments no longer depends on a global ``lr_schedule``.

    Returns:
        A ``tf.keras.optimizers.Adam`` instance.
    """
    return tf.keras.optimizers.Adam(learning_rate)


def train_model(x_train, y_train, x_test, y_test, dimensions):
    """Build, compile and fit a new model, then predict on ``x_test``.

    Args:
        x_train: Training features (must expose ``.shape``).
        y_train: Training labels.
        x_test: Features used both as validation data during fitting and as
            the prediction input.
        y_test: Labels used as validation targets during fitting.
        dimensions: Input dimension forwarded to ``init_nn_model``.

    Returns:
        The output of ``nn_model.predict(x_test)``.
    """
    # Keep the model and schedule as locals instead of relying on globals
    # that were never assigned (previously a NameError on a fresh kernel).
    nn_model = init_nn_model(dimensions)
    lr_schedule = init_lr_schedule(x_train)
    nn_model.compile(loss="binary_crossentropy", optimizer=get_optimizer(lr_schedule), metrics=['accuracy'])
    # NOTE(review): early stopping (and restore_best_weights) is driven by
    # accuracy on x_test/y_test, the same data the returned predictions are
    # made on -- metrics computed from them may be optimistic.
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=70, restore_best_weights=True)
    nn_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=150, batch_size=10, verbose=0, callbacks=[callback])
    y_pred = nn_model.predict(x_test)
    return y_pred

# Dataset 1

In [15]:
# Dataset 1: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('1-train-x.csv', '1-test-x.csv', '1-train-y.csv', '1-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 20)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.92
precision  : 0.74
recall  : 0.69
f1 score  : 0.71
confusion matrix : 
 [[1048   45]
 [  56  126]]


# Dataset 2

In [16]:
# Dataset 2: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('2-train-x.csv', '2-test-x.csv', '2-train-y.csv', '2-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 19)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.93
precision  : 0.78
recall  : 0.74
f1 score  : 0.76
confusion matrix : 
 [[1054   38]
 [  47  136]]


# Dataset 3

In [17]:
# Dataset 3: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('3-train-x.csv', '3-test-x.csv', '3-train-y.csv', '3-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 19)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.94
precision  : 0.79
recall  : 0.78
f1 score  : 0.78
confusion matrix : 
 [[1050   38]
 [  42  145]]


# Dataset 4

In [18]:
# Dataset 4: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('4-train-x.csv', '4-test-x.csv', '4-train-y.csv', '4-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 10)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.93
precision  : 0.75
recall  : 0.77
f1 score  : 0.76
confusion matrix : 
 [[1059   44]
 [  39  133]]


# Dataset 5

In [19]:
# Dataset 5: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('5-train-x.csv', '5-test-x.csv', '5-train-y.csv', '5-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 9)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.93
precision  : 0.77
recall  : 0.74
f1 score  : 0.75
confusion matrix : 
 [[1044   41]
 [  50  140]]


# Dataset 6

In [20]:
# Dataset 6: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('6-train-x.csv', '6-test-x.csv', '6-train-y.csv', '6-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 17)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.93
precision  : 0.76
recall  : 0.77
f1 score  : 0.76
confusion matrix : 
 [[1047   45]
 [  42  141]]


# Dataset 7

In [21]:
# Dataset 7: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('7-train-x.csv', '7-test-x.csv', '7-train-y.csv', '7-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 13)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.92
precision  : 0.70
recall  : 0.80
f1 score  : 0.75
confusion matrix : 
 [[1035   61]
 [  36  143]]


# Dataset 8

In [22]:
# Dataset 8: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('8-train-x.csv', '8-test-x.csv', '8-train-y.csv', '8-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 20)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.93
precision  : 0.81
recall  : 0.72
f1 score  : 0.76
confusion matrix : 
 [[1059   32]
 [  51  133]]


# Dataset 9

In [23]:
# Dataset 9: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('9-train-x.csv', '9-test-x.csv', '9-train-y.csv', '9-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 14)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.94
precision  : 0.74
recall  : 0.82
f1 score  : 0.78
confusion matrix : 
 [[1066   46]
 [  29  134]]


# Dataset 10

In [24]:
# Dataset 10: read the pre-split CSV files, train the network, report test metrics.
proccessed_data_path = os.path.join(os.path.pardir, 'data', 'processed')
x_train_path, x_test_path, y_train_path, y_test_path = (
    os.path.join(proccessed_data_path, csv_name)
    for csv_name in ('10-train-x.csv', '10-test-x.csv', '10-train-y.csv', '10-test-y.csv')
)

dfx, dfxt, dfy, dfyt = (
    pd.read_csv(csv_path)
    for csv_path in (x_train_path, x_test_path, y_train_path, y_test_path)
)

# Drop the 'Unnamed: 0' column (presumably a saved index -- verify against the
# export step); label frames are additionally flattened to 1-D arrays.
x_train, x_test = (frame.drop(columns=['Unnamed: 0']) for frame in (dfx, dfxt))
y_train, y_test = (
    frame.drop(columns=['Unnamed: 0']).values.flatten() for frame in (dfy, dfyt)
)

# The final argument is forwarded to train_model as `dimensions`.
y_pred = train_model(x_train, y_train, x_test, y_test, 5)

# Round the predictions once and reuse the result for every metric.
y_pred_rounded = y_pred.round()
for report_format, metric in (
    ('accuracy : {0:.2f}', accuracy_score),
    ('precision  : {0:.2f}', precision_score),
    ('recall  : {0:.2f}', recall_score),
    ('f1 score  : {0:.2f}', f1_score),
    ('confusion matrix : \n {0}', confusion_matrix),
):
    print(report_format.format(metric(y_test, y_pred_rounded)))

accuracy : 0.93
precision  : 0.76
recall  : 0.76
f1 score  : 0.76
confusion matrix : 
 [[1062   41]
 [  42  130]]
