# Keras Neural Net Modeling

## Summary
Let's build some neural networks (NNs) with Keras.

## Baseline NN

In [26]:
# Import statements
import keras
from keras.layers import Dense
import pandas as pd
from imblearn.over_sampling import RandomOverSampler

# Created functions from functions.py
from functions import metrics as custom_score
from functions import improvement as custom_change

In [3]:
# Read the pre-cleaned splits from disk. The first CSV column is used as the
# row index for every file.

# Training features and labels
X_train, y_train = (
    pd.read_csv('../Data/train/X_train.csv', index_col=0),
    pd.read_csv('../Data/train/y_train.csv', index_col=0),
)

# Testing features and labels
X_test, y_test = (
    pd.read_csv('../Data/test/X_test.csv', index_col=0),
    pd.read_csv('../Data/test/y_test.csv', index_col=0),
)

In [34]:
 # Instantiating a NN
# Baseline network: one small hidden layer feeding a single sigmoid output.
FSM_NN = keras.Sequential()

# Starting small with 30 neurons. The input width is read from the training
# frame instead of being hard-coded to 422, so the cell keeps working if the
# feature set changes.
FSM_NN.add(Dense(30, activation='relu', input_shape=(X_train.shape[1],)))

# 1 output
FSM_NN.add(Dense(1, activation='sigmoid'))

# Compiling with the Adam optimizer and binary cross-entropy loss, tracking
# accuracy, precision, recall and AUC.
FSM_NN.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc', 'Precision', 'Recall', 'AUC'],
)

# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics are not independent of the final evaluation —
# consider carving a validation split out of the training data instead.
# NOTE(review): steps_per_epoch=100 fixes each epoch at 100 batches; confirm
# that this covers the training set at the batch size in use.
FSM_NN.fit(
    X_train,
    y_train,
    epochs=10,
    steps_per_epoch=100,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x148a29f10>

In [22]:
## Evaluating NN
# Score the baseline on the test split. The unpacking order (loss first, then
# the metrics) follows the order used in the compile call.
FSM_loss, FSM_acc, FSM_prec, FSM_recall, FSM_AUC = FSM_NN.evaluate(X_test, y_test)

# Gather the metrics under readable labels for later model comparisons.
results_FSM = dict(
    zip(
        ('Accuracy', 'Precision', 'Recall', 'ROCAUC'),
        (FSM_acc, FSM_prec, FSM_recall, FSM_AUC),
    )
)



### Conclusion
The neural network is boasting some impressively bad precision here, but it is doing its best with the 30 neurons I provided it. This can definitely be improved, so let's start by giving it more brain power.

## 3 Layer NN and more Training Time
I'll add some more layers, and give the model more time to train and see if that gives us an improvement.

In [19]:
# Instantiating a deeper NN
NN = keras.Sequential()

# 3 hidden layers, doubling the width at each layer. The input width is read
# from the training frame instead of being hard-coded to 422, so the cell
# keeps working if the feature set changes.
NN.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
NN.add(Dense(64, activation='relu'))
NN.add(Dense(128, activation='relu'))

# 1 output
NN.add(Dense(1, activation='sigmoid'))

# Adam optimizer and binary cross-entropy loss, tracking accuracy, precision,
# recall and AUC.
NN.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc', 'Precision', 'Recall', 'AUC'],
)

# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation metrics are not independent of the final evaluation —
# consider carving a validation split out of the training data instead.
# NOTE(review): steps_per_epoch=100 fixes each epoch at 100 batches; confirm
# that this covers the training set at the batch size in use.
NN.fit(
    X_train,
    y_train,
    epochs=25,
    steps_per_epoch=100,
    validation_data=(X_test, y_test),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x148f2a490>

In [31]:
# Score the 3-layer network on the test split. The unpacking order (loss
# first, then the metrics) follows the order used in the compile call.
NN_loss, NN_acc, NN_prec, NN_recall, NN_AUC = NN.evaluate(X_test, y_test)

# Gather the metrics under readable labels for later model comparisons.
results_NN = dict(
    zip(
        ('Accuracy', 'Precision', 'Recall', 'ROCAUC'),
        (NN_acc, NN_prec, NN_recall, NN_AUC),
    )
)



In [25]:
# Compare the baseline metrics with the 3-layer model's metrics using the
# `improvement` helper imported from functions.py, which reports a per-metric
# change (presumably second argument minus first — confirm in functions.py).
custom_change(results_FSM, results_NN)

Change in Results
Accuracy        +0.09
Precision       +0.35
Recall          -0.62
ROCAUC          +0.06


### Analysis
Accuracy, precision and ROCAUC all improved, but recall dropped sharply. This probably has to do with the class imbalance in the data. I'll address it using oversampling, the same way I did with the CatBoost model.

# Oversampled Neural Network

In [35]:
# Set up the random over-sampler with a fixed seed so the resampled training
# set is repeatable.
ros = RandomOverSampler(random_state=15)

# Resample the training split ONLY; the test split is left untouched to
# prevent data leakage.
X_train_os, y_train_os = ros.fit_resample(X=X_train, y=y_train)

In [36]:
# Instantiating an NN to train on the oversampled data
OS_NN = keras.Sequential()

# 3 hidden layers, doubling the width at each layer. The input width is read
# from the oversampled training frame instead of being hard-coded to 422, so
# the cell keeps working if the feature set changes.
OS_NN.add(Dense(32, activation='relu', input_shape=(X_train_os.shape[1],)))
OS_NN.add(Dense(64, activation='relu'))
OS_NN.add(Dense(128, activation='relu'))

# 1 output
OS_NN.add(Dense(1, activation='sigmoid'))

# Adam optimizer and binary cross-entropy loss, tracking accuracy, precision,
# recall and AUC.
OS_NN.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc', 'Precision', 'Recall', 'AUC'],
)

# Train on the oversampled data; the untouched test split is passed as
# validation data.
# NOTE(review): because the test split is also the validation data, the
# per-epoch validation metrics are not independent of the final evaluation —
# consider carving a validation split out of the training data instead.
# NOTE(review): steps_per_epoch=100 fixes each epoch at 100 batches; confirm
# that this covers the (now larger) oversampled training set.
OS_NN.fit(
    X_train_os,
    y_train_os,
    epochs=25,
    steps_per_epoch=100,
    validation_data=(X_test, y_test),
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x147c09970>

In [37]:
# Score the oversampled-data network on the test split. The unpacking order
# (loss first, then the metrics) follows the order used in the compile call.
OS_NN_loss, OS_NN_acc, OS_NN_prec, OS_NN_recall, OS_NN_AUC = OS_NN.evaluate(X_test, y_test)

# Gather the metrics under readable labels for later model comparisons.
results_OS_NN = dict(
    zip(
        ('Accuracy', 'Precision', 'Recall', 'ROCAUC'),
        (OS_NN_acc, OS_NN_prec, OS_NN_recall, OS_NN_AUC),
    )
)



In [38]:
# Compare the 3-layer model's metrics with the oversampled model's metrics
# using the `improvement` helper imported from functions.py, which reports a
# per-metric change (presumably second argument minus first — confirm in
# functions.py).
custom_change(results_NN, results_OS_NN)

Change in Results
Accuracy        -0.02
Precision       -0.23
Recall          +0.52
ROCAUC          -0.01


### Analysis
The oversampling seems to have balanced things out. We have a decent AUC, so we aren't far off the mark, but precision is now below 0.5, which is a bit rough. There is also a large difference between the training scores and the testing scores, which means our model is overfitting. Let's go ahead and address that now.

# Stop Overfitting