In [1]:
%matplotlib notebook

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from imblearn.over_sampling import SMOTE
from collections import Counter

from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import balanced_accuracy_score
from scipy.stats import uniform, randint, loguniform, norm

from tensorflow.python.keras.callbacks import TensorBoard
from time import time 

# Pre-processing

In [2]:
# Read the data
data = pd.read_csv('C:/Users/vabalagon/Desktop/Meta/New Workflow/data/2 data for modeling (With PCA).csv')

# Get the features and target variable from the dataframe
X = data.drop(['Survey ID', 'Response Date', 'Likelihood to Recommend'], axis=1).to_numpy()
y = data['Likelihood to Recommend'].to_numpy()

# Split the data into test and train sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X.copy(), y, test_size = 0.25, shuffle=True, random_state=42) #, stratify=y_smote

# Apply SMOTE oversampling to the TRAINING SET ONLY
sm = SMOTE(random_state=42)
X_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
print('Resampled dataset shape %s' % Counter(y_train_smote))

Resampled dataset shape Counter({0: 8116, 2: 8116, 1: 8116})


##### Get a validation set from the training set

In [3]:
X_val = X_train_smote[:2700]
y_val = y_train_smote[:2700]

X_train_smote = X_train_smote[2700:]
y_train_smote = y_train_smote[2700:]

# Deep Neural Network

In [4]:
from tensorflow import keras

keras.__version__

'2.9.0'

##### Single hidden layer

In [5]:
λ=0.00001 #.01
drop_out_proba=0.2 #.8
dnn_model = keras.models.Sequential([
                                        keras.layers.Dense(9, input_dim = 9),
    
                                        keras.layers.Dense(30, activation='relu', 
                                                           activity_regularizer=keras.regularizers.l2(l=λ)),
                                        #keras.layers.Dropout(drop_out_proba),
                                        #keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
                    
    
                                        keras.layers.Dense(50, activation='relu', 
                                                           activity_regularizer=keras.regularizers.l2(l=λ) ),
                                        #keras.layers.Dropout(drop_out_proba),
                                        #keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    
                                        keras.layers.Dense(30, activation='relu', 
                                                           activity_regularizer=keras.regularizers.l2(l=λ)),
                                        #keras.layers.Dropout(drop_out_proba),
                                        #keras.layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001),
    
                                        keras.layers.Dense(3, activation='softmax'),
])

# Tensorboard instance
tensorboard = TensorBoard(log_dir='logs/{}'.format(time()))

# Compiles the neural network mode, specifies the loss function, optimization method, and the accuracy
dnn_model.compile(loss="sparse_categorical_crossentropy",
                optimizer="Adam",
                metrics=['accuracy']) #balanced_accuracy_score

history = dnn_model.fit(X_train_smote, y_train_smote, 
                        epochs=70, 
                        validation_data=(X_val, y_val),
                       callbacks=[tensorboard])

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
 66/677 [=>............................] - ETA: 1:42 - loss: 0.9423 - accuracy: 0.4787

KeyboardInterrupt: 

#### Evaluate the model

In [None]:
history.history.keys()

In [None]:
plt.figure()
plt.plot(history.history['loss'], label='Train', lw=.7)
plt.plot(history.history['val_loss'], label='Validation', lw=.7)
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.legend()

In [None]:
plt.figure()
plt.plot(history.history['accuracy'], label='Train', lw=.7, c='r')
plt.plot(history.history['val_accuracy'], label='Validation', lw=.7, c='g')
plt.xlabel('Iterations')
plt.ylabel('Accuracy')
plt.ylim(.20, 1)
plt.legend()

##### Balanced accuracy

In [None]:
y_test_pred = dnn_model.predict(X_test)
y_test_pred = np.array([np.argmax(y_i) for y_i in y_test_pred])


print("Test set balanced accuracy score:", balanced_accuracy_score(y_test, y_test_pred))

##### Accuracy per class

In [None]:
for y_i in np.unique(y_test)[::-1]:
    print('class: ', y_i)
    
    # Find the indices of y_i in the true labels
    indices_i = np.where(y_test == y_i)
    
    # Computes the accuracy
    print('Accuracy: ', str(round(np.sum(y_test[indices_i] == y_test_pred[indices_i])/ len(np.where(y_test==y_i)[0]), 3)))