# Supervised Classification Learning Algorithms
## Neural Networks

### Importing Necessary Libraries


In [1]:
%matplotlib inline
import six
import sys
sys.modules['sklearn.externals.six'] = six
import mlrose
import sklearn
import warnings
import matplotlib
import numpy as np
import mlrose_hiive
import pandas as pd
from functools import partial
from sklearn.base import clone
from sklearn import set_config
import matplotlib.pyplot as plt
from sklearn import preprocessing
from mlrose_hiive import ExpDecay
from statistics import mean, stdev
from IPython.display import display
from sklearn.metrics import f1_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold, StratifiedShuffleSplit, validation_curve, learning_curve
from sklearn.metrics import roc_curve, precision_score, accuracy_score, average_precision_score, recall_score, f1_score, classification_report, confusion_matrix
np.random.seed(44)

### Setting Graph Format Defaults

In [2]:
# Display and plotting defaults: show every DataFrame column and use a
# 10x5 figure size for all plots.
pd.set_option('display.max_columns', None)
matplotlib.rcParams['figure.figsize'] = [10, 5]

# Fixed NumPy seed so that experiments are reproducible
np.random.seed(1126)

# Print all estimator parameters in reprs, not only the changed ones
set_config(print_changed_only=False)

### Loading and Pre-processing Data

In [3]:
# Load the credit dataset.
# The first column is discarded, and the first data row (label 0) carries the
# real column names, so it is promoted to the header and removed from the data.
credit_raw = pd.read_csv('creditdata.csv').iloc[:, 1:]
header_row = credit_raw.loc[0].tolist()

creditdata = credit_raw.drop(index=0)
creditdata.columns = header_row

# reset_index() keeps the old row labels as an 'index' column; every column is
# then cast to float64.
creditdata = creditdata.reset_index().astype('float64')
creditdata.head()

Unnamed: 0,index,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month
0,1.0,20000.0,2.0,2.0,1.0,24.0,2.0,2.0,-1.0,-1.0,-2.0,-2.0,3913.0,3102.0,689.0,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1.0
1,2.0,120000.0,2.0,2.0,2.0,26.0,-1.0,2.0,0.0,0.0,0.0,2.0,2682.0,1725.0,2682.0,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1.0
2,3.0,90000.0,2.0,2.0,2.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,29239.0,14027.0,13559.0,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0.0
3,4.0,50000.0,2.0,2.0,1.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,46990.0,48233.0,49291.0,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0.0
4,5.0,50000.0,1.0,2.0,1.0,57.0,-1.0,0.0,-1.0,0.0,0.0,0.0,8617.0,5670.0,35835.0,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0.0


### Data Preprocessing (Assigning Features and Target Variables)

In [4]:
# Split the credit dataset into features and target variable
credit_feature_cols = [
    'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE',
    'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6',
    'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6',
    'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
]
credit_target_col = 'default payment next month'

# Select by label instead of reindex(): a misspelled column name now raises a
# KeyError rather than silently producing an all-NaN column.
creditX = creditdata[credit_feature_cols]  # Features

# Target variable, kept as a one-column DataFrame.
# Labels stay as they are in the data: 0 = no default, 1 = default.
# The earlier regex-based replace(['0', '1'], [1, -1]) never changed anything
# because regex replacement only touches strings and this column is float64,
# so it has been removed (the classification reports show classes 0 and 1).
creditY = creditdata[[credit_target_col]].astype('int64')  # Target variable
assert creditY[credit_target_col].isin([0, 1]).all(), "target must be binary 0/1"

creditX.info(), creditY.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 23 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   LIMIT_BAL  30000 non-null  float64
 1   SEX        30000 non-null  float64
 2   EDUCATION  30000 non-null  float64
 3   MARRIAGE   30000 non-null  float64
 4   AGE        30000 non-null  float64
 5   PAY_0      30000 non-null  float64
 6   PAY_2      30000 non-null  float64
 7   PAY_3      30000 non-null  float64
 8   PAY_4      30000 non-null  float64
 9   PAY_5      30000 non-null  float64
 10  PAY_6      30000 non-null  float64
 11  BILL_AMT1  30000 non-null  float64
 12  BILL_AMT2  30000 non-null  float64
 13  BILL_AMT3  30000 non-null  float64
 14  BILL_AMT4  30000 non-null  float64
 15  BILL_AMT5  30000 non-null  float64
 16  BILL_AMT6  30000 non-null  float64
 17  PAY_AMT1   30000 non-null  float64
 18  PAY_AMT2   30000 non-null  float64
 19  PAY_AMT3   30000 non-null  float64
 20  PAY_AM

(None, None)

### Splitting the Datasets

In [5]:
# Splitting Credit Dataset
# One stratified shuffle split: 80% training rows, 20% test rows.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=10)
train_index, test_index = next(sss.split(creditX, creditY))

# split() yields positional row numbers, so rows are selected with .iloc.
# Label-based .loc only gives the same rows while the frames carry a default
# 0..n-1 index.
creditx_train, creditx_test = creditX.iloc[train_index], creditX.iloc[test_index]
credity_train, credity_test = creditY.iloc[train_index], creditY.iloc[test_index]

### Neural Network using Random Hill Climbing Optimization 

In [6]:
%%time
# Running Random Hill Climb on Credit Dataset
rhcnn_model1 = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                 algorithm = 'random_hill_climb', 
                                 max_iters = 1000,
                                 bias = True, 
                                 is_classifier = True, 
                                 learning_rate = 0.001,
                                 early_stopping = True,
                                 clip_max = 5,
                                 max_attempts = 100,
                                 random_state = 3)

rhcnn_model1.fit(creditx_train, credity_train)

CPU times: user 34.8 s, sys: 311 ms, total: 35.1 s
Wall time: 5.88 s


NeuralNetwork(bias=True, clip_max=5, early_stopping=True, hidden_nodes=[2],
              is_classifier=True, learning_rate=0.001, max_iters=1000,
              mutation_prob=0.1, pop_size=200, restarts=0,
              schedule=<mlrose.decay.GeomDecay object at 0x7fe49ed1f490>)

### Neural Network with Random Hill Climb Train Predictions

In [7]:
# Predict on the training split with the random-hill-climb network, then print
# the per-class metrics followed by the confusion matrix.
credity_rhcnn_train_predictions = rhcnn_model1.predict(creditx_train)

rhcnn_train_report = classification_report(
    credity_train, credity_rhcnn_train_predictions, digits=5
)
print(rhcnn_train_report)

rhcnn_train_confusion = confusion_matrix(credity_train, credity_rhcnn_train_predictions)
print(rhcnn_train_confusion)

              precision    recall  f1-score   support

           0    0.77879   1.00000   0.87564     18691
           1    0.00000   0.00000   0.00000      5309

    accuracy                        0.77879     24000
   macro avg    0.38940   0.50000   0.43782     24000
weighted avg    0.60652   0.77879   0.68194     24000

[[18691     0]
 [ 5309     0]]


### Neural Network with Random Hill Climb Test Predictions

In [8]:
# Predict on the held-out test split with the random-hill-climb network, then
# print the per-class metrics followed by the confusion matrix.
credity_rhcnn_test_predictions = rhcnn_model1.predict(creditx_test)

rhcnn_test_report = classification_report(
    credity_test, credity_rhcnn_test_predictions, digits=5
)
print(rhcnn_test_report)

rhcnn_test_confusion = confusion_matrix(credity_test, credity_rhcnn_test_predictions)
print(rhcnn_test_confusion)

              precision    recall  f1-score   support

           0    0.77883   1.00000   0.87567      4673
           1    0.00000   0.00000   0.00000      1327

    accuracy                        0.77883      6000
   macro avg    0.38942   0.50000   0.43783      6000
weighted avg    0.60658   0.77883   0.68200      6000

[[4673    0]
 [1327    0]]


### Neural Network using Simulated Annealing Optimization

In [9]:
%%time
sann_model1 = mlrose.NeuralNetwork(hidden_nodes = [2], activation = 'relu',
                                 algorithm = 'simulated_annealing', 
                                 max_iters = 1000,
                                 bias = True, 
                                 is_classifier = True, 
                                 learning_rate = 0.001,
                                 early_stopping = True,
                                 clip_max = 3,
                                 max_attempts = 100,
                                 random_state = 3)


sann_model1.fit(creditx_train, credity_train)

CPU times: user 49.1 s, sys: 345 ms, total: 49.4 s
Wall time: 8.28 s


NeuralNetwork(bias=True, clip_max=3, early_stopping=True, hidden_nodes=[2],
              is_classifier=True, learning_rate=0.001, max_iters=1000,
              mutation_prob=0.1, pop_size=200, restarts=0,
              schedule=<mlrose.decay.GeomDecay object at 0x7fe49ed1f490>)

### Neural Network with Simulated Annealing Train Predictions

In [10]:
# Predict on the training split with the simulated-annealing network, then
# print the per-class metrics followed by the confusion matrix.
credity_sann_train_predictions = sann_model1.predict(creditx_train)

sann_train_report = classification_report(
    credity_train, credity_sann_train_predictions, digits=5
)
print(sann_train_report)

sann_train_confusion = confusion_matrix(credity_train, credity_sann_train_predictions)
print(sann_train_confusion)

              precision    recall  f1-score   support

           0    0.77879   1.00000   0.87564     18691
           1    0.00000   0.00000   0.00000      5309

    accuracy                        0.77879     24000
   macro avg    0.38940   0.50000   0.43782     24000
weighted avg    0.60652   0.77879   0.68194     24000

[[18691     0]
 [ 5309     0]]


### Neural Network with Simulated Annealing Test Predictions

In [11]:
# Predict on the held-out test split with the simulated-annealing network, then
# print the per-class metrics followed by the confusion matrix.
credity_sann_test_predictions = sann_model1.predict(creditx_test)

sann_test_report = classification_report(
    credity_test, credity_sann_test_predictions, digits=5
)
print(sann_test_report)

sann_test_confusion = confusion_matrix(credity_test, credity_sann_test_predictions)
print(sann_test_confusion)

              precision    recall  f1-score   support

           0    0.77883   1.00000   0.87567      4673
           1    0.00000   0.00000   0.00000      1327

    accuracy                        0.77883      6000
   macro avg    0.38942   0.50000   0.43783      6000
weighted avg    0.60658   0.77883   0.68200      6000

[[4673    0]
 [1327    0]]


### Neural Network using Genetic Algorithm Optimization 

In [12]:
%%time
gann_model1 = mlrose.NeuralNetwork(hidden_nodes = [10], activation = 'relu',
                                 algorithm = 'genetic_alg', 
                                 max_iters = 1000,
                                 bias = True, 
                                 is_classifier = True, 
                                 learning_rate = 0.001,
                                 early_stopping = True,
                                 clip_max = 3,
                                 max_attempts = 100,
                                 random_state = 3,
                                 pop_size=200,
                                 mutation_prob=0.25)
gann_model1.fit(creditx_train, credity_train)

CPU times: user 21min 55s, sys: 14.9 s, total: 22min 10s
Wall time: 3min 43s


NeuralNetwork(bias=True, clip_max=3, early_stopping=True, hidden_nodes=[10],
              is_classifier=True, learning_rate=0.001, max_iters=1000,
              mutation_prob=0.25, pop_size=200, restarts=0,
              schedule=<mlrose.decay.GeomDecay object at 0x7fe49ed1f490>)

### Neural Network with Genetic Algorithm Train Predictions

In [13]:
# Predict on the training split with the genetic-algorithm network, then print
# the per-class metrics followed by the confusion matrix.
credity_gann_train_predictions = gann_model1.predict(creditx_train)

gann_train_report = classification_report(
    credity_train, credity_gann_train_predictions, digits=5
)
print(gann_train_report)

gann_train_confusion = confusion_matrix(credity_train, credity_gann_train_predictions)
print(gann_train_confusion)

              precision    recall  f1-score   support

           0    0.77723   0.92344   0.84405     18691
           1    0.20190   0.06819   0.10194      5309

    accuracy                        0.73425     24000
   macro avg    0.48956   0.49581   0.47300     24000
weighted avg    0.64996   0.73425   0.67989     24000

[[17260  1431]
 [ 4947   362]]


### Neural Network with Genetic Algorithm Test Predictions

In [14]:
# Predict on the held-out test split with the genetic-algorithm network, then
# print the per-class metrics followed by the confusion matrix.
credity_gann_test_predictions = gann_model1.predict(creditx_test)

gann_test_report = classification_report(
    credity_test, credity_gann_test_predictions, digits=5
)
print(gann_test_report)

gann_test_confusion = confusion_matrix(credity_test, credity_gann_test_predictions)
print(gann_test_confusion)

              precision    recall  f1-score   support

           0    0.78117   0.93195   0.84992      4673
           1    0.25176   0.08063   0.12215      1327

    accuracy                        0.74367      6000
   macro avg    0.51647   0.50629   0.48603      6000
weighted avg    0.66408   0.74367   0.68896      6000

[[4355  318]
 [1220  107]]
