In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import mglearn
%matplotlib inline

In [2]:
import os
path = 'E:\Python Consumer Credit'
os.chdir(path)

In [3]:
df_tr=pd.read_csv('df__under_tr.csv',header=None)
df_ts=pd.read_csv('df__under_ts.csv',header=None)

In [4]:
X_train = df_tr.iloc[:,:-1].values
y_train = df_tr.iloc[:,-1].values
X_test = df_ts.iloc[:,:-1].values
y_test = df_ts.iloc[:,-1].values
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(557088, 14)
(139272, 14)
(557088,)
(139272,)


#### Multi Layer Perceptron (Neural Network)

In [5]:
from sklearn.neural_network import MLPClassifier
nnc = MLPClassifier(activation='relu', hidden_layer_sizes=[200, 100], learning_rate='adaptive', max_iter=1500, solver='adam').fit(X_train,y_train) 
y_tr_nnc_pred = nnc.predict(X_train)
y_ts_nnc_pred = nnc.predict(X_test)

In [6]:
from sklearn.metrics import confusion_matrix, accuracy_score
nnc_cm_tr = confusion_matrix(y_train,y_tr_nnc_pred)
print(nnc_cm_tr)
accuracy_score(y_train, y_tr_nnc_pred)

[[213183  65639]
 [ 97043 181223]]


0.7079779137227871

In [7]:
nnc_cm_ts = confusion_matrix(y_test,y_ts_nnc_pred)
print(nnc_cm_ts)
accuracy_score(y_test, y_ts_nnc_pred)

[[52612 16746]
 [25097 44817]]


0.6995591360790395

## Applying k-fold Cross Validation

In [8]:
from sklearn.model_selection import KFold, cross_val_score
accuracies = cross_val_score(estimator = nnc, X = X_train, y = y_train, cv = 5)
print('Accuracy: {:.2f} %'.format(accuracies.mean()*100))
print('Standard deviation: {:.2f} %'.format(accuracies.std()*100))

Accuracy: 69.69 %
Standard deviation: 0.16 %


In [9]:
accuracies

array([0.69677251, 0.69573139, 0.69977921, 0.69703008, 0.69533375])

## Grid Search

In [10]:
from sklearn.model_selection import GridSearchCV
parameters = [{'hidden_layer_sizes':[100,200,300,[200,50],[100,100],[200,100]],
               'activation':['identity','logistic','tanh','relu'],
               'solver': ['adam'], 
               'learning_rate':['constant','invscaling','adaptive'],
               'max_iter': [1000,1500,2000 ]}]
grid_search = GridSearchCV(estimator = nnc, 
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 5,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print('Best accuracy: {:.2f} %'.format(best_accuracy*100))
print('Best parameters: ',best_parameters)

Best accuracy: 69.75 %
Best parameters:  {'activation': 'relu', 'hidden_layer_sizes': [200, 100], 'learning_rate': 'adaptive', 'max_iter': 1500, 'solver': 'adam'}


## Metrics

In [8]:
from sklearn.metrics import roc_auc_score, jaccard_score, f1_score, precision_score, recall_score
print(roc_auc_score(y_test,y_ts_nnc_pred))
print(jaccard_score(y_test,y_ts_nnc_pred))
print(f1_score(y_test,y_ts_nnc_pred))
print(precision_score(y_test,y_ts_nnc_pred))
print(recall_score(y_test,y_ts_nnc_pred))

0.6997937303030213
0.5171590122317101
0.6817466172790678
0.7279859655962185
0.6410304087879395
