In [1]:
import numpy as np  
import matplotlib.pyplot as plt  
import pandas as pd 
import random
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import preprocessing
from sklearn.decomposition import PCA
from imblearn.over_sampling import RandomOverSampler

In [2]:
# Load the raw training table.
df = pd.read_csv('train.csv')

# Label encoding (not one-hot): every category string is swapped for a single
# integer code in the same column. Labels that are absent from a mapping
# become NaN, because Series.map does not pass unknown values through.
_category_codes = {
    'Geography': {'S0': 0, 'S1': 1, 'S2': 2},
    'Gender': {'Male': 0, 'Female': 1},
}
for _column, _codes in _category_codes.items():
    df[_column] = df[_column].map(_codes)

In [3]:
# Preprocessing: min-max scale each selected feature and assemble the design
# matrix X (one row per sample, one column per entry of feature_name) together
# with the label vector y.
min_max_scaler = preprocessing.MinMaxScaler()

CreditScore = df['CreditScore']
Geography = df['Geography']
Gender = df['Gender']
Age = df['Age']
Tenure = df['Tenure']
Balance = df['Balance']
NumOfProducts = df['NumOfProducts']
HasCrCard = df['HasCrCard']
IsActiveMember = df['IsActiveMember']
EstimatedSalary = df['EstimatedSalary']
Exited = df['Exited']

# NOTE(review): Gender and Balance are extracted above but are not part of the
# model inputs below -- confirm this omission is intentional.
features = (CreditScore, Geography, Age, Tenure, NumOfProducts, HasCrCard,
            IsActiveMember, EstimatedSalary)
feature_name = ('CreditScore', 'Geography', 'Age', 'Tenure', 'NumOfProducts', 'HasCrCard',
                'IsActiveMember', 'EstimatedSalary')

# Scale every feature independently; each dict entry is an (n_samples, 1)
# column. The scaler is refitted per feature, so after the loop it holds the
# fit of the last feature only.
# NOTE(review): the scaler is fitted on all rows before any train/test split,
# so held-out rows influence the scaling range -- consider fitting on the
# training split only.
scale_feature = {}
for _name, _series in zip(feature_name, features):
    _column = np.asarray(_series, dtype=float).reshape(-1, 1)
    scale_feature[_name] = min_max_scaler.fit_transform(_column)

# Stack the scaled columns side by side. This replaces an element-by-element
# copy that assigned 1-element arrays into scalar slots (deprecated implicit
# array-to-scalar conversion in recent NumPy) and looped in pure Python.
X = np.hstack([scale_feature[_name] for _name in feature_name])

# Move values that are exactly 1 or exactly 0 slightly inside the interval.
# Both masks are taken before writing; the replacement values are neither 0
# nor 1, so this matches the original if/elif per-element logic.
_is_one = X == 1
_is_zero = X == 0
X[_is_one] = 0.9999
X[_is_zero] = 0.0001

y = Exited.to_numpy()

In [5]:
# One seed shared by every stochastic step in this cell (split, oversampling,
# network initialisation/shuffling) so that Restart & Run All reproduces the
# same report. The value matches the seed the oversampler already used.
_seed = 2

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# ratio of the held-out set equal to that of the full data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=_seed, stratify=y)

#oversampling
# Applied to the training split only; the test split is left as drawn.
ros = RandomOverSampler(random_state=_seed)
X_res, y_res = ros.fit_resample(X_train, y_train)
X_train = X_res
y_train = y_res

#parameters
_solver = 'adam'
_activation = 'relu'
_alpha = 1e-4
_hidden_layer = (32,32)
_lr_rate = 0.001
_batch = 128
_max_iter = 200

mlp = MLPClassifier(solver=_solver, activation=_activation, alpha=_alpha,
                    hidden_layer_sizes=_hidden_layer, batch_size=_batch,
                    learning_rate_init=_lr_rate, max_iter=_max_iter,
                    random_state=_seed)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)

# Per-class precision / recall / F1 on the untouched test split.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.92      0.81      0.86      1283
           1       0.48      0.71      0.57       317

   micro avg       0.79      0.79      0.79      1600
   macro avg       0.70      0.76      0.72      1600
weighted avg       0.83      0.79      0.80      1600

