In [3]:
import numpy as np  
import matplotlib.pyplot as plt  
import pandas as pd 
import random
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import preprocessing
from sklearn.svm import SVC 
from imblearn.over_sampling import RandomOverSampler

In [4]:
# Integer codes for the two categorical columns. This is a plain label
# (integer) encoding: each column stays a single column and no indicator
# columns are created. Values missing from a table are mapped to NaN by
# Series.map.
GEOGRAPHY_CODES = {'S0':0, 'S1':1, 'S2':2}
GENDER_CODES = {'Male':0, 'Female':1}

# Load the training table and swap both categorical columns for their codes
# in one chained expression (column order is left unchanged).
df = pd.read_csv('train.csv').assign(
    Geography=lambda frame: frame['Geography'].map(GEOGRAPHY_CODES),
    Gender=lambda frame: frame['Gender'].map(GENDER_CODES),
)

In [5]:
# Build the model inputs: a min-max scaled feature matrix `X` and the label
# vector `y`, both taken from the already-loaded DataFrame `df`.
min_max_scaler = preprocessing.MinMaxScaler()

# Every column is pulled out as its own Series; only the ones listed in
# `features` below end up in the feature matrix.
CreditScore = df['CreditScore']
Geography = df['Geography']
Gender = df['Gender']
Age = df['Age']
Tenure = df['Tenure']
Balance = df['Balance']
NumOfProducts = df['NumOfProducts']
HasCrCard = df['HasCrCard']
IsActiveMember = df['IsActiveMember']
EstimatedSalary = df['EstimatedSalary']
Exited = df['Exited']

# Select the features. `features` and `feature_name` are parallel tuples:
# entry k of one describes entry k of the other.
features = (Geography, Age, Tenure, NumOfProducts, HasCrCard,
            IsActiveMember)
feature_name = ('Geography', 'Age', 'Tenure', 'NumOfProducts', 'HasCrCard',
                'IsActiveMember')

# Feature preprocessing: the scaler is re-fit on each column independently,
# and each scaled column is stored as an (n_rows, 1) array under its name.
# NOTE(review): fitting happens on all rows, before the train/test split done
# in a later cell, so test rows contribute to the min/max used for scaling.
scale_feature = {}
for name, column in zip(feature_name, features):
    column_values = np.asarray(column, dtype=float).reshape(-1, 1)
    scale_feature[name] = min_max_scaler.fit_transform(column_values)

# Stack the scaled columns side by side, in `feature_name` order, giving an
# (n_rows, n_features) matrix. Stacking whole columns avoids assigning
# size-1 arrays into scalar slots, which NumPy has deprecated.
X = np.hstack([scale_feature[name] for name in feature_name])

# Replace exact 1s with 0.9999 and exact 0s with 0.0001 so no entry sits on
# the boundary of [0, 1]. The two masks are disjoint, so the order of the two
# assignments does not matter.
X[X == 1] = 0.9999
X[X == 0] = 0.0001

# Labels as a plain NumPy array.
y = np.array(Exited.tolist())

In [7]:
# Train an MLP on a relabelled training set and report binary metrics on the
# held-out 20 %.

# One seed shared by the split and the network (weight init / batch
# shuffling) so that Restart & Run All reproduces the same report.
_seed = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=_seed)

# Divide class 0 into 4 pseudo-classes (0, 2, 3, 4), handed out round-robin
# in row order; every label that is not 0 becomes class 1.
# NOTE(review): presumably done to even out class sizes against class 1 --
# confirm the intent with the author.
class_add_list = [0, 2, 3, 4]
y_train = np.asarray(y_train)
is_class0 = y_train == 0
y_more_class = np.ones(len(y_train), dtype=int)
# np.resize repeats `class_add_list` cyclically to the requested length,
# which is exactly the round-robin assignment.
y_more_class[is_class0] = np.resize(class_add_list, int(is_class0.sum()))
y_train = y_more_class

#parameters
_solver = 'adam'
_activation = 'relu'
_alpha = 1e-4
_hidden_layer = (32, 32)
_lr_rate = 0.001
_batch = 128
_max_iter = 200

mlp = MLPClassifier(solver=_solver, activation=_activation, alpha=_alpha,
                    hidden_layer_sizes=_hidden_layer, batch_size=_batch,
                    learning_rate_init=_lr_rate, max_iter=_max_iter,
                    random_state=_seed)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)

# Tidy up the results: collapse the pseudo-classes back to binary, so any
# prediction other than 1 counts as class 0 (matching the labels in y_test).
y_pred[y_pred != 1] = 0
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.80      0.85      1264
           1       0.48      0.72      0.58       336

   micro avg       0.78      0.78      0.78      1600
   macro avg       0.70      0.76      0.71      1600
weighted avg       0.82      0.78      0.79      1600

