In [15]:
import numpy as np  
import matplotlib.pyplot as plt  
import pandas as pd 
import random
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import preprocessing
from sklearn.decomposition import PCA
from imblearn.over_sampling import RandomOverSampler

In [16]:
# Read the training file and replace the text labels in 'Geography' and
# 'Gender' with the numeric codes below. Labels missing from a table come
# out of .map() as NaN.
geography_codes = {'S0':0, 'S1':1, 'S2':2}
gender_codes = {'Male':0, 'Female':1}

df = pd.read_csv('train.csv').assign(
    Geography=lambda frame: frame['Geography'].map(geography_codes),
    Gender=lambda frame: frame['Gender'].map(gender_codes),
)

In [17]:
# Build the scaled design matrix X and the label vector y from the training
# frame `df`.
#
# The scaler is fitted once on all model columns together. Each column is
# still rescaled independently, so X holds the same values as scaling one
# column at a time, but afterwards `min_max_scaler` keeps the training
# min/max of every feature instead of only the last column's.
min_max_scaler = preprocessing.MinMaxScaler()

# Columns fed to the model, in matrix order. The earlier ten-column variant
# that also used 'Gender' and 'Balance' is disabled; the network is trained
# on these eight.
feature_name = ('CreditScore', 'Geography', 'Age', 'Tenure', 'NumOfProducts', 'HasCrCard',
                'IsActiveMember', 'EstimatedSalary')
features = tuple(df[name] for name in feature_name)

# One vectorised call replaces the per-element copy loop, which assigned
# shape-(1,) arrays into scalar slots.
X = min_max_scaler.fit_transform(df[list(feature_name)].to_numpy(dtype=float))

# Scaled columns by name, each with shape (n_rows, 1) as before.
scale_feature = {name: X[:, [col]] for col, name in enumerate(feature_name)}

y = df['Exited'].to_numpy()

In [19]:
# Train and evaluate the network on three different 80/20 splits of (X, y)
# and print a classification report for each run. The model left in `mlp`
# after the loop is the one from the last run.

# Hyper-parameters do not change between runs, so they are set once.
_solver = 'adam'
_activation = 'relu'
_alpha = 1e-4
_hidden_layer = (32, 128, 32)
_lr_rate = 0.05
_batch = 256
_max_iter = 200

for i in range(3):
    # The run index seeds both the split and the network: the three runs
    # still differ from each other, but re-running the cell reproduces them.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=i)

    # Resample the training part only; the held-out rows stay untouched.
    ros = RandomOverSampler(random_state=2)
    X_train, y_train = ros.fit_resample(X_train, y_train)

    mlp = MLPClassifier(solver=_solver, activation=_activation, alpha=_alpha,
                        hidden_layer_sizes=_hidden_layer, batch_size=_batch,
                        learning_rate_init=_lr_rate, max_iter=_max_iter,
                        random_state=i)
    mlp.fit(X_train, y_train)
    y_pred = mlp.predict(X_test)

    print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.82      0.86      1278
           1       0.49      0.66      0.56       322

   micro avg       0.79      0.79      0.79      1600
   macro avg       0.70      0.74      0.71      1600
weighted avg       0.82      0.79      0.80      1600

              precision    recall  f1-score   support

           0       0.93      0.72      0.81      1266
           1       0.43      0.79      0.56       334

   micro avg       0.74      0.74      0.74      1600
   macro avg       0.68      0.76      0.68      1600
weighted avg       0.82      0.74      0.76      1600

              precision    recall  f1-score   support

           0       0.91      0.86      0.88      1257
           1       0.56      0.67      0.61       343

   micro avg       0.82      0.82      0.82      1600
   macro avg       0.73      0.76      0.75      1600
weighted avg       0.83      0.82      0.82      1600



In [8]:
# Score the rows of test.csv with the trained network `mlp`.
df_test = pd.read_csv('test.csv')
df_test['Geography'] = df_test['Geography'].map({'S0':0, 'S1':1, 'S2':2})
df_test['Gender'] = df_test['Gender'].map({'Male':0, 'Female':1})

# Must be exactly the columns, in the same order, that the training matrix X
# was built from. Passing the ten-column variant (with 'Gender' and
# 'Balance') gives predict() a matrix whose width does not match the model.
feature_name = ('CreditScore', 'Geography', 'Age', 'Tenure', 'NumOfProducts', 'HasCrCard',
                'IsActiveMember', 'EstimatedSalary')

# Take the min/max from the training frame `df` and only apply it here, so a
# given raw value is mapped to the same scaled value as during training.
# Refitting on the test rows would shift the scale. Test values outside the
# training range end up slightly outside [0, 1].
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(df[list(feature_name)].to_numpy(dtype=float))
X_test = min_max_scaler.transform(df_test[list(feature_name)].to_numpy(dtype=float))

# No 0 -> 0.01 / 1 -> 0.99 clamping: the training matrix is not clamped, so
# the test matrix is left as scaled.
y_pred = mlp.predict(X_test)

In [9]:
# Put the predictions into the 'Exited' column of the submission template
# and save the result as to_upload.csv (without writing the frame index).
# NOTE(review): the read-back preview of to_upload.csv shows 'Unnamed: 0'
# style columns, so the template seems to carry a saved index - confirm the
# upload format expects them.
df_sample = pd.read_csv('sample_upload.csv').assign(Exited=y_pred)
df_sample.to_csv('to_upload.csv', sep=',', index=False)

In [10]:
# Read the written file back and show its first 20 rows as a sanity check.
df_check = pd.read_csv('to_upload.csv')
df_check.head(20)

Unnamed: 0.1,Unnamed: 0,RowNumber,Exited
0,0,2209,0
1,1,9924,0
2,2,4617,0
3,3,6077,1
4,4,9240,0
5,5,4834,0
6,6,8523,0
7,7,2826,0
8,8,871,0
9,9,6698,0
