##### import section

In [65]:
import pandas as pd
import numpy as np
import pickle
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

##### Reading train & test data from files

In [57]:
train = pd.read_csv("train.csv", encoding='utf-8')
test = pd.read_csv("test.csv", encoding='utf-8')

##### Seprating data from its labels

In [58]:
train_label = train.loc[:,"satisfaction"]
train_data = train.drop('satisfaction', axis=1)
test_label = test.loc[:,"satisfaction"]
test_data = test.drop('satisfaction', axis=1)

In [59]:
train_data = train_data.T.values.tolist()
train_label = train_label.T.values.tolist()
test_data = test_data.T.values.tolist()
test_label = test_label.T.values.tolist()

##### Normalizing data

In [60]:
def normalize(data: list, col_index: list = None)-> list:
    if col_index is None:
        temp_lst = [] 
        temp_map = {}
        map_value = 0
        for i in data:
            if i not in temp_map:
                temp_map[i] = map_value
                map_value += 1
            temp_lst.append(temp_map[i])
        data = temp_lst
        return data
        
    for col in col_index:
        temp_lst = [] 
        temp_map = {}
        map_value = 0
        for i in data[col]:
            if i not in temp_map:
                temp_map[i] = map_value
                map_value += 1
            temp_lst.append(temp_map[i])
        data[col] = temp_lst
    return data


def fillnan(data: list)-> list:
    for i in data:
        if np.isnan(i).any():
            index = data.index(i)
            data[index] = pd.Series(data[index]).fillna(0).tolist()
    return data


In [61]:
train_data = normalize(train_data, [2,3,5,6])
train_label = normalize(train_label)
test_data = normalize(test_data, [2,3,5,6])
test_label = normalize(test_label)

train_data = fillnan(train_data)
train_label = fillnan(train_label)
test_data = fillnan(test_data)
test_label = fillnan(test_label)


In [62]:
train_data = pd.DataFrame(train_data).T.values.tolist()
test_data = pd.DataFrame(test_data).T.values.tolist()

##### Configuration and deployment of the classifier on the train data

In [63]:
mlp = MLPClassifier(hidden_layer_sizes=(128,64,16,), learning_rate_init=0.001, max_iter=300, tol=0.00001, verbose=True)
mlp.fit(train_data, train_label)

Iteration 1, loss = 14.33012920
Iteration 2, loss = 13.24024492
Iteration 3, loss = 12.14205671
Iteration 4, loss = 11.25406041
Iteration 5, loss = 10.65808545
Iteration 6, loss = 10.19422550
Iteration 7, loss = 8.28850232
Iteration 8, loss = 8.77746755
Iteration 9, loss = 7.38160780
Iteration 10, loss = 6.23066581
Iteration 11, loss = 5.97446534
Iteration 12, loss = 4.40158112
Iteration 13, loss = 3.28491261
Iteration 14, loss = 2.97608976
Iteration 15, loss = 1.82337047
Iteration 16, loss = 1.39295207
Iteration 17, loss = 1.07535639
Iteration 18, loss = 0.65028393
Iteration 19, loss = 0.62421621
Iteration 20, loss = 0.61116862
Iteration 21, loss = 0.58403407
Iteration 22, loss = 0.61632715
Iteration 23, loss = 0.58488034
Iteration 24, loss = 0.75780615
Iteration 25, loss = 0.80569218
Iteration 26, loss = 0.65853738
Iteration 27, loss = 0.68463022
Iteration 28, loss = 0.67552984
Iteration 29, loss = 0.67961056
Iteration 30, loss = 0.69328413
Iteration 31, loss = 0.65522826
Iteration 3

##### Save classifier

In [64]:
filename = "mlp_classifier.sav"
with open(filename,'wb') as outfile:
    pickle.dump(mlp, outfile)

##### Evaluation

In [66]:
accuracy_score(test_label, mlp.predict(test_data))

0.41854019094548817