In [52]:
import pandas as pd
import numpy as np
import pickle
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import chi2_kernel

In [53]:
train = pd.read_csv("train.csv", encoding='utf-8')
test = pd.read_csv("test.csv", encoding='utf-8')

In [54]:
train_label = train.loc[:,"satisfaction"]
train_data = train.drop('satisfaction', axis=1)
test_label = test.loc[:,"satisfaction"]
test_data = test.drop('satisfaction', axis=1)

In [55]:
drop_row = [i for i in range(5000, len(train_data))]
train_data.drop(drop_row, inplace=True)
train_label.drop(drop_row, inplace=True)

drop_row = [i for i in range(1000, len(test_data))]
test_data.drop(drop_row, inplace=True)
test_label.drop(drop_row, inplace=True)

In [56]:
train_data = train_data.T.values.tolist()
train_label = train_label.T.values.tolist()
test_data = test_data.T.values.tolist()
test_label = test_label.T.values.tolist()

In [57]:
def normalize(data: list, col_index: list = None)-> list:
    if col_index is None:
        temp_lst = [] 
        temp_map = {}
        map_value = 0
        for i in data:
            if i not in temp_map:
                temp_map[i] = map_value
                map_value += 1
            temp_lst.append(temp_map[i])
        data = temp_lst
        return data
        
    for col in col_index:
        temp_lst = [] 
        temp_map = {}
        map_value = 0
        for i in data[col]:
            if i not in temp_map:
                temp_map[i] = map_value
                map_value += 1
            temp_lst.append(temp_map[i])
        data[col] = temp_lst
    return data


def fillnan(data: list)-> list:
    for i in data:
        if np.isnan(i).any():
            index = data.index(i)
            data[index] = pd.Series(data[index]).fillna(0).tolist()
    return data


In [58]:
train_data = normalize(train_data, [2,3,5,6])
train_label = normalize(train_label)
test_data = normalize(test_data, [2,3,5,6])
test_label = normalize(test_label)

train_data = fillnan(train_data)
train_label = fillnan(train_label)
test_data = fillnan(test_data)
test_label = fillnan(test_label)


In [59]:
train_data = pd.DataFrame(train_data).T.values.tolist()
test_data = pd.DataFrame(test_data).T.values.tolist()

In [60]:
transformed_train_data = chi2_kernel(train_data, gamma=0.5)
transformed_train_data

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [66]:
transformed_test_data = chi2_kernel(test_data, train_data, gamma=0.5)
transformed_test_data.shape

(1000, 5000)

In [62]:
mlp = MLPClassifier(hidden_layer_sizes=(256,64,16,4), learning_rate_init=0.001, max_iter=100, tol=0.00001, verbose=True)
mlp.fit(transformed_train_data, train_label)

Iteration 1, loss = 0.70811988
Iteration 2, loss = 0.68471211
Iteration 3, loss = 0.64964175
Iteration 4, loss = 0.50552801
Iteration 5, loss = 0.24728940
Iteration 6, loss = 0.19467784
Iteration 7, loss = 0.18738326
Iteration 8, loss = 0.18189203
Iteration 9, loss = 0.17698344
Iteration 10, loss = 0.17245981
Iteration 11, loss = 0.16825104
Iteration 12, loss = 0.16426344
Iteration 13, loss = 0.16048676
Iteration 14, loss = 0.15684861
Iteration 15, loss = 0.15339801
Iteration 16, loss = 0.15005684
Iteration 17, loss = 0.14685981
Iteration 18, loss = 0.14375067
Iteration 19, loss = 0.14077931
Iteration 20, loss = 0.13786549
Iteration 21, loss = 0.13507673
Iteration 22, loss = 0.13237003
Iteration 23, loss = 0.12971837
Iteration 24, loss = 0.12716191
Iteration 25, loss = 0.12469176
Iteration 26, loss = 0.12226787
Iteration 27, loss = 0.11993465
Iteration 28, loss = 0.11764715
Iteration 29, loss = 0.11542332
Iteration 30, loss = 0.11327118
Iteration 31, loss = 0.11116175
Iteration 32, los



In [63]:
filename = "enhanced_mlp_classifier.sav"
with open(filename,'wb') as outfile:
    pickle.dump(mlp, outfile)

In [67]:
accuracy_score(test_label, mlp.predict(transformed_test_data))

0.545