In [4]:
# General imports
import numpy as np

# Data Analysis
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import MinMaxScaler, Normalizer


# Custom Stuff
# Required for importing modules from the parent directory.
import os,sys,inspect
# A notebook cell has no real source file: inspect.getfile() on the cell frame
# yields a kernel-generated name (an "<ipython-input-...>" pseudo-name or, on
# some kernels, a path inside a temporary directory), so the project location
# cannot be derived from it reliably. The working directory is used instead.
# NOTE(review): assumes the kernel's working directory is the notebook's own
# directory (the Jupyter default) - confirm if the kernel is started elsewhere.
currentdir = os.path.abspath(os.getcwd())
parentdir = os.path.dirname(currentdir)
# Put the parent directory first so that its packages win name lookups.
sys.path.insert(0,parentdir)

from common import data_parser, misc
from common.model_trainer import ModelTrainer
import companies_data_preprocessor

# Data Preparation

In [5]:
# Load the companies data set through the project parser.
# NOTE(review): the meaning of the argument 5 is defined by
# data_parser.parse_companies and is not visible here - confirm.
df = data_parser.parse_companies(5)

# Split the parsed frame into the feature data and its labels using the
# project preprocessor, with min-max scaling enabled and imputation set to 0.
raw_data, labels = companies_data_preprocessor.preprocess(
    df,
    MinMaxScaling=True,
    imputation=0,
)




# Model Training

In [6]:
# Simple imputation: every missing value is replaced by the constant 0.
# np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
preprocessed_data = pd.DataFrame(
    SimpleImputer(
        missing_values=np.nan,
        strategy="constant",
        fill_value=0,
    ).fit_transform(raw_data)
)

# Normalization with l1 norm
preprocessed_data = pd.DataFrame(
    Normalizer(norm="l1").fit_transform(preprocessed_data)
)



#plot_params(preprocessed_data, labels, fileName="companies_knn_Imputation_0_l1Norm", title="Imputation with 0, normalization with l1")


# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    preprocessed_data,
    labels,
    test_size=0.3,
    random_state=1,
)

# Candidate architectures: every two-layer combination with 5 to 9 units in
# each layer, in row-major order ((5, 5), (5, 6), ..., (9, 9)).
hidden_layers = [
    (first_layer, second_layer)
    for first_layer in range(5, 10)
    for second_layer in range(5, 10)
]
print(hidden_layers)

# Parameter grid handed to the project-local ModelTrainer.
params = dict(
    hidden_layer_sizes=hidden_layers,
    alpha=[0.001, 0.0001],
    # "solver": ["lbfgs", "sgd", "adam"],
)

# Train an MLPClassifier for the grid above and score it with the project's
# calculate_score function; thread_cnt=8 is forwarded to ModelTrainer.
modeltrainer = ModelTrainer(
    MLPClassifier,
    params,
    x_train,
    y_train,
    x_test,
    y_test,
    companies_data_preprocessor.calculate_score,
    thread_cnt=8,
)
modeltrainer.train()
print(modeltrainer.result)


[(5, 5), (5, 6), (5, 7), (5, 8), (5, 9), (6, 5), (6, 6), (6, 7), (6, 8), (6, 9), (7, 5), (7, 6), (7, 7), (7, 8), (7, 9), (8, 5), (8, 6), (8, 7), (8, 8), (8, 9), (9, 5), (9, 6), (9, 7), (9, 8), (9, 9)]
Finished evaluation
Best parameteters found with: {'hidden_layer_sizes': (5, 5), 'alpha': 0.0001}
score= 1.2492949802594473
Total evaluation time = 13.00s
   hidden_layer_sizes   alpha     score
0              (5, 5)  0.0010  1.192329
1              (5, 5)  0.0001  1.249295
2              (5, 6)  0.0010  1.192329
3              (5, 6)  0.0001  1.249295
4              (5, 7)  0.0010  1.249295
5              (5, 7)  0.0001  1.249295
6              (5, 8)  0.0010  1.249295
7              (5, 8)  0.0001  1.192329
8              (5, 9)  0.0010  1.249295
9              (5, 9)  0.0001  1.197970
10             (6, 5)  0.0010  1.249295
11             (6, 5)  0.0001  1.249295
12             (6, 6)  0.0010  1.055274
13             (6, 6)  0.0001  1.249295
14             (6, 7)  0.0010  1.249295
15  