In [10]:
from collections import defaultdict
import numpy as np
import pandas
import os
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor


def read_data(path):
    input_data = np.array([])
    output_data = np.array([])
    category_index_input = dict()
    category_index_output = dict()
    for file in os.listdir(path):
        if file.endswith('.csv'):
            file_path = path + file
            dataframe = pandas.read_csv(file_path)
            dataset = dataframe.values
            if not output_data.any():
                output_data = dataset[:,0:3]
                input_data = dataset[:,3:]
            else:
                output_data = np.append(output_data, dataset[:,0:3], axis=0)
                input_data = np.append(input_data, dataset[:,3:], axis=0)
    # Set category indices
    data_types = dataframe.columns
    input_categories = data_types[3:]
    output_categories = data_types[0:3]
    for i, category in enumerate(input_categories):
        category_index_input[category] = i
    for i,category in enumerate(output_categories):
        category_index_output[category] = i
    return category_index_input, category_index_output, input_data, output_data


In [5]:
category_index_input, category_index_output, input_data, output_data = read_data("train_data/")

In [8]:
X_train, X_test, y_train, y_test = train_test_split(input_data, output_data, test_size=0.2, random_state=42)

In [208]:

X_train

array([[  8.66276000e+01,   7.98884000e-01,   1.14185000e-03, ...,
          9.49023000e+00,   9.11389000e+00,   8.99443000e+00],
       [  7.15689000e+01,  -7.72318000e-01,   1.02466000e-01, ...,
          1.17515000e+00,   1.14147000e+00,   1.14462000e+00],
       [  1.30890000e+02,  -3.97034000e-01,  -8.26089000e-02, ...,
          3.32600000e+00,   3.11937000e+00,   3.02534000e+00],
       ..., 
       [  1.46223000e+02,   3.34005000e-01,   1.99369000e-02, ...,
          7.04835000e+00,   6.75053000e+00,   6.67135000e+00],
       [  1.37943000e+02,  -1.61521000e-01,   5.64173000e-02, ...,
          4.37856000e+00,   4.22181000e+00,   4.19908000e+00],
       [  8.25764000e+01,   4.36719000e-01,   2.04623000e-02, ...,
          7.58969000e+00,   7.26971000e+00,   7.18510000e+00]])

In [17]:
MLPR = MLPRegressor(hidden_layer_sizes=(90, ), activation='logistic', solver='lbfgs', alpha=0.0001, batch_size='auto', 
                    learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, 
                    random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, 
                    early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

In [18]:
MLPR.fit(X_train, y_train)

MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(90,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [19]:
import pickle
pickle.dump(MLPR, open("MLPR.p", "wb" ) )

In [21]:
model = pickle.load(open("MLPR.p", "rb" ) )

In [211]:
predicted = MLPR.predict(X_test)

In [124]:
for i in range(len(predicted)):
    if predicted[i] < 0.5:
        predicted[i] = 0
    else:
        predicted[i] = 1
        

In [199]:
import math
true_predicted = []
for i in range(len(predicted)):
    if predicted[i][2] == y_test[i][2]:
        true_predicted.append(predicted[i])

In [216]:
predicted[3]

array([ 0.94302076,  0.25947137,  0.15552017])

In [217]:
y_test[3]

array([ 1.        ,  0.        , -0.01768568])