In [6]:
from sklearn.neural_network import MLPRegressor
from scipy import sparse
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
from numpy import genfromtxt

In [7]:
X = np.load("data/data_prepared.npy")
y = genfromtxt('data/data_prepared_labels.csv', delimiter=',')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state=42)
X_train, Y_train = X_train[:50000], y_train[:50000]

In [8]:
regressor = MLPRegressor(max_iter = 50)

In [1]:
for x in range(1, 5):
    for y in [50, 100, 200]:
        print("(", end = '')
        for z in range(x):
            print(str(y) + ', ', end = '')
        print(")", end = ', ')

(50, ), (100, ), (200, ), (50, 50, ), (100, 100, ), (200, 200, ), (50, 50, 50, ), (100, 100, 100, ), (200, 200, 200, ), (50, 50, 50, 50, ), (100, 100, 100, 100, ), (200, 200, 200, 200, ), 

First try to find the best size

In [10]:
param_grid = [
    {'batch_size' : [256], 'hidden_layer_sizes' : [(50, ), (100, ), (200, ), (50, 50, ), (100, 100, ), (200, 200, ), (50, 50, 50, ), (100, 100, 100, ), (200, 200, 200, ), (50, 50, 50, 50, ), (100, 100, 100, 100, ), (200, 200, 200, 200, ),]},
]
grid_search = GridSearchCV(regressor, param_grid, cv = 5, scoring='neg_mean_squared_error', return_train_score=True, verbose=3)

In [None]:
grid_search.fit(X_train, Y_train)

In [12]:
cvres = grid_search.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(np.sqrt(-mean_score), params)

343.97044654195463 {'hidden_layer_sizes': (50,)}
338.76356011308667 {'hidden_layer_sizes': (100,)}
335.19960434802795 {'hidden_layer_sizes': (200,)}
333.9572750222429 {'hidden_layer_sizes': (50, 50)}
331.02353830870595 {'hidden_layer_sizes': (100, 100)}
330.7279477365592 {'hidden_layer_sizes': (200, 200)}
329.37718391076174 {'hidden_layer_sizes': (50, 50, 50)}
330.6324426599605 {'hidden_layer_sizes': (100, 100, 100)}
333.9530040321373 {'hidden_layer_sizes': (200, 200, 200)}
330.6829529907919 {'hidden_layer_sizes': (50, 50, 50, 50)}
332.8201530916484 {'hidden_layer_sizes': (100, 100, 100, 100)}
344.5413814210838 {'hidden_layer_sizes': (200, 200, 200, 200)}
341.1986675676969 {'batch_size': 256}
331.4010698442091 {'learning_rate_init': 0.01}


Now we search for the best batch size and learning rate, also we try different numbers hidden layers

In [10]:
param_grid = [
    {'hidden_layer_sizes' : [(1000, 500, 100), (1000), (500, 250, 100), (100, 50), (200, 100)]},
    {'batch_size' : [16, 32, 64, 128]},
    {'learning_rate_init' : [0.005, 0.007, 0.01]}
]

grid_search = GridSearchCV(regressor, param_grid, cv = 5, scoring='neg_mean_squared_error', return_train_score=True, verbose=3)

In [None]:
grid_search.fit(X_train, Y_train)

In [13]:
cvres = grid_search.cv_results_
for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
    print(np.sqrt(-mean_score), params)

346.3243302380783 {'hidden_layer_sizes': (1000, 500, 100)}
335.9010449163782 {'hidden_layer_sizes': 1000}
342.22532887126033 {'hidden_layer_sizes': (500, 250, 100)}
338.42153737630036 {'hidden_layer_sizes': (100, 50)}
339.49327684992573 {'hidden_layer_sizes': (200, 100)}
335.0155828897767 {'batch_size': 16}
336.58638198467 {'batch_size': 32}
339.4732166896819 {'batch_size': 64}
349.2103584093623 {'batch_size': 128}
334.7839751822925 {'learning_rate_init': 0.005}
334.2307120308362 {'learning_rate_init': 0.007}
335.7986563713893 {'learning_rate_init': 0.01}
