In [3]:
import pandas as pd
import helpers.processing_helpers as ph

from sklearn.model_selection import RandomizedSearchCV

from sklearn.metrics import make_scorer

from sklearn.neural_network import  MLPRegressor

from sklearn.discriminant_analysis import StandardScaler
from sklearn.pipeline import Pipeline


In [4]:
df_dev = pd.read_csv("./dataset/development.csv")

In [5]:
noise_indexes = [0,7,12,15,16,17]
acc_idxs = [1,2,3,4,5,6,8,9,10,11,13,14]
features = ["pmax", "negpmax", 'area', 'tmax', 'rms']

sensors_removed = df_dev.drop(columns=ph.get_column_names(features, noise_indexes))
df = sensors_removed.drop(columns=ph.get_column_names(['tmax', 'rms', 'area'], acc_idxs))

In [6]:
score = make_scorer(ph.mean_euclid_dist, greater_is_better=False)

In [None]:
# shuffling the dataset
df = df.sample(frac=1)

In [7]:
# These will be the constant parameters

pipe = Pipeline([
    ('scale', StandardScaler()),
    ('clf', MLPRegressor(random_state=42, max_iter=200, n_iter_no_change=50))
])

In [8]:
y_train = df[['x', 'y']].copy()

X_train = df.drop(columns=['x', 'y'])

In [9]:
param_grid = {'clf__hidden_layer_sizes' : [(50,),
                                            (25, 25),
                                            (35, 15),
                                            (20, 10, 20),
                                            (25, 15, 10),
                                            (20, 15, 10, 5),
                                            (15, 10, 10, 15)],
            'clf__activation' : ['relu', 
                              'logistic', 
                              'tanh', 
                              'identity'],
            'clf__learning_rate_init': [0.01, 0.001],
            'clf__learning_rate': ['constant', 'invscaling', 'adaptive'],
            'clf__solver': ['adam', 'sgd', 'lbfgs']
              }

In [10]:
# n_iter is the amount of combinations to test
gridsearch = RandomizedSearchCV(pipe, param_grid, scoring=score, cv=2, verbose=2, n_iter=3)
gridsearch.fit(X_train, y_train)

Fitting 2 folds for each of 3 candidates, totalling 6 fits




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(35, 15), clf__learning_rate=constant, clf__learning_rate_init=0.001, clf__solver=adam; total time= 2.1min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(35, 15), clf__learning_rate=constant, clf__learning_rate_init=0.001, clf__solver=adam; total time= 2.1min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=adam; total time= 2.2min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=adam; total time= 2.2min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time=  39.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time=  41.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [11]:
results = pd.concat([pd.DataFrame(gridsearch.cv_results_["params"]),
                     pd.DataFrame(gridsearch.cv_results_["mean_test_score"], columns=["MED"]),
                     pd.DataFrame(gridsearch.cv_results_["mean_fit_time"], columns=["Time"])],
                     axis=1)
df = results.sort_values('MED', ascending=False)

# Permanently changes the pandas settings
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
 
# All dataframes hereafter reflect these changes.
display(df)

pd.reset_option('all')

Unnamed: 0,clf__solver,clf__learning_rate_init,clf__learning_rate,clf__hidden_layer_sizes,clf__activation,MED,Time
2,lbfgs,0.01,constant,"(25, 15, 10)",relu,-89.642781,40.247049
0,adam,0.001,constant,"(35, 15)",tanh,-107.740422,124.827203
1,adam,0.01,adaptive,"(25, 15, 10)",tanh,-138.283721,133.554533


  pd.reset_option('all')
