In [1]:
import pandas as pd
import helpers.processing_helpers as ph

from sklearn.model_selection import RandomizedSearchCV

from sklearn.metrics import make_scorer

from sklearn.neural_network import  MLPRegressor

from sklearn.discriminant_analysis import StandardScaler
from sklearn.pipeline import Pipeline


In [2]:
# Load the development set; the path is resolved relative to the notebook's
# working directory.
df_dev = pd.read_csv(
    filepath_or_buffer="./dataset/development.csv",
)

In [3]:
# Per-sensor feature families and the two sensor-index groups used for pruning.
# NOTE(review): presumably `noise_indexes` are sensors carrying no usable signal
# and `acc_idxs` are the sensors that are kept -- confirm against the dataset notes.
features = ["pmax", "negpmax", 'area', 'tmax', 'rms']
noise_indexes = [0, 7, 12, 15, 16, 17]
acc_idxs = [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14]

# Step 1: drop every feature column belonging to the noise sensors.
noise_columns = ph.get_column_names(features, noise_indexes)
sensors_removed = df_dev.drop(columns=noise_columns)

# Step 2: for the remaining sensors drop only the tmax / rms / area columns.
discarded_columns = ph.get_column_names(['tmax', 'rms', 'area'], acc_idxs)
df = sensors_removed.drop(columns=discarded_columns)

In [4]:
# Shuffle all rows (frac=1) with a fixed seed so that the row order -- and
# therefore any cross-validation folds built from it -- is reproducible on a
# fresh kernel. Without random_state every run produced a different ordering.
df = df.sample(frac=1, random_state=42)

In [5]:
# Wrap the project's mean Euclidean distance metric as a scikit-learn scorer.
# greater_is_better=False marks it as a loss, so the search receives the
# sign-flipped value (the results cell negates it again to report the distance).
score = make_scorer(
    score_func=ph.mean_euclid_dist,
    greater_is_better=False,
)

In [6]:

# Two-stage estimator: standardise the inputs, then fit an MLP regressor.
# The step names ('scale', 'clf') are the prefixes used to address
# hyper-parameters from the search space (e.g. 'clf__solver').
pipe = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('clf', MLPRegressor(
        learning_rate_init=0.01,
        max_iter=200,
        n_iter_no_change=30,
        random_state=42,
    )),
])

In [7]:
# Separate the two target columns from the predictor columns.
target_columns = ['x', 'y']

X_train = df.drop(columns=target_columns)
y_train = df[target_columns].copy()  # copy so the targets are independent of df

In [8]:
# Hyper-parameter search space for the pipeline's 'clf' step. Every key carries
# the 'clf__' prefix so the value is routed to that step of the pipeline.
param_grid = dict(
    # Network topologies: one tuple entry per hidden layer.
    clf__hidden_layer_sizes=[
        (50,),
        (25, 25),
        (35, 15),
        (20, 10, 20),
        (25, 15, 10),
        (20, 15, 10, 5),
        (15, 10, 10, 15),
    ],
    clf__activation=['relu', 'logistic', 'tanh', 'identity'],
    clf__learning_rate_init=[0.01, 0.001],
    clf__learning_rate=['constant', 'invscaling', 'adaptive'],
    clf__solver=['adam', 'sgd', 'lbfgs'],
)

In [9]:
# Randomised hyper-parameter search: n_iter candidates are drawn from
# param_grid and each is evaluated with 2-fold cross-validation using the
# custom scorer.
# random_state pins which candidates are drawn, so a Restart & Run All
# evaluates the same configurations every time.
gridsearch = RandomizedSearchCV(
    pipe,
    param_grid,
    n_iter=3,
    scoring=score,
    cv=2,
    verbose=2,
    random_state=42,
)
gridsearch.fit(X_train, y_train)



Fitting 2 folds for each of 2 candidates, totalling 4 fits
[CV] END ......................clf__hidden_layer_sizes=(50,); total time=  56.2s
[CV] END ......................clf__hidden_layer_sizes=(50,); total time=  41.8s
[CV] END ...........clf__hidden_layer_sizes=(15, 10, 10, 15); total time= 2.1min




[CV] END ...........clf__hidden_layer_sizes=(15, 10, 10, 15); total time= 2.0min




In [11]:
# Summarise the search: one row per sampled candidate with its parameters,
# the mean CV score and the mean fit time.
# The scorer reports the negated distance, so the sign is flipped back to get
# the mean Euclidean distance (MED), where lower is better.
results = pd.DataFrame(gridsearch.cv_results_["params"]).assign(
    MED=-gridsearch.cv_results_["mean_test_score"],
    Time=gridsearch.cv_results_["mean_fit_time"],
)

# NOTE(review): this rebinds `df`, which earlier cells use for the training
# frame. The name is kept so any later cell reading `df` keeps working, but a
# dedicated name (e.g. results_sorted) would avoid the hidden-state hazard.
df = results.sort_values('MED')

# Lift the display limits only while rendering this table. option_context
# restores the previous values on exit, unlike reset_option('all'), which
# resets every pandas option and emits warnings for deprecated ones.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
):
    display(df)

Unnamed: 0,clf__hidden_layer_sizes,MED,Time
0,"(50,)",6.425588,48.961549
1,"(15, 10, 10, 15)",6.815226,124.409922


  pd.reset_option('all')
