In [4]:
import pandas as pd
import helpers.processing_helpers as ph

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import make_scorer

from sklearn.neural_network import  MLPRegressor

from sklearn.discriminant_analysis import StandardScaler
from sklearn.pipeline import Pipeline


In [5]:
df_dev = pd.read_csv("./dataset/development.csv")

In [6]:
noise_indexes = [0,7,12,15,16,17]
acc_idxs = [1,2,3,4,5,6,8,9,10,11,13,14]
features = ["pmax", "negpmax", 'area', 'tmax', 'rms']

sensors_removed = df_dev.drop(columns=ph.get_column_names(features, noise_indexes))
df = sensors_removed.drop(columns=ph.get_column_names(['tmax', 'rms', 'area'], acc_idxs))

In [7]:
df = df.sample(frac=1)

In [8]:
score = make_scorer(ph.mean_euclid_dist, greater_is_better=False)

In [9]:
y_train = df[['x', 'y']].copy()

X_train = df.drop(columns=['x', 'y'])

In [6]:
pipe = Pipeline([
    ('scale', StandardScaler()),
    ('clf', MLPRegressor(random_state=42, max_iter=300, n_iter_no_change=50, solver='adam'))
])

In [8]:
param_grid = {'clf__hidden_layer_sizes' : [(100, 100),
                                           (80, 40, 80),
                                           (80, 20, 20, 80)
                                            ], 
              'clf__learning_rate_init' : [0.01, 0.005, 0.001],
              'clf__activation' : ['tanh', 'logistic'],
              'clf__solver' : ['adam', 'sgd']
              }

In [9]:
gridsearch = GridSearchCV(pipe, param_grid, scoring=score, cv=2, verbose=2)
gridsearch.fit(X_train, y_train)

Fitting 2 folds for each of 32 candidates, totalling 64 fits




In [10]:
results = pd.concat([pd.DataFrame(gridsearch.cv_results_["params"]),
                     -pd.DataFrame(gridsearch.cv_results_["mean_test_score"], columns=["MED"]),
                     pd.DataFrame(gridsearch.cv_results_["mean_fit_time"], columns=["Time"])],
                     axis=1)
df = results.sort_values('MED')

# Permanently changes the pandas settings
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
 
# All dataframes hereafter reflect these changes.
display(df)

pd.reset_option('all')

Unnamed: 0,clf__activation,clf__hidden_layer_sizes,clf__learning_rate_init,clf__solver,MED,Time
23,logistic,"(50, 50)",0.001,adam,4.486439,934.73341
7,tanh,"(50, 50)",0.001,adam,4.528648,987.89811
21,logistic,"(50, 50)",0.01,adam,4.606364,931.289746
11,tanh,"(40, 20, 40)",0.001,adam,4.62676,936.208085
27,logistic,"(40, 20, 40)",0.001,adam,4.662121,783.234618
25,logistic,"(40, 20, 40)",0.01,adam,4.686804,824.647403
15,tanh,"(40, 10, 10, 40)",0.001,adam,4.735635,972.129218
19,logistic,"(100,)",0.001,adam,4.741881,691.927194
17,logistic,"(100,)",0.01,adam,4.744047,836.737504
31,logistic,"(40, 10, 10, 40)",0.001,adam,4.821674,570.67462


  pd.reset_option('all')


In [7]:
param_grid2 = {'clf__hidden_layer_sizes' : [(50, 50),
                                           (40, 20, 40),
                                           (30, 20, 20, 30)
                                            ], 
              'clf__learning_rate_init' : [0.01, 0.005],
              'clf__activation' : ['tanh', 'logistic'],
              'clf__learning_rate': ['constant', 'invscaling'],
              }

In [8]:
pipe2 = Pipeline([
    ('scale', StandardScaler()),
    ('clf', MLPRegressor(random_state=42, max_iter=200, n_iter_no_change=50, solver='adam'))
])

In [9]:
gridsearch2 = GridSearchCV(pipe2, param_grid2, scoring=score, cv=2, verbose=2)
gridsearch2.fit(X_train, y_train)

Fitting 2 folds for each of 24 candidates, totalling 48 fits




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 4.5min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 4.2min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 4.1min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 4.2min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 4.3min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 4.3min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 4.3min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 4.3min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 2.8min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 2.6min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 3.2min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 3.2min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 2.9min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 2.6min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 3.2min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 3.2min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 2.7min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 3.2min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 3.4min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 3.5min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 2.7min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 3.1min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 3.4min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 3.4min
[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 4.3min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 6.0min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 7.2min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 6.6min
[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 5.0min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 6.0min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 6.0min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(50, 50), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 5.4min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 3.3min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 3.3min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 2.9min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 2.9min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 3.1min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 2.9min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 2.9min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(40, 20, 40), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 2.9min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 3.1min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=constant, clf__learning_rate_init=0.01; total time= 3.1min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 3.1min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=constant, clf__learning_rate_init=0.005; total time= 3.2min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 3.1min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=invscaling, clf__learning_rate_init=0.01; total time= 3.2min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 3.0min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(30, 20, 20, 30), clf__learning_rate=invscaling, clf__learning_rate_init=0.005; total time= 3.2min




In [11]:
results2 = pd.concat([pd.DataFrame(gridsearch2.cv_results_["params"]),
                     -pd.DataFrame(gridsearch2.cv_results_["mean_test_score"], columns=["MED"]),
                     pd.DataFrame(gridsearch2.cv_results_["mean_fit_time"], columns=["Time"])],
                     axis=1)
df_results2 = results2.sort_values('MED')

# Permanently changes the pandas settings
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
 
# All dataframes hereafter reflect these changes.
display(df_results2)

pd.reset_option('all')

Unnamed: 0,clf__activation,clf__hidden_layer_sizes,clf__learning_rate,clf__learning_rate_init,MED,Time
13,logistic,"(50, 50)",constant,0.005,4.472788,410.935081
15,logistic,"(50, 50)",invscaling,0.005,4.472788,344.102655
19,logistic,"(40, 20, 40)",invscaling,0.005,4.592483,173.685579
17,logistic,"(40, 20, 40)",constant,0.005,4.592483,174.635135
12,logistic,"(50, 50)",constant,0.01,4.615705,308.169978
14,logistic,"(50, 50)",invscaling,0.01,4.615705,329.871122
18,logistic,"(40, 20, 40)",invscaling,0.01,4.691565,180.61361
16,logistic,"(40, 20, 40)",constant,0.01,4.691565,198.918184
1,tanh,"(50, 50)",constant,0.005,4.746723,250.788942
3,tanh,"(50, 50)",invscaling,0.005,4.746723,259.115716


  pd.reset_option('all')


In [10]:
param_grid3 = {'clf__hidden_layer_sizes' : [(200,),
                                            (100, 100),
                                           (80, 40, 80)]
              }

In [11]:
pipe3 = Pipeline([
    ('scale', StandardScaler()),
    ('clf', MLPRegressor(random_state=42, max_iter=500, n_iter_no_change=30, solver='adam', activation='logistic', learning_rate_init=0.005))
])

In [12]:
gridsearch3 = GridSearchCV(pipe3, param_grid3, scoring=score, cv=2, verbose=2)
gridsearch3.fit(X_train, y_train)

Fitting 2 folds for each of 3 candidates, totalling 6 fits
[CV] END .....................clf__hidden_layer_sizes=(200,); total time=10.7min
[CV] END .....................clf__hidden_layer_sizes=(200,); total time= 8.8min
[CV] END .................clf__hidden_layer_sizes=(100, 100); total time=16.3min
[CV] END .................clf__hidden_layer_sizes=(100, 100); total time=21.0min
[CV] END ...............clf__hidden_layer_sizes=(80, 40, 80); total time=21.6min
[CV] END ...............clf__hidden_layer_sizes=(80, 40, 80); total time=14.9min


In [13]:
results3 = pd.concat([pd.DataFrame(gridsearch3.cv_results_["params"]),
                     -pd.DataFrame(gridsearch3.cv_results_["mean_test_score"], columns=["MED"]),
                     pd.DataFrame(gridsearch3.cv_results_["mean_fit_time"], columns=["Time"])],
                     axis=1)
df_results3 = results3.sort_values('MED')

# Permanently changes the pandas settings
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
 
# All dataframes hereafter reflect these changes.
display(df_results3)

pd.reset_option('all')

Unnamed: 0,clf__hidden_layer_sizes,MED,Time
1,"(100, 100)",4.233812,1118.01134
2,"(80, 40, 80)",4.266247,1091.224255
0,"(200,)",4.429614,585.802227


  pd.reset_option('all')
