In [1]:
import pandas as pd
import helpers.processing_helpers as ph

from sklearn.model_selection import RandomizedSearchCV

from sklearn.metrics import make_scorer

from sklearn.neural_network import  MLPRegressor

from sklearn.discriminant_analysis import StandardScaler
from sklearn.pipeline import Pipeline


In [2]:
df_dev = pd.read_csv("./dataset/development.csv")

In [3]:
noise_indexes = [0,7,12,15,16,17]
acc_idxs = [1,2,3,4,5,6,8,9,10,11,13,14]
features = ["pmax", "negpmax", 'area', 'tmax', 'rms']

sensors_removed = df_dev.drop(columns=ph.get_column_names(features, noise_indexes))
df = sensors_removed.drop(columns=ph.get_column_names(['tmax', 'rms', 'area'], acc_idxs))

In [4]:
df = df.sample(frac=1)

In [5]:
score = make_scorer(ph.mean_euclid_dist, greater_is_better=False)

In [6]:
# These will be the constant parameters

pipe = Pipeline([
    ('scale', StandardScaler()),
    ('clf', MLPRegressor(random_state=42, max_iter=200, n_iter_no_change=30, learning_rate_init=0.01))
])

In [7]:
y_train = df[['x', 'y']].copy()

X_train = df.drop(columns=['x', 'y'])

In [8]:
param_grid = {'clf__hidden_layer_sizes' : [(50,),
                                            (25, 25),
                                            (35, 15),
                                            (20, 10, 20),
                                            (25, 15, 10),
                                            (20, 15, 10, 5),
                                            (15, 10, 10, 15)],
            'clf__activation' : ['relu', 
                              'logistic', 
                              'tanh'],
            'clf__learning_rate_init': [0.01, 0.001],
            'clf__learning_rate': ['constant', 'invscaling', 'adaptive'],
            'clf__solver': ['adam', 'sgd', 'lbfgs']
              }

In [9]:
# n_iter is the amount of combinations to test
gridsearch = RandomizedSearchCV(pipe, param_grid, scoring=score, cv=3, verbose=2, n_iter=20)
gridsearch.fit(X_train, y_train)

Fitting 3 folds for each of 20 candidates, totalling 60 fits




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(50,), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=adam; total time= 1.5min




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(50,), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=adam; total time= 1.8min




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(50,), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=adam; total time= 1.7min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 2.9min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 3.0min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 2.9min




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.001, clf__solver=sgd; total time= 2.1min
[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.001, clf__solver=sgd; total time=  19.8s
[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.001, clf__solver=sgd; total time=  20.2s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time= 1.9min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time= 2.2min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time= 1.9min
[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time=  55.7s
[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time=  54.3s
[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time=  34.0s




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 2.9min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 3.0min




[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 2.9min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=adam; total time= 3.2min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=adam; total time= 1.8min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=adam; total time= 2.5min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time= 1.8min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time= 1.6min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time= 1.6min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=adaptive, clf__learning_rate_init=0.001, clf__solver=lbfgs; total time= 2.2min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=adaptive, clf__learning_rate_init=0.001, clf__solver=lbfgs; total time= 2.4min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=adaptive, clf__learning_rate_init=0.001, clf__solver=lbfgs; total time= 2.1min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time=  30.1s
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time=  24.6s
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 15, 10), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time=  23.7s
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=adam; total time= 2.9min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=adam; total time= 1.5min
[CV] 



[CV] END clf__activation=relu, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 2.1min




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 2.2min




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=adaptive, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 2.1min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time=  51.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time=  50.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.01, clf__solver=lbfgs; total time=  50.6s




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=adam; total time= 3.3min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=adam; total time= 3.6min




[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(15, 10, 10, 15), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=adam; total time= 4.2min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=lbfgs; total time= 2.7min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=lbfgs; total time= 2.8min


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(20, 15, 10, 5), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=lbfgs; total time= 2.8min




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=adam; total time= 2.8min




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=adam; total time= 2.7min




[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=adam; total time= 2.9min
[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time=  57.6s
[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 1.4min
[CV] END clf__activation=logistic, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=sgd; total time= 1.3min
[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=sgd; total time=  23.3s
[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=sgd; total time=  23.2s


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END clf__activation=relu, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=invscaling, clf__learning_rate_init=0.001, clf__solver=sgd; total time= 2.2min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=adam; total time= 2.8min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=adam; total time= 2.6min
[CV] END clf__activation=tanh, clf__hidden_layer_sizes=(25, 25), clf__learning_rate=constant, clf__learning_rate_init=0.01, clf__solver=adam; total time= 1.9min


1 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Arist\OneDrive\Skrivebord\DSL labs\DSL_project2\venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 890, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Arist\OneDrive\Skrivebord\DSL labs\DSL_project2\venv\Lib\site-packages\sklearn\base.py", line 1351, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Arist\OneDrive\Skrivebord\DSL labs\DSL_project2\venv\Lib\site-packages\sklearn\pipeline.py", line 475, in fit
    self._final_estimator.

In [11]:
results = pd.concat([pd.DataFrame(gridsearch.cv_results_["params"]),
                     -pd.DataFrame(gridsearch.cv_results_["mean_test_score"], columns=["MED"]),
                     pd.DataFrame(gridsearch.cv_results_["mean_fit_time"], columns=["Time"])],
                     axis=1)
df = results.sort_values('MED')

# Permanently changes the pandas settings
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
 
# All dataframes hereafter reflect these changes.
display(df)

pd.reset_option('all')

Unnamed: 0,clf__solver,clf__learning_rate_init,clf__learning_rate,clf__hidden_layer_sizes,clf__activation,MED,Time
14,adam,0.001,invscaling,"(15, 10, 10, 15)",logistic,5.268188,221.332109
19,adam,0.01,constant,"(25, 25)",tanh,5.386961,147.341025
10,adam,0.01,constant,"(20, 15, 10, 5)",tanh,5.876752,127.638233
6,adam,0.01,adaptive,"(20, 15, 10, 5)",tanh,5.876752,149.319709
16,adam,0.001,invscaling,"(25, 25)",relu,6.052179,168.044488
0,adam,0.01,invscaling,"(50,)",relu,6.130232,99.968494
13,lbfgs,0.01,invscaling,"(25, 25)",relu,8.083854,50.737288
7,lbfgs,0.01,invscaling,"(25, 25)",logistic,23.666507,100.68177
8,lbfgs,0.001,adaptive,"(25, 15, 10)",tanh,65.327722,133.476104
9,sgd,0.01,constant,"(25, 15, 10)",tanh,66.865929,26.034492


  pd.reset_option('all')
