# MLPRegressor

In [43]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import r2_score
from statsmodels.tools.eval_measures import stde


## Read the etl info results

In [34]:
df_info = pd.read_csv('../dataset_clean/options_csv_v1_etl.csv')
df_info

Unnamed: 0,generic_features,remove_atypical_values,feature_combination,remove_feature_selection,remove_time_features,remove_invalid_correlated_features
0,False,False,degree 2 polynomial,False,True,False


## Read the dataset

In [35]:
df = pd.read_csv('../dataset_clean/PlatteRiverWeir_features_v1_clean.csv')
df

Unnamed: 0,Stage,Discharge,exposure,fNumber,isoSpeed,shutterSpeed,grayMean,graySigma,entropyMean,entropySigma,...,WwCurveLineMin^2,WwCurveLineMin WwCurveLineMax,WwCurveLineMin WwCurveLineMean,WwCurveLineMin WwCurveLineSigma,WwCurveLineMax^2,WwCurveLineMax WwCurveLineMean,WwCurveLineMax WwCurveLineSigma,WwCurveLineMean^2,WwCurveLineMean WwCurveLineSigma,WwCurveLineSigma^2
0,2.99,916.0,0.000250,4.0,200.0,-1.0,97.405096,39.623303,0.203417,0.979825,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
1,2.99,916.0,0.000312,4.0,200.0,-1.0,104.066757,40.179745,0.206835,1.002624,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
2,2.96,873.0,0.000312,4.0,200.0,-1.0,105.636831,40.533218,0.204756,0.994246,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
3,2.94,846.0,0.000312,4.0,200.0,-1.0,104.418949,41.752678,0.202428,0.983170,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
4,2.94,846.0,0.000312,4.0,200.0,-1.0,106.763541,44.442097,0.202661,0.989625,...,0.0,0.0,0.0,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42054,2.54,434.0,0.000312,4.0,200.0,-1.0,82.872720,57.702652,0.221708,1.076393,...,0.0,0.0,0.0,0.0,4.911907e+09,2.631754e+09,1.152506e+09,1.410070e+09,6.175020e+08,2.704183e+08
42055,2.54,434.0,0.000250,4.0,200.0,-1.0,89.028383,55.840861,0.233168,1.124774,...,0.0,0.0,0.0,0.0,4.908544e+09,2.760217e+09,1.121607e+09,1.552150e+09,6.307123e+08,2.562883e+08
42056,2.54,434.0,0.000250,4.0,200.0,-1.0,94.722097,54.355753,0.240722,1.151833,...,0.0,0.0,0.0,0.0,5.827032e+09,3.156453e+09,1.335051e+09,1.709823e+09,7.231858e+08,3.058782e+08
42057,2.54,434.0,0.000312,4.0,200.0,-1.0,96.693270,52.787629,0.244789,1.171987,...,0.0,0.0,0.0,0.0,6.222370e+09,3.514502e+09,1.441040e+09,1.985052e+09,8.139242e+08,3.337306e+08


## Divide dataset to X and Y

In [36]:
y = df[["Stage", "Discharge"]]
X = df.drop(columns=["Stage", "Discharge"])

In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

## Train model

In [38]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', MLPRegressor())
])

param_grid = {'clf__hidden_layer_sizes': [(10), (10, 20), (10, 5, 15)], 'clf__alpha': [np.arange(0.0001, 1, 10)], 'clf__learning_rate_init': np.arange(0.001, 10, 20)}

clf = RandomizedSearchCV(pipeline, param_distributions=param_grid, n_iter=3, n_jobs=6, verbose=3)

In [39]:
clf.fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits




[CV 4/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=10, clf__learning_rate_init=0.001;, score=0.775 total time= 1.1min
[CV 5/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=10, clf__learning_rate_init=0.001;, score=0.771 total time= 1.1min
[CV 3/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=10, clf__learning_rate_init=0.001;, score=0.755 total time= 1.1min
[CV 1/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=10, clf__learning_rate_init=0.001;, score=0.754 total time= 1.1min
[CV 2/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=10, clf__learning_rate_init=0.001;, score=0.762 total time= 1.1min




[CV 1/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 20), clf__learning_rate_init=0.001;, score=0.748 total time= 1.2min




[CV 5/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 20), clf__learning_rate_init=0.001;, score=0.828 total time= 1.1min
[CV 4/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 20), clf__learning_rate_init=0.001;, score=0.758 total time= 1.1min
[CV 2/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 20), clf__learning_rate_init=0.001;, score=0.785 total time= 1.1min




[CV 3/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 20), clf__learning_rate_init=0.001;, score=0.814 total time= 1.1min




[CV 1/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 5, 15), clf__learning_rate_init=0.001;, score=0.836 total time= 1.1min




[CV 2/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 5, 15), clf__learning_rate_init=0.001;, score=0.832 total time= 1.1min




[CV 3/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 5, 15), clf__learning_rate_init=0.001;, score=0.823 total time=  54.2s
[CV 4/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 5, 15), clf__learning_rate_init=0.001;, score=0.817 total time=  54.1s
[CV 5/5] END clf__alpha=[0.0001], clf__hidden_layer_sizes=(10, 5, 15), clf__learning_rate_init=0.001;, score=0.837 total time=  54.1s




In [40]:
clf.best_score_

0.8290972420571633

In [41]:
clf.best_params_

{'clf__learning_rate_init': 0.001,
 'clf__hidden_layer_sizes': (10, 5, 15),
 'clf__alpha': array([0.0001])}

## Test model

In [42]:
clf.score(X_test, y_test)


0.6746941413334422

In [None]:
y_pred = clf.predict(X_test)

In [44]:
print("R^2: ", r2_score(y_test, y_pred))
print("Error estandar: ", stde(y_test.squeeze(),
      y_pred.squeeze(), ddof=len(X.columns) + 1))


R^2:  0.6746941413334422
Error estandar:  [  0.59001592 515.73848425]
