# Selección Manual de Parámetros

### (en un modelo de Machine Learning)

In [2]:
# Importamos Libs

# (En este caso usaremos Redes Neuronales pero este programa es adaptable a multitud de modelos estimadores)

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import pandas as pd

In [3]:
# Leemos dataset

data = pd.read_csv("../eggomPY_Datasets/AirlineDelays/DelayedFlights.csv")

In [5]:
# Defino las cols

col_pred = ["AirTime", "Distance", "TaxiIn", "TaxiOut", "DepDelay"]
col_target = "ArrDelay"

# Limpiamos el dataset de NaN

df = data.dropna(subset=col_pred)
df = df.dropna(subset=["ArrDelay"])

# Sampleamos el dataset y lo capamos a un total de 1000 filas.

df = df.sample(frac=1,replace=True, random_state=12).head(1000)

# Defino un conjunto X con los datos de predicción y un conjunto con los datos objetivo

X = df[col_pred]
y = df[col_target]

In [6]:
# Creo conjuntos de train y testing

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=12)

In [7]:
# Creo un transformador escalador

escalador = StandardScaler()

###  IMPORTANTE!!!  ##############################################################
###                                                                             ##
###  Fiteo el escalador con el conjunto de variables predictoras de TRAINING!!! ##

escalador.fit(X_train)

# Aplico el transformador escalador al conjunto de testing y training

X_train = escalador.transform(X_train)
X_test = escalador.transform(X_test)

In [10]:
# Creo unas listas con valores arbitrarios para ciertos parámetros del modelo.

# Recordar que en este caso el modelo es una red neuronal por tanto los hiperparámetros de mayor impacto serán:
#
# alphas: parametro de regularización/penalización (de la complejidad), para reducir posible overfitting.
# layers: capas de la red neuronal.
# solvers: método de solving (calculo numérico) dentro de la red neuronal para dar con la solución estable.

alphas = [0.000001, 0.0001, 0.01, 0.1]
layers = [2, 5, 10, 50, 100]
solvers = ["lbfgs", "adam"]

In [12]:
# En total generaremos 40 modelos (fruto de la combinación de hiperparámetros):

len(alphas)*len(layers)*len(solvers)

40

In [13]:
# Creamos el loop: (Soluc. rudimentaria pero efectiva)

for alpha in alphas:
    for layer in layers:
        for solver in solvers:
                clf = MLPRegressor(solver=solver, alpha=alpha, hidden_layer_sizes=(layer,), warm_start=True, max_iter=10000)
                model = clf.fit(X_train, y_train)
                y_guess = model.predict(X_test)
                print("R2 score: ",r2_score(y_test, y_guess),"Para Solver: ",solver," n_Layers: ",layer," Alpha: ",alpha)

R2 score:  0.9728416523843435 Para Solver:  lbfgs  n_Layers:  2  Alpha:  1e-06
R2 score:  0.9721800038097603 Para Solver:  adam  n_Layers:  2  Alpha:  1e-06
R2 score:  0.9765075417734168 Para Solver:  lbfgs  n_Layers:  5  Alpha:  1e-06
R2 score:  0.9723276810842592 Para Solver:  adam  n_Layers:  5  Alpha:  1e-06
R2 score:  0.9763500230429344 Para Solver:  lbfgs  n_Layers:  10  Alpha:  1e-06
R2 score:  0.9731723855774527 Para Solver:  adam  n_Layers:  10  Alpha:  1e-06
R2 score:  0.9551501266522242 Para Solver:  lbfgs  n_Layers:  50  Alpha:  1e-06
R2 score:  0.9763360388280711 Para Solver:  adam  n_Layers:  50  Alpha:  1e-06


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


R2 score:  0.8987194608406996 Para Solver:  lbfgs  n_Layers:  100  Alpha:  1e-06
R2 score:  0.9752994536593261 Para Solver:  adam  n_Layers:  100  Alpha:  1e-06
R2 score:  0.9731423476799245 Para Solver:  lbfgs  n_Layers:  2  Alpha:  0.0001




R2 score:  0.009256587581210063 Para Solver:  adam  n_Layers:  2  Alpha:  0.0001
R2 score:  0.9739554788998845 Para Solver:  lbfgs  n_Layers:  5  Alpha:  0.0001
R2 score:  0.9731618884889088 Para Solver:  adam  n_Layers:  5  Alpha:  0.0001
R2 score:  0.9754778142904855 Para Solver:  lbfgs  n_Layers:  10  Alpha:  0.0001
R2 score:  0.9746809698631999 Para Solver:  adam  n_Layers:  10  Alpha:  0.0001
R2 score:  0.962780915246534 Para Solver:  lbfgs  n_Layers:  50  Alpha:  0.0001
R2 score:  0.9752658448920241 Para Solver:  adam  n_Layers:  50  Alpha:  0.0001


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


R2 score:  0.8631933899160615 Para Solver:  lbfgs  n_Layers:  100  Alpha:  0.0001
R2 score:  0.974000198961827 Para Solver:  adam  n_Layers:  100  Alpha:  0.0001
R2 score:  0.9727183019903296 Para Solver:  lbfgs  n_Layers:  2  Alpha:  0.01




R2 score:  0.025284525489700282 Para Solver:  adam  n_Layers:  2  Alpha:  0.01
R2 score:  0.9787325262294504 Para Solver:  lbfgs  n_Layers:  5  Alpha:  0.01
R2 score:  0.9754218952195527 Para Solver:  adam  n_Layers:  5  Alpha:  0.01
R2 score:  0.9772680307579051 Para Solver:  lbfgs  n_Layers:  10  Alpha:  0.01
R2 score:  0.975434214954638 Para Solver:  adam  n_Layers:  10  Alpha:  0.01
R2 score:  0.9555692857115502 Para Solver:  lbfgs  n_Layers:  50  Alpha:  0.01
R2 score:  0.9749379021074807 Para Solver:  adam  n_Layers:  50  Alpha:  0.01
R2 score:  0.9022579037938081 Para Solver:  lbfgs  n_Layers:  100  Alpha:  0.01
R2 score:  0.9742697773308048 Para Solver:  adam  n_Layers:  100  Alpha:  0.01
R2 score:  0.9737727024932346 Para Solver:  lbfgs  n_Layers:  2  Alpha:  0.1
R2 score:  0.9723177335998678 Para Solver:  adam  n_Layers:  2  Alpha:  0.1
R2 score:  0.9794523835160901 Para Solver:  lbfgs  n_Layers:  5  Alpha:  0.1
R2 score:  0.9739505905153579 Para Solver:  adam  n_Layers:  5  

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


R2 score:  0.9230416044910194 Para Solver:  lbfgs  n_Layers:  100  Alpha:  0.1
R2 score:  0.9754043270757711 Para Solver:  adam  n_Layers:  100  Alpha:  0.1


Obviamente SciKit Learn tiene una manera más elegante de resolver esto sin tener que recurrir a estos loops...