In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score


In [17]:
# Synthetic dataset: 500 two-dimensional samples generated around 3 centres.
# The fixed random_state keeps the generated data the same on every run.
X, y = make_blobs(
    n_samples=500,
    n_features=2,
    centers=3,
    cluster_std=1.2,
    shuffle=True,
    random_state=42,
)


In [19]:
# Sanity check: dimensions of the feature matrix and of the label vector.
X.shape, y.shape

((500, 2), (500,))

In [18]:
# Hold out 20% of the samples as a test set; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [21]:
# Sanity check: sizes of the training and test feature matrices after the split.
X_train.shape, X_test.shape

((400, 2), (100, 2))

In [None]:
# Class balance check: each printed row is [label, number of samples with that label].
unique, counts = np.unique(y, return_counts=True)
print(np.column_stack((unique, counts)))


[[  0 167]
 [  1 167]
 [  2 166]]


In [None]:
def hidden_layers_list(neurons, hidden_layers):
    """
    Build candidate hidden-layer architectures with a constant width per layer.

    For every width in ``neurons`` and every depth in ``hidden_layers`` a tuple
    is produced that repeats the width ``depth`` times, e.g. width 5 and depth 3
    give ``(5, 5, 5)``.

    Parameters:
    - neurons: iterable of layer widths (number of units per hidden layer).
    - hidden_layers: iterable of depths (number of hidden layers).

    Returns:
    - List of tuples ordered by width first and depth second, one tuple per
      (width, depth) combination.
    """
    return [
        (width,) * depth
        for width in neurons
        for depth in hidden_layers
    ]


In [22]:
# Pipeline: min-max scale the features, then fit a multi-layer perceptron.
steps = [
    ('scaler', MinMaxScaler()),
    ('clf', MLPClassifier(random_state=42, max_iter=300)),
]
pipe_mlp = Pipeline(steps)

# Candidate architectures: every width in `neurons` repeated over 1 to 4 hidden layers.
neurons = [3, 5, 10, 15, 20, 25]
hidden_layer = [1, 2, 3, 4]
hidden_layers_param = hidden_layers_list(neurons, hidden_layer)

# The 'clf__' prefix addresses the parameter of the pipeline step named 'clf'.
param_grid = {'clf__hidden_layer_sizes': hidden_layers_param}

cv_value = 5  # number of cross-validation folds
gs_mlp = GridSearchCV(
    estimator=pipe_mlp,
    param_grid=param_grid,
    scoring='accuracy',
    cv=cv_value,
    refit=True,  # refit the best candidate on the full training data
    return_train_score=True,  # also keep the training-fold scores in cv_results_
    n_jobs=-1,
    verbose=1,
)

gs_mlp.fit(X_train, y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits




In [23]:
# Accuracy on the training split, predicted through the fitted grid search object.
y_pred_train = gs_mlp.predict(X_train)
print(f'Accuracy: {accuracy_score(y_train, y_pred_train)}')

Accuracy: 1.0


In [24]:
# Accuracy on the held-out test split, predicted through the fitted grid search object.
y_pred_test = gs_mlp.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred_test)}')

Accuracy: 1.0


In [25]:
# Raw cross-validation results: a dict of arrays with one entry per candidate.
gs_mlp.cv_results_

{'mean_fit_time': array([0.43053875, 0.84890842, 0.86560569, 1.29743619, 0.77796187,
        0.69176598, 0.88309011, 1.74852543, 1.12855883, 0.84437685,
        1.21329308, 2.05396795, 1.02991786, 1.04563351, 0.94903092,
        0.86955142, 1.06639171, 0.56356773, 0.69395661, 0.59654579,
        0.45214777, 0.56975131, 0.48127942, 0.48725338]),
 'std_fit_time': array([0.08726162, 0.09184071, 0.13924325, 0.2131891 , 0.04586801,
        0.11372512, 0.18039935, 0.1865295 , 0.16017687, 0.10439993,
        0.30298808, 0.62592983, 0.11716854, 0.14995888, 0.11362291,
        0.3296251 , 0.24683363, 0.01211216, 0.08120059, 0.12193948,
        0.01393026, 0.01809812, 0.01467141, 0.03018862]),
 'mean_score_time': array([0.00318022, 0.00655689, 0.00281219, 0.0035202 , 0.00414038,
        0.00174155, 0.00351758, 0.00920601, 0.00472159, 0.00405612,
        0.00431027, 0.00475569, 0.00424237, 0.00316315, 0.00323143,
        0.00329804, 0.00791316, 0.00158362, 0.00162959, 0.00165305,
        0.002009

In [30]:
# Best cross-validated score found by the search and the parameters that produced it.
print(gs_mlp.best_score_)
print(gs_mlp.best_params_)

1.0
{'clf__hidden_layer_sizes': (10,)}


In [36]:
# Tabulate the cross-validation results: one row per candidate architecture.
cv_results = pd.DataFrame(gs_mlp.cv_results_)

cv_results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf__hidden_layer_sizes,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,0.430539,0.087262,0.00318,0.003287,"(3,)","{'clf__hidden_layer_sizes': (3,)}",1.0,1.0,0.975,1.0,...,0.99,0.012247,18,1.0,1.0,0.99375,0.984375,0.975,0.990625,0.009682
1,0.848908,0.091841,0.006557,0.003957,"(3, 3)","{'clf__hidden_layer_sizes': (3, 3)}",0.8375,0.8,0.8125,0.8375,...,0.8275,0.018371,21,0.834375,0.78125,0.865625,0.896875,0.828125,0.84125,0.03875
2,0.865606,0.139243,0.002812,0.002034,"(3, 3, 3)","{'clf__hidden_layer_sizes': (3, 3, 3)}",0.6875,0.6875,0.6875,0.6875,...,0.685,0.005,23,0.684375,0.684375,0.684375,0.684375,0.6875,0.685,0.00125
3,1.297436,0.213189,0.00352,0.003016,"(3, 3, 3, 3)","{'clf__hidden_layer_sizes': (3, 3, 3, 3)}",0.9375,0.975,0.9125,0.9875,...,0.955,0.026926,19,0.93125,0.95,0.95,0.984375,0.93125,0.949375,0.019405
4,0.777962,0.045868,0.00414,0.005393,"(5,)","{'clf__hidden_layer_sizes': (5,)}",0.9875,1.0,0.9875,1.0,...,0.995,0.006124,17,0.996875,0.996875,0.996875,1.0,1.0,0.998125,0.001531
5,0.691766,0.113725,0.001742,0.000307,"(5, 5)","{'clf__hidden_layer_sizes': (5, 5)}",0.6625,0.6625,0.6625,0.6625,...,0.6625,0.0,24,0.6625,0.6625,0.6625,0.6625,0.6625,0.6625,0.0
6,0.88309,0.180399,0.003518,0.003659,"(5, 5, 5)","{'clf__hidden_layer_sizes': (5, 5, 5)}",0.7625,0.7375,0.675,0.6625,...,0.7,0.041833,22,0.778125,0.715625,0.69375,0.6625,0.6625,0.7025,0.04282
7,1.748525,0.18653,0.009206,0.007369,"(5, 5, 5, 5)","{'clf__hidden_layer_sizes': (5, 5, 5, 5)}",0.925,0.8875,0.8625,0.925,...,0.9,0.023717,20,0.9,0.89375,0.8875,0.921875,0.915625,0.90375,0.01302
8,1.128559,0.160177,0.004722,0.005287,"(10,)","{'clf__hidden_layer_sizes': (10,)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
9,0.844377,0.1044,0.004056,0.004135,"(10, 10)","{'clf__hidden_layer_sizes': (10, 10)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [35]:
# Cross-validation row for the single hidden layer of 10 neurons.
# The tuple is compared element by element so it is matched as one value per row.
cv_results.loc[
    cv_results["param_clf__hidden_layer_sizes"].map(lambda sizes: sizes == (10,))
]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf__hidden_layer_sizes,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
8,1.128559,0.160177,0.004722,0.005287,"(10,)","{'clf__hidden_layer_sizes': (10,)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [None]:
# Mean training and validation accuracy side by side for every candidate
# architecture, so both scores can be compared on the same row (a large gap
# between them would point to over-fitting).
cv_results[['param_clf__hidden_layer_sizes', 'mean_train_score', 'mean_test_score']]

In [37]:
# Order the candidates from fastest to slowest mean fit time (std_fit_time breaks ties).
cv_results.sort_values(by=['mean_fit_time', 'std_fit_time'])

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_clf__hidden_layer_sizes,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,0.430539,0.087262,0.00318,0.003287,"(3,)","{'clf__hidden_layer_sizes': (3,)}",1.0,1.0,0.975,1.0,...,0.99,0.012247,18,1.0,1.0,0.99375,0.984375,0.975,0.990625,0.009682
20,0.452148,0.01393,0.002009,0.000277,"(25,)","{'clf__hidden_layer_sizes': (25,)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
22,0.481279,0.014671,0.001573,4.4e-05,"(25, 25, 25)","{'clf__hidden_layer_sizes': (25, 25, 25)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
23,0.487253,0.030189,0.001724,0.000501,"(25, 25, 25, 25)","{'clf__hidden_layer_sizes': (25, 25, 25, 25)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
17,0.563568,0.012112,0.001584,3.4e-05,"(20, 20)","{'clf__hidden_layer_sizes': (20, 20)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
21,0.569751,0.018098,0.001638,0.000119,"(25, 25)","{'clf__hidden_layer_sizes': (25, 25)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
19,0.596546,0.121939,0.001653,0.000166,"(20, 20, 20, 20)","{'clf__hidden_layer_sizes': (20, 20, 20, 20)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
5,0.691766,0.113725,0.001742,0.000307,"(5, 5)","{'clf__hidden_layer_sizes': (5, 5)}",0.6625,0.6625,0.6625,0.6625,...,0.6625,0.0,24,0.6625,0.6625,0.6625,0.6625,0.6625,0.6625,0.0
18,0.693957,0.081201,0.00163,9.8e-05,"(20, 20, 20)","{'clf__hidden_layer_sizes': (20, 20, 20)}",1.0,1.0,1.0,1.0,...,1.0,0.0,1,1.0,1.0,1.0,1.0,1.0,1.0,0.0
4,0.777962,0.045868,0.00414,0.005393,"(5,)","{'clf__hidden_layer_sizes': (5,)}",0.9875,1.0,0.9875,1.0,...,0.995,0.006124,17,0.996875,0.996875,0.996875,1.0,1.0,0.998125,0.001531


En este caso, el mejor resultado según GridSearchCV se obtiene con una sola capa oculta de 10 neuronas (varias configuraciones empatan con una accuracy media de 1.0 en la validación cruzada, y GridSearchCV se queda con la primera de ellas). Sin embargo, no es la configuración con menor coste computacional (observando el mean fit time). Curiosamente, un modelo un poco más complejo, con 25 neuronas y una sola capa oculta, obtiene los mismos resultados en la validación cruzada que el de 10 neuronas, pero su mean fit time es menor.

In [38]:
# Train the single-hidden-layer, 25-neuron architecture directly (no grid search),
# with the same scaler, seed and max_iter as the searched pipeline.
# NOTE: this rebinds `steps` and `pipe_mlp`, which were first defined for the grid search.
steps = [
    ('scaler', MinMaxScaler()),
    ('clf', MLPClassifier(hidden_layer_sizes = (25,), random_state=42, max_iter=300))
]
pipe_mlp = Pipeline(steps)

pipe_mlp.fit(X_train, y_train)



In [39]:
# Test-set accuracy of the 25-neuron pipeline fitted in the previous cell.
accuracy_score(y_test, pipe_mlp.predict(X_test))

1.0

Observamos que, sobre el conjunto de test, tiene la misma capacidad predictiva (accuracy de 1.0) que el modelo con 10 neuronas y una sola capa.

# Conclusión

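La red neuronal con 10 neuronas y una capa oculta tiene la misma capacidad predictiva que la de 25 neuronas y una sola capa, pero, según GridSearchCV, la primera tiene mayor coste computacional (mean fit time) que la segunda. Es un resultado llamativo; una posible explicación, a confirmar, es que la red más pequeña necesite más iteraciones para converger, y que el mean fit time, al ser un tiempo de reloj medido con ejecución en paralelo (n_jobs=-1), tenga bastante ruido.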