# Hyperparametrización

## Carga de librerias

In [6]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import Ridge, Lasso, ElasticNet, BayesianRidge, Lars

## Carga de datos

In [7]:
df = pd.read_csv("/Users/gblasd/Documents/DataScience/02_SupervisedLearning/data/real_state/Real_estate.csv")
df.shape

(414, 8)

In [9]:
df.head(5)

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,1,2012.917,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833,5.0,390.5684,5,24.97937,121.54245,43.1


In [10]:
df.columns = ['no','date','house_age','distanceToTheNearestMRTstation',
       'Number OfConvenience stores', 'latitude', 'longitude',
       'house_price']

df.head()

Unnamed: 0,no,date,house_age,distanceToTheNearestMRTstation,Number OfConvenience stores,latitude,longitude,house_price
0,1,2012.917,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833,5.0,390.5684,5,24.97937,121.54245,43.1


In [11]:
X = df[['house_age','distanceToTheNearestMRTstation',
       'Number OfConvenience stores', 'latitude', 'longitude']]
y = df[['house_price']]

In [12]:
df.shape

(414, 8)

In [13]:
df.describe()

Unnamed: 0,no,date,house_age,distanceToTheNearestMRTstation,Number OfConvenience stores,latitude,longitude,house_price
count,414.0,414.0,414.0,414.0,414.0,414.0,414.0,414.0
mean,207.5,2013.148971,17.71256,1083.885689,4.094203,24.96903,121.533361,37.980193
std,119.655756,0.281967,11.392485,1262.109595,2.945562,0.01241,0.015347,13.606488
min,1.0,2012.667,0.0,23.38284,0.0,24.93207,121.47353,7.6
25%,104.25,2012.917,9.025,289.3248,1.0,24.963,121.528085,27.7
50%,207.5,2013.167,16.1,492.2313,4.0,24.9711,121.53863,38.45
75%,310.75,2013.417,28.15,1454.279,6.0,24.977455,121.543305,46.6
max,414.0,2013.583,43.8,6488.021,10.0,25.01459,121.56627,117.5


In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [15]:
X_train.head()

Unnamed: 0,house_age,distanceToTheNearestMRTstation,Number OfConvenience stores,latitude,longitude
181,11.6,201.8939,8,24.98489,121.54121
98,16.4,289.3248,5,24.98203,121.54348
46,21.7,463.9623,9,24.9703,121.54458
176,13.9,4573.779,0,24.94867,121.49507
231,16.2,4074.736,0,24.94235,121.50357


In [16]:
y_train.head()

Unnamed: 0,house_price
181,55.9
98,51.0
46,42.0
176,19.2
231,14.7


In [32]:
dc_scores = {}

## Modelado

### Lasso 

In [21]:
model = Lasso()
model

In [20]:
model.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [23]:
# fit the model
model.fit(X_train, y_train)

# cross-validation
ls_medias = cross_val_score(estimator=model, X=X_test, y=y_test, cv=4, n_jobs=-1, scoring="r2")

print("MEAN: ", np.mean(ls_medias))
print("STD:  ", np.std(ls_medias))


MEAN:  0.45148208208992463
STD:   0.13971348298633804


In [25]:
#Combinación de parámetros
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "tol": [0.00001, 0.0000001, 0.01],
    "selection": ['cyclic', 'random']
}

In [26]:
# Espacio para hyperparametros
np.prod( list( map(len, param_grid.values()) ) )

np.int64(654)

In [27]:
clf = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=4,
    error_score=-1000,
    n_jobs=-1,
    scoring="r2",
    verbose=5
)

clf.fit(X_train, y_train)

print("Best score: " + str(clf.best_score_))

Fitting 4 folds for each of 654 candidates, totalling 2616 fits
[CV 3/4] END alpha=1, selection=cyclic, tol=1e-07;, score=0.553 total time=   0.0s
[CV 4/4] END alpha=1, selection=cyclic, tol=1e-05;, score=0.684 total time=   0.0s
[CV 1/4] END alpha=1, selection=cyclic, tol=1e-07;, score=0.632 total time=   0.0s
[CV 3/4] END alpha=1, selection=cyclic, tol=1e-05;, score=0.553 total time=   0.0s
[CV 2/4] END alpha=1, selection=cyclic, tol=1e-07;, score=0.448 total time=   0.0s
[CV 4/4] END alpha=1, selection=cyclic, tol=1e-07;, score=0.684 total time=   0.0s
[CV 1/4] END alpha=1, selection=cyclic, tol=1e-05;, score=0.632 total time=   0.0s
[CV 2/4] END alpha=1, selection=cyclic, tol=1e-05;, score=0.448 total time=   0.0s
[CV 2/4] END alpha=1, selection=cyclic, tol=0.01;, score=0.448 total time=   0.0s
[CV 1/4] END alpha=1, selection=cyclic, tol=0.01;, score=0.632 total time=   0.0s
[CV 4/4] END alpha=1, selection=cyclic, tol=0.01;, score=0.684 total time=   0.0s
[CV 3/4] END alpha=1, sele

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  model =

In [29]:
summary = pd.DataFrame(clf.cv_results_)
summary.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_selection,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003086,0.000962,0.003389,0.00212,1.0,cyclic,1e-05,"{'alpha': 1, 'selection': 'cyclic', 'tol': 1e-05}",0.632251,0.448217,0.552613,0.683607,0.579172,0.088852,63
1,0.003662,0.000489,0.002285,0.001392,1.0,cyclic,1e-07,"{'alpha': 1, 'selection': 'cyclic', 'tol': 1e-07}",0.632251,0.448217,0.552614,0.683607,0.579172,0.088852,62
2,0.008002,0.003832,0.006441,0.003375,1.0,cyclic,0.01,"{'alpha': 1, 'selection': 'cyclic', 'tol': 0.01}",0.632248,0.448236,0.552545,0.683612,0.57916,0.088851,64
3,0.006544,0.00316,0.002602,0.000756,1.0,random,1e-05,"{'alpha': 1, 'selection': 'random', 'tol': 1e-05}",0.632252,0.448216,0.552623,0.683608,0.579175,0.088852,60
4,0.005806,0.0022,0.002008,0.000507,1.0,random,1e-07,"{'alpha': 1, 'selection': 'random', 'tol': 1e-07}",0.632251,0.448217,0.552614,0.683607,0.579172,0.088852,61


In [31]:
summary.sort_values(by='rank_test_score').head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_selection,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
598,0.002392,0.000742,0.001112,0.000904,0.0,random,1e-07,"{'alpha': 0.0, 'selection': 'random', 'tol': 1...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,1
599,0.002281,0.000552,0.000602,4e-05,0.0,random,0.01,"{'alpha': 0.0, 'selection': 'random', 'tol': 0...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,2
597,0.003612,0.000345,0.002409,0.001486,0.0,random,1e-05,"{'alpha': 0.0, 'selection': 'random', 'tol': 1...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,3
595,0.002193,0.00059,0.000878,0.000358,0.0,cyclic,1e-07,"{'alpha': 0.0, 'selection': 'cyclic', 'tol': 1...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,4
596,0.002482,0.001057,0.001003,0.000798,0.0,cyclic,0.01,"{'alpha': 0.0, 'selection': 'cyclic', 'tol': 0...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,4


In [33]:
dc_scores[str(model).split("(")[0]] = {"model": clf.best_estimator_, "score": clf.best_score_}

dc_scores

{'Lasso': {'model': Lasso(alpha=0.0, selection='random', tol=1e-07),
  'score': np.float64(0.6180986489691076)}}

### Ridge

In [34]:
model = Ridge()
model

In [35]:
model.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'positive': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.0001}

In [36]:
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "tol": [0.00001, 0.0000001, 0.01],
    "solver": ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
}

In [37]:
np.prod( list( map(len, param_grid.values()) ) )

np.int64(2289)

In [38]:
clf = GridSearchCV(
    estimator=model, 
    param_grid=param_grid, 
    cv=4, 
    error_score=-1000, 
    n_jobs=-1, 
    scoring="r2")

clf.fit(X_train, y_train)

print("Best score: " + str(clf.best_score_))



Best score: 0.6180986767784739


In [42]:
summary = pd.DataFrame(clf.cv_results_)
summary.sort_values(by="rank_test_score")

dc_scores[str(model).split("(")[0]] = {"model":clf.best_estimator_, "score":clf.best_score_}
dc_scores

{'Lasso': {'model': Lasso(alpha=0.0, selection='random', tol=1e-07),
  'score': np.float64(0.6180986489691076)},
 'Ridge': {'model': Ridge(alpha=0.0, solver='lsqr', tol=1e-07),
  'score': np.float64(0.6180986767784739)}}

### ElasticNet

In [44]:
model = ElasticNet()
model

In [46]:
model.fit(X_train, y_train)
ls_medias = cross_val_score(estimator=model, X=X_test, y=y_test, cv=4, n_jobs=-1, scoring="r2")

print("MEAN: ", np.mean(ls_medias))
print("STD:  ", np.std(ls_medias))

MEAN:  0.45248826521608165
STD:   0.14010309557915013


In [47]:
model.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'l1_ratio': 0.5,
 'max_iter': 1000,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [48]:
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "l1_ratio": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "selection": ["cyclic", "random"]
}

In [49]:
np.prod(list(map(len, param_grid.values())))

np.int64(23762)

In [None]:
clf = GridSearchCV(model, param_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2", verbose=5)
clf.fit(X_train, y_train)
print("Best score: " + str(clf.best_score_))

In [54]:
summary = pd.DataFrame(clf.cv_results_)
summary.sort_values(by="rank_test_score").head(5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,param_selection,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
21783,0.003969,0.002315,0.000696,0.000176,0.0,0.1,random,"{'alpha': 0.0, 'l1_ratio': 0.1, 'selection': '...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,1
21781,0.002239,0.000842,0.000802,0.000426,0.0,0.0,random,"{'alpha': 0.0, 'l1_ratio': 0.0, 'selection': '...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,2
21793,0.002442,0.000929,0.001178,0.000667,0.0,0.6,random,"{'alpha': 0.0, 'l1_ratio': 0.6, 'selection': '...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,3
21799,0.003064,0.001244,0.000915,0.000382,0.0,0.9,random,"{'alpha': 0.0, 'l1_ratio': 0.9, 'selection': '...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,4
21789,0.002362,0.000339,0.001138,0.000499,0.0,0.4,random,"{'alpha': 0.0, 'l1_ratio': 0.4, 'selection': '...",0.639538,0.536471,0.645582,0.650805,0.618099,0.047296,5


In [55]:
dc_scores[str(model).split("(")[0]] = {"model": clf.best_estimator_, "score": clf.best_score_}
dc_scores

{'Lasso': {'model': Lasso(alpha=0.0, selection='random', tol=1e-07),
  'score': np.float64(0.6180986489691076)},
 'Ridge': {'model': Ridge(alpha=0.0, solver='lsqr', tol=1e-07),
  'score': np.float64(0.6180986767784739)},
 'ElasticNet': {'model': ElasticNet(alpha=0.0, l1_ratio=0.1, selection='random'),
  'score': np.float64(0.6180986489691137)}}

In [56]:
clf = RandomizedSearchCV(n_iter=2000, estimator=model, param_distributions=param_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2", verbose=5)
clf.fit(X_train, y_train)
print("Best score: " + str(clf.best_score_))

Fitting 4 folds for each of 2000 candidates, totalling 8000 fits
[CV 1/4] END alpha=89, l1_ratio=4, selection=random;, score=-1000.000 total time=   0.0s
[CV 2/4] END alpha=89, l1_ratio=4, selection=random;, score=-1000.000 total time=   0.0s
[CV 3/4] END alpha=89, l1_ratio=4, selection=random;, score=-1000.000 total time=   0.0s
[CV 4/4] END alpha=89, l1_ratio=4, selection=random;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=15, l1_ratio=0.7, selection=cyclic;, score=0.537 total time=   0.0s
[CV 4/4] END alpha=15, l1_ratio=0.7, selection=cyclic;, score=0.606 total time=   0.0s
[CV 2/4] END alpha=15, l1_ratio=0.7, selection=cyclic;, score=0.467 total time=   0.0s
[CV 2/4] END alpha=95, l1_ratio=99, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=35, l1_ratio=10, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 2/4] END alpha=35, l1_ratio=10, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 3/4] END alpha=95, l1_ratio=99, selection=cy

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

atio=67, selection=random;, score=-1000.000 total time=   0.0s
[CV 2/4] END alpha=94, l1_ratio=10, selection=random;, score=-1000.000 total time=   0.0s[CV 3/4] END alpha=16, l1_ratio=21, selection=random;, score=-1000.000 total time=   0.0s

[CV 1/4] END alpha=21, l1_ratio=60, selection=random;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=92, l1_ratio=72, selection=random;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=72, l1_ratio=26, selection=random;, score=-1000.000 total time=   0.0s
[CV 4/4] END alpha=16, l1_ratio=21, selection=random;, score=-1000.000 total time=   0.0s
[CV 4/4] END alpha=67, l1_ratio=0.3, selection=random;, score=0.581 total time=   0.0s
[CV 2/4] END alpha=92, l1_ratio=72, selection=random;, score=-1000.000 total time=   0.0s
[CV 2/4] END alpha=72, l1_ratio=26, selection=random;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=90, l1_ratio=35, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 3/4] END alpha=92, l1_ratio=72, sele

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/4] END alpha=98, l1_ratio=0.9, selection=random;, score=0.464 total time=   0.0s
[CV 4/4] END alpha=18, l1_ratio=18, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=79, l1_ratio=59, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 4/4] END alpha=11, l1_ratio=0.2, selection=random;, score=0.644 total time=   0.0s
[CV 1/4] END alpha=79, l1_ratio=44, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 2/4] END alpha=79, l1_ratio=59, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 2/4] END alpha=79, l1_ratio=44, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=0.7, l1_ratio=7, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=0.1, l1_ratio=23, selection=random;, score=-1000.000 total time=   0.0s[CV 3/4] END alpha=79, l1_ratio=59, selection=cyclic;, score=-1000.000 total time=   0.0s

[CV 3/4] END alpha=79, l1_ratio=44, selection=cyclic;, score=-1000.000 total time=   0.0s
[CV 1/4] END al

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  return fit_method(estimator, *args, **kwargs)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_

[CV 1/4] END alpha=77, l1_ratio=17, selection=random;, score=-1000.000 total time=   0.0s
[CV 4/4] END alpha=81, l1_ratio=0.0, selection=cyclic;, score=0.615 total time=   0.0s
[CV 2/4] END alpha=46, l1_ratio=85, selection=random;, score=-1000.000 total time=   0.0s
[CV 2/4] END alpha=77, l1_ratio=17, selection=random;, score=-1000.000 total time=   0.0s
[CV 3/4] END alpha=46, l1_ratio=85, selection=random;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=0.0, l1_ratio=31, selection=random;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=14, l1_ratio=0.0, selection=random;, score=0.585 total time=   0.0s
[CV 3/4] END alpha=77, l1_ratio=17, selection=random;, score=-1000.000 total time=   0.0s
[CV 2/4] END alpha=0.0, l1_ratio=31, selection=random;, score=-1000.000 total time=   0.0s
[CV 4/4] END alpha=77, l1_ratio=17, selection=random;, score=-1000.000 total time=   0.0s
[CV 1/4] END alpha=16, l1_ratio=39, selection=random;, score=-1000.000 total time=   0.0s
[CV 4/4] END a

7172 fits failed out of a total of 8000.
The score on these train-test partitions for these parameters will be set to -1000.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
116 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/gblasd/Documents/DataScience/.venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/gblasd/Documents/DataScience/.venv/lib/python3.13/site-packages/sklearn/base.py", line 1382, in wrapper
    estimator._validate_params()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/Users/gblasd/Documents/DataScience/.venv/lib/python3.13/site-packages/sklearn/base.py", line 436, in _validate_params
    validate_parame

In [57]:
print("Best score: " + str(clf.best_score_))

Best score: 0.6180986489691007


In [58]:
# Mejor modelo
clf.best_estimator_

In [59]:
# Mejores parametros
clf.best_params_

{'selection': 'random', 'l1_ratio': 0.4, 'alpha': 0.0}

In [60]:
# Mejor score
clf.best_score_

np.float64(0.6180986489691007)

## Preservación y consumo del modelo

In [61]:
import pickle

### Método 1

In [65]:
PATH = "/Users/gblasd/Documents/DataScience/02_SupervisedLearning/models/"
NAME_MODEL = "best_model.pickle"
pickle.dump(obj=dc_scores["Ridge"]["model"], file=open(PATH + NAME_MODEL, "wb"))
pickle.load(file=open(PATH + NAME_MODEL, "rb"))

### Método 2

In [66]:
PATH = "/Users/gblasd/Documents/DataScience/02_SupervisedLearning/models/"
NAME_MODEL = "best_model_v2.pickle"

pd.to_pickle(clf.best_estimator_, PATH + NAME_MODEL)
model = pd.read_pickle(PATH + NAME_MODEL)
model