## Clase 1: Algoritmos de Predicción con GridSearchCV
#### Profesor: Diego Miranda Olavarría
#### Data Scientist

In [1]:
import pandas as pd 
from sklearn.model_selection import GridSearchCV

data = pd.read_csv('Boston.csv')

data.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [2]:
data.shape

(506, 14)

In [3]:
X = data.drop('medv', axis=1)
y = data['medv']

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=0.8, random_state=42)

## Arbol de Decision

In [5]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

# Definición del modelo y parámetros para Grid Search
dt_reg = DecisionTreeRegressor()
params_dt_reg = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Grid Search
grid_dt_reg = GridSearchCV(dt_reg, params_dt_reg, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_dt_reg.fit(X_train, y_train)

In [6]:
grid_dt_reg.predict(X_test)

array([22.2       , 33.175     , 18.11111111, 25.07777778, 16.3       ,
       23.        , 18.11111111, 13.85      , 19.8       , 21.54285714,
       18.11111111, 26.5       ,  8.1       , 21.54285714, 19.43333333,
       23.        , 20.95      ,  8.7375    , 42.32      , 14.98      ,
       24.5625    , 23.475     , 14.38571429, 24.73333333, 13.8       ,
       14.98      , 21.25      , 11.125     , 19.46666667, 20.3       ,
       16.3       , 22.2       , 25.5       , 19.43333333, 18.76666667,
       13.8       , 33.2       , 19.52222222, 20.85      , 25.07777778,
       18.76666667, 29.275     , 42.32      , 17.7       , 21.26666667,
       14.98      , 14.98      , 25.07777778, 13.8       , 33.175     ,
       23.34444444, 35.26666667, 14.98      , 29.275     , 44.75      ,
       23.        , 16.3       , 26.325     , 21.26666667, 20.85      ,
       24.225     , 33.2       , 30.35      , 17.7       , 26.325     ,
       14.38571429, 16.3       , 22.725     , 26.325     , 14.68

## Random Forest

In [7]:
from sklearn.ensemble import RandomForestRegressor

# Definición del modelo y parámetros para Grid Search
rf_reg = RandomForestRegressor()
params_rf_reg = {
    'n_estimators': [50, 150, 200],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Grid Search
grid_rf_reg = GridSearchCV(rf_reg, params_rf_reg, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_rf_reg.fit(X_train, y_train)

In [8]:
grid_rf_reg.predict(X_test)

array([23.07315704, 31.1551164 , 17.85409947, 23.44442593, 16.78372063,
       21.22490135, 18.9636381 , 15.50552063, 21.28139901, 21.10896278,
       20.50976825, 20.11450476,  8.36658829, 21.44413883, 19.45892624,
       25.03541217, 19.57133439,  8.51050945, 44.93224339, 15.19384524,
       24.70062037, 23.9387746 , 14.88289392, 23.94641032, 14.39727857,
       14.84476296, 21.77853528, 13.9884873 , 19.53029312, 21.24911919,
       20.43010952, 23.42010767, 28.6437    , 20.26370115, 14.36606825,
       15.90579762, 34.34936825, 19.28955979, 20.93225476, 24.01697989,
       19.75755079, 29.63745212, 45.45988466, 19.38307143, 22.42904098,
       14.14027593, 15.23058677, 24.35954021, 18.18998042, 28.77609048,
       21.04285982, 34.27732778, 16.65419788, 26.42661746, 45.21765952,
       21.58321613, 15.73191931, 32.41844048, 22.2750978 , 20.73826825,
       25.18946429, 34.14457672, 29.79810608, 18.31878095, 27.31813571,
       16.76572063, 14.14088413, 23.01733545, 28.48608968, 15.10

## XGBoost

In [9]:
import xgboost as xgb

# Definición del modelo y parámetros para Grid Search
xgboost_reg = xgb.XGBRegressor()
params_xgb_reg = {
    'n_estimators': [5, 10],
    'max_depth': [2, 4, 6],
    'learning_rate': [0.01, 0.1],
    'subsample': [0.5, 0.75, 1.0],
    'colsample_bytree': [0.5, 1.0],
    'gamma': [0, 0.1]
}

# Grid Search
grid_xgb_reg = GridSearchCV(xgboost_reg, params_xgb_reg, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_xgb_reg.fit(X_train, y_train)

In [10]:
grid_xgb_reg.predict(X_test)

array([23.216082, 26.836256, 19.498287, 23.073046, 19.06364 , 22.132101,
       20.701366, 18.124239, 21.460384, 21.992916, 20.493706, 20.560999,
       15.089882, 22.622465, 21.077442, 25.065306, 20.815857, 14.55797 ,
       36.51248 , 17.317175, 23.571587, 23.571587, 17.675554, 24.100346,
       18.033506, 17.738255, 21.874075, 17.190718, 20.44169 , 21.847366,
       21.126513, 23.09946 , 22.505863, 20.747278, 17.615839, 18.464037,
       31.38729 , 21.11789 , 22.405315, 23.25843 , 19.901173, 26.611914,
       37.72521 , 21.46601 , 23.066208, 17.390818, 18.474907, 23.25843 ,
       20.0006  , 24.314266, 21.658148, 29.632227, 19.015265, 24.799767,
       35.45668 , 21.65696 , 17.916174, 27.707432, 22.753052, 21.33489 ,
       24.312368, 30.163315, 24.75132 , 20.585968, 25.803282, 18.714746,
       16.526098, 23.108557, 25.840574, 17.976091, 21.674505, 25.562273,
       14.366176, 23.953478, 22.132101, 14.0937  , 21.282528, 37.72521 ,
       14.688771, 17.041803, 21.980545, 15.95635 , 

## Gradient Boosting

In [11]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

# Definición del modelo y parámetros para Grid Search
gb_reg = GradientBoostingRegressor()
params_gb = {
    'n_estimators': [5, 10, 15],  # Número de etapas de refuerzo a realizar
    'learning_rate': [0.01, 0.1, 0.2],  # Tasa de aprendizaje
    'max_depth': [3, 4, 5],  # Profundidad máxima de los estimadores de regresión
    'min_samples_split': [2, 4],  # Número mínimo de muestras requeridas para dividir un nodo
    'min_samples_leaf': [1, 2],  # Número mínimo de muestras requeridas en un nodo hoja
    'max_features': ['sqrt', 'log2', None]  # Número de características a considerar al buscar la mejor división
}

# Grid Search
grid_gb_reg = GridSearchCV(gb_reg, params_gb, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_gb_reg.fit(X_train, y_train)

In [12]:
grid_gb_reg.predict(X_test)

array([22.76573143, 30.94729199, 15.47315556, 22.59466653, 17.01466299,
       21.21354871, 19.76328551, 15.06929399, 20.99210627, 21.16335052,
       20.61554108, 18.90604419,  9.9674454 , 21.21354871, 21.38973353,
       23.06208993, 19.64155515,  9.63222696, 45.58697253, 15.30115977,
       25.63670661, 26.01341489, 15.12939702, 20.31246358, 15.10717509,
       16.31381662, 20.80712283, 14.03771628, 20.07253598, 21.16335052,
       21.30333832, 22.7158221 , 20.89230689, 17.38558001, 15.36091255,
       16.76266395, 35.33574206, 20.31979297, 21.72716555, 22.59466653,
       20.73749076, 32.32506398, 43.62799199, 20.21270005, 23.37803728,
       14.58232118, 16.51973286, 22.80500032, 18.6155034 , 26.63796673,
       21.68307741, 34.93140274, 17.77589944, 22.98271538, 47.06464683,
       20.80718556, 15.86701176, 32.31175045, 22.48673488, 20.46601135,
       24.36585361, 32.58383775, 34.22524603, 18.81685399, 23.34662632,
       18.78052145, 12.95047925, 23.07003428, 30.75268139, 15.33

## Cat Boost

In [13]:
#!pip install catboost

In [14]:
from catboost import CatBoostRegressor

# Definición del modelo y parámetros para Grid Search
catboost_reg = CatBoostRegressor(silent=True)
params_catboost = {
    'iterations': [2, 5, 10],
    'learning_rate': [0.01, 0.1, 0.2],
    'depth': [2, 4, 6],
    'l2_leaf_reg': [1, 3, 5],
    'loss_function': ['RMSE', 'MAE']
}

# Grid Search
grid_catboost_reg = GridSearchCV(catboost_reg, params_catboost, cv=2, scoring='neg_mean_squared_error', n_jobs=-1)
grid_catboost_reg.fit(X_train, y_train)


In [15]:
grid_catboost_reg.predict(X_test)

array([23.14634384, 29.16861207, 15.69388209, 23.36512451, 15.88248634,
       20.10699027, 19.79866555, 16.85289192, 18.83456857, 19.89318942,
       20.06581569, 19.91674696, 13.1220322 , 20.50483305, 20.17400143,
       19.63239128, 18.24438623, 13.1220322 , 40.65345632, 15.15519911,
       23.08345788, 24.37400536, 17.88833992, 23.6889302 , 14.09178322,
       16.12599293, 23.14098932, 15.61809254, 20.27360932, 20.42600275,
       19.87063228, 23.43692887, 24.21473941, 17.63262868, 15.88352915,
       16.28833968, 36.58472251, 22.68065903, 21.32739591, 23.46235063,
       18.28933698, 27.99111812, 40.65345632, 20.06663054, 23.14634384,
       14.489626  , 18.18955873, 22.8101577 , 18.02199489, 26.2955983 ,
       21.16919074, 33.50923337, 18.58740151, 23.43692887, 36.99430744,
       19.85744185, 14.489626  , 30.05311858, 23.01149065, 19.32286979,
       23.07258351, 35.90588758, 26.37951173, 19.77615865, 24.9448711 ,
       19.43638249, 14.62307497, 22.88212492, 27.47611004, 14.51

## LightGBM

In [16]:
#!pip install lightgbm

In [17]:
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV

# Definición del modelo y parámetros para Grid Search
lgbm_reg = lgb.LGBMRegressor()
params_lgbm = {
    'num_leaves': [31, 62, 127],  # Número de hojas en un árbol
    'learning_rate': [0.01, 0.1, 0.2],  # Tasa de aprendizaje
    'n_estimators': [5, 10, 15],  # Número de árboles a construir
    'max_depth': [-1, 5, 10],  # Profundidad máxima de los árboles, -1 significa sin límite
    'min_split_gain': [0.0, 0.1],  # Ganancia mínima para realizar una división
    'subsample': [0.8, 1.0],  # Submuestreo de los datos para construir los árboles
    'colsample_bytree': [0.8, 1.0]  # Submuestreo de características para construir cada árbol
}

# Grid Search
grid_lgbm_reg = GridSearchCV(lgbm_reg, params_lgbm, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_lgbm_reg.fit(X_train, y_train)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 404, number of used features: 13
[LightGBM] [Info] Start training from score 22.796535


In [18]:
grid_lgbm_reg.predict(X_test)

array([24.87779659, 33.11933945, 16.31494054, 24.26406453, 16.14426489,
       20.3654681 , 19.16950447, 16.23404933, 21.48640286, 20.2104837 ,
       21.68257184, 20.87106257,  8.16724099, 20.39292287, 20.15469049,
       26.02070564, 19.28638852,  8.97080121, 44.40510786, 14.96762725,
       25.07703436, 25.87862019, 14.85009449, 25.0350197 , 14.327691  ,
       15.09431427, 23.40710377, 14.13221132, 19.29663864, 21.49432742,
       21.27791487, 22.57261768, 27.06163733, 21.0162059 , 15.40251844,
       15.7209547 , 37.06344716, 20.19231087, 20.92834125, 24.18064711,
       19.08629726, 29.57573187, 44.40510786, 19.02738008, 23.59994951,
       13.78872776, 15.05710435, 23.87519985, 18.3727343 , 27.75848865,
       21.76938435, 33.99937154, 16.34220916, 25.34212308, 42.60909527,
       21.74881196, 14.67731393, 32.71053373, 20.64440932, 19.17945261,
       24.64013823, 34.86000813, 32.97177938, 19.00729866, 25.92913478,
       17.50434261, 13.32592118, 22.59636931, 29.36477112, 15.03

### Escalado de datos

In [19]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Perceptrón Multicapa

In [20]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV

mlp_reg = MLPRegressor(max_iter=500)
params_mlp_reg = {
    'hidden_layer_sizes': [(100,), (50, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01],
}


grid_mlp_reg = GridSearchCV(mlp_reg, params_mlp_reg, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_mlp_reg.fit(X_train_scaled, y_train) 

3 fits failed out of a total of 144.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Diego\anaconda3\envs\gpu\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Diego\anaconda3\envs\gpu\Lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Diego\anaconda3\envs\gpu\Lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py", line 753, in fit
    return self._fit(X, y, incremental=False)
           ^^^^^^^^^^^^^^^^^^^^^^^

In [21]:
grid_mlp_reg.predict(X_test_scaled) 

array([26.26294769, 30.0351389 , 16.38443193, 24.81967324, 16.47972501,
       23.58619104, 20.34466895, 15.79186275, 26.32183382, 21.46889152,
       22.66923784, 22.10216779,  8.52183023, 23.20665589, 21.34781966,
       27.78440175, 20.22747398, 10.57102731, 42.23565793, 15.22397729,
       26.0078422 , 28.49479609, 16.90849982, 24.07659914, 17.03866242,
       17.23351863, 23.22970538, 11.80494656, 22.61314134, 20.81178956,
       22.5825962 , 25.144024  , 16.57940363, 20.2912733 , 15.04196127,
       17.97308361, 31.44437068, 22.02020968, 23.42061642, 25.24457034,
       18.35586589, 32.34640943, 43.11447725, 21.76606016, 25.48591217,
       16.18496659, 18.04812672, 26.31308785, 20.44822499, 27.44071288,
       22.92793158, 32.23289782, 18.68990541, 28.11737168, 41.34669559,
       28.45835415, 16.48572132, 33.79988164, 24.56486528, 20.22322839,
       25.89530755, 34.00901352, 32.44231723, 20.21378424, 26.06431527,
       21.0528508 , 17.74030077, 25.7444186 , 30.6959233 , 12.06

### KNN

In [22]:
from sklearn.neighbors import KNeighborsRegressor

# Definición del modelo y parámetros para Grid Search
knn_reg = KNeighborsRegressor()
params_knn_reg = {
    'n_neighbors': range(1, 3),
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [2, 5, 7],
    'p': [1, 2]
}

# Grid Search
grid_knn_reg = GridSearchCV(knn_reg, params_knn_reg, cv=2, scoring='neg_mean_squared_error', n_jobs=-1)
grid_knn_reg.fit(X_train, y_train)

In [23]:
grid_knn_reg.predict(X_test_scaled)



array([17.15547951, 17.1526809 , 17.15629344, 17.15536683, 17.15553862,
       17.15570415, 17.15575308, 17.15605893, 17.15633517, 17.15577634,
       17.15309699, 17.1557463 , 17.14713272, 17.1556887 , 17.15562349,
       17.15636621, 17.15586087, 17.15312236, 17.15509438, 17.15454967,
       17.15532268, 17.15528822, 17.15588431, 17.15572418, 17.15502714,
       17.155633  , 17.15553607, 17.15419115, 17.15311162, 17.15573166,
       17.15545803, 17.15558342, 17.15177667, 17.15527017, 17.15638388,
       17.15462651, 17.15552989, 17.15553283, 17.15299528, 17.15539726,
       17.1557732 , 17.15545653, 17.15504968, 17.15553789, 17.15541317,
       17.15511899, 17.15581157, 17.15541514, 17.15638888, 17.15537439,
       17.15568288, 17.15529022, 17.15590838, 17.15576329, 17.15534613,
       17.15625678, 17.15551819, 17.15517441, 17.15544704, 17.15624942,
       17.15623689, 17.15649789, 17.1553425 , 17.15543801, 17.15763491,
       17.15313734, 17.15473054, 17.15540415, 17.15510896, 17.15

### Support Vector Machine

In [24]:
from sklearn.svm import SVR

# Definición del modelo y parámetros para Grid Search
svr_reg = SVR()
params_svr_reg = {
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto'],
    'C': [0.1, 1],
    'epsilon': [0.01, 0.1]
}

# Grid Search
grid_svr_reg = GridSearchCV(svr_reg, params_svr_reg, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_svr_reg.fit(X_train, y_train)

In [25]:
grid_svr_reg.predict(X_test_scaled)



array([  6.52617135,  12.84694493,   0.62442452,   3.26743929,
         1.7414488 ,   5.04178217,   0.35783786,   1.54795398,
         3.48600711,   1.44587716,   4.85306179,   1.09363955,
       -13.96090461,   4.21722319,  -0.99733074,   8.96126911,
         0.36252486, -10.2487384 ,  19.45374673,   0.75490843,
         4.54604711,   7.17037183,  -2.27101928,   6.02666369,
        -0.47719862,  -3.86969603,   1.66410916,   5.06602275,
         4.70810449,  -0.64842381,  -0.7263579 ,   5.29229429,
        11.19861493,  -6.30960048,   0.75581304,  -3.54117109,
        12.30027237,  -0.22211942,   5.57052468,   4.66425364,
        -4.29224423,  10.05571664,  21.05276122,  -1.46738838,
         6.2913029 ,  -2.74774495,  -1.96358415,   6.13197567,
         2.20337987,   7.34152784,   1.4931185 ,  11.55269744,
         1.59887859,   8.26578469,  15.60129746,   3.24169682,
         0.91645197,  10.15703045,   5.62288933,  -2.66184946,
         6.94859193,  12.11273643,   7.5492415 ,  -3.60

### ElasticNet

In [26]:
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV

# Definición del modelo y parámetros para Grid Search
elastic_net_reg = ElasticNet()
params_elastic_net = {
    'alpha': [0.1, 1, 10, 100],  # Combina la importancia de los términos de regularización L1 y L2
    'l1_ratio': [0.1, 0.5, 0.9],  # Mezcla entre la regresión Lasso y Ridge (0 = Ridge, 1 = Lasso)
    'max_iter': [100, 500],  # Número máximo de iteraciones
    'tol': [0.0001, 0.001]  # Tolerancia para el criterio de parada
}

# Grid Search
grid_elastic_net_reg = GridSearchCV(elastic_net_reg, params_elastic_net, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_elastic_net_reg.fit(X_train, y_train)


In [27]:
grid_elastic_net_reg.predict(X_test_scaled)



array([29.34285741, 34.46622277, 25.07945249, 25.87177822, 26.59175233,
       27.11355027, 24.50559123, 25.22704138, 26.93748542, 25.86824172,
       29.89509548, 24.86106487, 16.17432951, 26.32242174, 23.4968942 ,
       30.69110502, 24.98995594, 18.91866376, 36.5178473 , 25.98025834,
       27.15337407, 28.91510596, 22.12347056, 28.28769878, 25.54120632,
       22.70742375, 25.16217573, 28.37841337, 28.88250968, 24.53882939,
       24.78051265, 27.488185  , 32.42698735, 22.62102704, 25.20444975,
       25.05493917, 31.84267715, 23.89650492, 29.42161681, 26.56606711,
       22.01543561, 30.92354922, 37.52620576, 22.40031558, 28.69084452,
       24.0459571 , 22.68598966, 27.47433327, 26.19132896, 29.93065058,
       25.42803184, 31.86462363, 24.69226475, 29.53398652, 35.9065516 ,
       26.86213847, 26.36585198, 30.19226652, 27.47652452, 21.11298207,
       27.25480067, 30.49688076, 29.89230673, 22.0878416 , 25.77697313,
       25.38110387, 27.83972383, 26.94341256, 28.88061267, 24.71

### Lasso y Lars

In [28]:
from sklearn.linear_model import LassoLars
from sklearn.model_selection import GridSearchCV

# Definición del modelo y parámetros para Grid Search
lasso_lars = LassoLars()
params_lasso_lars = {
    'alpha': [0.01, 0.1, 1, 10],  # Parámetro de penalización
    'max_iter': [10, 50, 100],  # Número máximo de iteraciones
    'eps': [1e-4, 1e-8, 1e-12]  # Épsilon para el criterio de convergencia
}

# Grid Search sin el parámetro 'normalize'
grid_lasso_lars = GridSearchCV(lasso_lars, params_lasso_lars, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_lasso_lars.fit(X_train, y_train)


In [29]:
grid_lasso_lars.predict(X_test_scaled)



array([ 35.00303197,  53.64028191,  19.13637139,  41.73988551,
         9.05383471,  40.80158932,  25.57197636,  17.8530644 ,
        29.7913005 ,  22.46897117,  36.64975144,  28.17965707,
        -1.25035318,  39.85804224,  29.84251304,  25.29811308,
        22.08707273,   1.12785309,  53.92465188,   8.30593365,
        37.53895795,  41.64488991,  23.50504044,  26.94034563,
         7.04620668,  18.64421129,  38.19297926,  11.45694328,
        42.37582491,  20.76974124,  29.3366229 ,  34.79757212,
        21.14806236,  -2.80852755,  21.99128711, -10.99088343,
        42.94837659,  27.83940547,  42.98852259,  42.7177068 ,
        17.78780636,  41.34357731,  55.88123653,  38.02174063,
        36.64767769,   7.20374736,  24.03070774,  43.85683634,
        23.54502167,  32.46887963,  28.7322124 ,  45.69364954,
        26.64686152,  34.39319675,  35.68426465,  23.90408449,
         5.66627068,  47.3167741 ,  39.78134208,  34.16636916,
        39.26574448,  48.77839309,  36.26152435,  22.98

### Comparacion de Metricas

In [30]:
from sklearn.metrics import mean_squared_error, r2_score
from tabulate import tabulate

# Supongamos que tienes un diccionario de modelos entrenados
# donde las claves son los nombres de los modelos y los valores son los modelos ya entrenados
models = {
    'DecisionTree': grid_dt_reg.best_estimator_,
    'RandomForest': grid_rf_reg.best_estimator_,
    'XGBoost': grid_xgb_reg.best_estimator_,
    'MLPRegressor': grid_mlp_reg.best_estimator_,
    'SVR': grid_svr_reg.best_estimator_,
    'KNeighbors': grid_knn_reg.best_estimator_,
    'ElasticNet': grid_elastic_net_reg.best_estimator_,
    'GradientBoosting': grid_gb_reg.best_estimator_,
    'CatBoost': grid_catboost_reg.best_estimator_,
    'LightGBM': grid_lgbm_reg.best_estimator_,

}

# Calcula MSE y R^2 para cada modelo
results = []
for name, model in models.items():
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results.append([name, mse, r2])

# Mostrar los resultados en una tabla usando tabulate
print(tabulate(results, headers=['Modelo', 'MSE', 'R^2']))


Modelo                  MSE        R^2
----------------  ---------  ---------
DecisionTree       10.4862    0.857007
RandomForest        9.59761   0.869124
XGBoost            20.4192    0.721558
MLPRegressor      343.919    -3.68977
SVR                29.4359    0.598604
KNeighbors         20.5378    0.719941
ElasticNet         24.042     0.672157
GradientBoosting    9.40008   0.871818
CatBoost           15.2861    0.791554
LightGBM           11.4809    0.843444


