# Optimización de performance

## Preparación de ambiente

### Carga de módulos

In [1]:
# Data Wrangling
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

# Data Viz
import cufflinks as cf

# Preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

# Modeling
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import Ridge, Lasso, ElasticNet, BayesianRidge, Lars

# Enviroment setup
cf.go_offline()
# pd.set_option("max_columns", 500)



## Data Wrangling

### Carga de datos

In [2]:
boston = load_boston()


Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.

    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this case special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    

#### Creación de TAD

In [3]:
df = pd.DataFrame(data = boston["data"], columns=boston["feature_names"])
df["target"] = boston["target"]

### EDA

In [4]:
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [5]:
df.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063,22.532806
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062,9.197104
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95,17.025
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36,21.2
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


In [6]:
print(boston["DESCR"])

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu


### Segmentación de sets

In [7]:
tgt = "target"
ls_pred = [x for x in df.columns if x not in [tgt]]

In [8]:
X = df[ls_pred]
y = df[[tgt]]

In [9]:
df[[tgt]]

Unnamed: 0,target
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2
...,...
501,22.4
502,20.6
503,23.9
504,22.0


In [10]:
df[tgt]

0      24.0
1      21.6
2      34.7
3      33.4
4      36.2
       ... 
501    22.4
502    20.6
503    23.9
504    22.0
505    11.9
Name: target, Length: 506, dtype: float64

### Train-test split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

#### Escalamiento

In [12]:
#sc = MinMaxScaler()
sc = StandardScaler()

In [13]:
Xs = pd.DataFrame(data=sc.fit_transform(X_train), columns=X_train.columns, index=X_train.index)

In [14]:
Xs

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
163,-0.220274,-0.483582,1.313864,3.627671,0.446644,3.048254,0.895969,-0.804421,-0.497260,0.016469,-1.693467,0.332031,-1.327544
202,-0.397055,3.085086,-1.347196,-0.275659,-1.170539,1.932224,-1.887187,1.250462,-0.854802,-0.325732,-1.693467,0.415574,-1.358196
45,-0.379386,-0.483582,-0.607255,-0.275659,-0.889660,-0.880465,-1.243004,0.665410,-0.735621,-1.041243,-0.235082,0.433898,-0.321864
206,-0.372506,-0.483582,-0.049266,-0.275659,-0.540689,0.059043,-0.577467,0.292500,-0.616440,-0.767482,0.083940,0.409425,-0.210932
264,-0.334678,0.381550,-1.053039,-0.275659,0.804127,1.342843,0.814112,-0.920421,-0.497260,-0.848366,-2.468233,0.325280,-0.629844
...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,-0.397995,2.976944,-1.585251,-0.275659,-1.110959,2.318822,-1.307066,0.939528,-0.616440,-0.904362,-1.830190,0.401710,-1.378631
42,-0.382918,-0.483582,-0.607255,-0.275659,-0.889660,-0.169999,-2.211058,0.975794,-0.735621,-1.041243,-0.235082,0.270790,-0.964098
62,-0.386606,0.597833,-0.877152,-0.275659,-0.847103,0.248695,-0.032936,1.728417,-0.139717,-0.723929,0.585259,0.433898,-0.829813
276,-0.387264,1.246681,-0.683068,3.627671,-0.898172,1.431833,-0.702032,0.508743,-0.616440,-0.910584,-0.371806,0.341675,-0.929067


## Modelado

### Lasso

In [15]:
lasso = Lasso()

#### Cross-Validation

#### Entrenamiento con datos escalados

In [16]:
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "tol": [0.00001, 0.0000001, 0.01],
    "selection": ['cyclic', 'random']
}

In [17]:
np.prod(list(map(len, param_grid.values())))

654

In [18]:
search = GridSearchCV(lasso, param_grid, cv=4, error_score=-1000, n_jobs=-1, scoring="r2", verbose=5)

In [19]:
search.fit(X, y)

Fitting 4 folds for each of 654 candidates, totalling 2616 fits




[CV 4/4] END alpha=1, selection=cyclic, tol=1e-05;, score=-0.303 total time=   0.0s
[CV 2/4] END alpha=1, selection=cyclic, tol=0.01;, score=0.520 total time=   0.0s
[CV 1/4] END alpha=1, selection=random, tol=1e-07;, score=0.542 total time=   0.0s
[CV 2/4] END alpha=1, selection=cyclic, tol=1e-07;, score=0.520 total time=   0.0s
[CV 2/4] END alpha=1, selection=random, tol=1e-07;, score=0.520 total time=   0.0s
[CV 3/4] END alpha=1, selection=random, tol=1e-07;, score=0.382 total time=   0.0s
[CV 4/4] END alpha=1, selection=random, tol=1e-07;, score=-0.303 total time=   0.0s
[CV 1/4] END alpha=1, selection=cyclic, tol=1e-05;, score=0.542 total time=   0.0s
[CV 1/4] END alpha=1, selection=random, tol=0.01;, score=0.543 total time=   0.0s
[CV 3/4] END alpha=1, selection=random, tol=0.01;, score=0.380 total time=   0.0s
[CV 2/4] END alpha=1, selection=random, tol=0.01;, score=0.519 total time=   0.0s
[CV 4/4] END alpha=1, selection=random, tol=0.01;, score=-0.304 total time=   0.0s
[CV 1/

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f

[CV 1/4] END alpha=0.5, selection=random, tol=0.01;, score=0.616 total time=   0.0s
[CV 1/4] END alpha=99, selection=cyclic, tol=1e-07;, score=-0.709 total time=   0.0s
[CV 4/4] END alpha=0.3, selection=cyclic, tol=0.01;, score=-0.775 total time=   0.0s
[CV 2/4] END alpha=98, selection=cyclic, tol=1e-05;, score=-0.077 total time=   0.0s
[CV 2/4] END alpha=0.8, selection=cyclic, tol=0.01;, score=0.526 total time=   0.0s
[CV 4/4] END alpha=90, selection=cyclic, tol=1e-05;, score=-2.425 total time=   0.0s
[CV 3/4] END alpha=0.4, selection=cyclic, tol=0.01;, score=0.396 total time=   0.0s
[CV 2/4] END alpha=0.9, selection=random, tol=1e-07;, score=0.524 total time=   0.0s
[CV 1/4] END alpha=0.0, selection=cyclic, tol=0.01;, score=0.602 total time=   0.0s
[CV 2/4] END alpha=0.5, selection=random, tol=0.01;, score=0.541 total time=   0.0s
[CV 2/4] END alpha=0.2, selection=cyclic, tol=1e-05;, score=0.603 total time=   0.0s
[CV 4/4] END alpha=0.9, selection=random, tol=1e-07;, score=-0.368 tot

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.e

GridSearchCV(cv=4, error_score=-1000, estimator=Lasso(), n_jobs=-1,
             param_grid={'alpha': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                   14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                                   25, 26, 27, 28, 29, 30, ...],
                         'selection': ['cyclic', 'random'],
                         'tol': [1e-05, 1e-07, 0.01]},
             scoring='r2', verbose=5)

In [20]:
print("Best score: " + str(search.best_score_))

Best score: 0.29678944695787396


In [21]:
search.best_estimator_

Lasso(alpha=2, selection='random', tol=0.01)

In [22]:
summary = pd.DataFrame(search.cv_results_)

In [23]:
summary.sort_values(by = "rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_selection,param_tol,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
11,0.006326,0.002748,0.006760,0.003892,2,random,0.01,"{'alpha': 2, 'selection': 'random', 'tol': 0.01}",0.404666,0.473217,0.278515,0.030760,0.296789,0.168723,1
8,0.007401,0.004726,0.003622,0.000221,2,cyclic,0.01,"{'alpha': 2, 'selection': 'cyclic', 'tol': 0.01}",0.400732,0.472319,0.281414,0.026274,0.295185,0.169573,2
7,0.013189,0.005749,0.003456,0.000261,2,cyclic,0.0,"{'alpha': 2, 'selection': 'cyclic', 'tol': 1e-07}",0.403640,0.473144,0.278769,0.024032,0.294896,0.171191,3
10,0.006665,0.002769,0.007560,0.006657,2,random,0.0,"{'alpha': 2, 'selection': 'random', 'tol': 1e-07}",0.403640,0.473144,0.278769,0.024032,0.294896,0.171191,4
9,0.008086,0.005202,0.003894,0.000845,2,random,0.00001,"{'alpha': 2, 'selection': 'random', 'tol': 1e-05}",0.403635,0.473143,0.278766,0.024038,0.294896,0.171188,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
592,0.006186,0.000867,0.007434,0.004146,99,random,0.0,"{'alpha': 99, 'selection': 'random', 'tol': 1e...",-0.709215,-0.077437,-0.161790,-2.568880,-0.879330,1.005163,650
589,0.004393,0.000295,0.003148,0.000161,99,cyclic,0.0,"{'alpha': 99, 'selection': 'cyclic', 'tol': 1e...",-0.709215,-0.077437,-0.161790,-2.568880,-0.879330,1.005163,651
588,0.005999,0.004045,0.007466,0.008058,99,cyclic,0.00001,"{'alpha': 99, 'selection': 'cyclic', 'tol': 1e...",-0.709215,-0.077437,-0.161790,-2.568880,-0.879331,1.005163,652
590,0.003591,0.000196,0.002947,0.000036,99,cyclic,0.01,"{'alpha': 99, 'selection': 'cyclic', 'tol': 0.01}",-0.711440,-0.077355,-0.162067,-2.568880,-0.879936,1.005037,653


In [24]:
X_test

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
171,2.31390,0.0,19.58,0.0,0.6050,5.880,97.3,2.3887,5.0,403.0,14.7,348.13,12.03
97,0.12083,0.0,2.89,0.0,0.4450,8.069,76.0,3.4952,2.0,276.0,18.0,396.90,4.21
156,2.44668,0.0,19.58,0.0,0.8710,5.272,94.0,1.7364,5.0,403.0,14.7,88.63,16.14
491,0.10574,0.0,27.74,0.0,0.6090,5.983,98.8,1.8681,4.0,711.0,20.1,390.11,18.07
361,3.83684,0.0,18.10,0.0,0.7700,6.251,91.1,2.2955,24.0,666.0,20.2,350.65,14.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,0.05023,35.0,6.06,0.0,0.4379,5.706,28.4,6.6407,1.0,304.0,16.9,394.02,12.43
214,0.28955,0.0,10.59,0.0,0.4890,5.412,9.8,3.5875,4.0,277.0,18.6,348.93,29.55
253,0.36894,22.0,5.86,0.0,0.4310,8.259,8.4,8.9067,7.0,330.0,19.1,396.90,3.54
494,0.27957,0.0,9.69,0.0,0.5850,5.926,42.6,2.3817,6.0,391.0,19.2,396.90,13.59


In [25]:
dc_scores = dict()

In [26]:
dc_scores[str(lasso).split("(")[0]] = {"model": search.best_estimator_, "score": search.best_score_}

In [27]:
dc_scores

{'Lasso': {'model': Lasso(alpha=2, selection='random', tol=0.01),
  'score': 0.29678944695787396}}

### Ridge

In [28]:
ridge = Ridge()

#### Cross-Validation

#### Hiperparametrización

In [29]:
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "tol": [0.00001, 0.0000001, 0.01],
    "solver": ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
}

In [30]:
np.prod(list(map(len, param_grid.values())))

2289

In [31]:
search = RandomizedSearchCV(ridge, param_grid, cv=4, error_score=-1_000, n_jobs=-1, scoring="r2", verbose=1, n_iter=100)
search.fit(X_train, y_train)

Fitting 4 folds for each of 100 candidates, totalling 400 fits




RandomizedSearchCV(cv=4, error_score=-1000, estimator=Ridge(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'alpha': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                                                  11, 12, 13, 14, 15, 16, 17,
                                                  18, 19, 20, 21, 22, 23, 24,
                                                  25, 26, 27, 28, 29, 30, ...],
                                        'solver': ['auto', 'svd', 'cholesky',
                                                   'lsqr', 'sparse_cg', 'sag',
                                                   'saga'],
                                        'tol': [1e-05, 1e-07, 0.01]},
                   scoring='r2', verbose=1)

In [32]:
search.best_estimator_

Ridge(alpha=0.4, solver='svd', tol=1e-07)

In [33]:
summary = pd.DataFrame(search.cv_results_)

In [34]:
summary.sort_values(by = "rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_tol,param_solver,param_alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
84,0.003180,0.000688,0.002077,0.000211,0.0,svd,0.4,"{'tol': 1e-07, 'solver': 'svd', 'alpha': 0.4}",0.509336,0.755953,0.776124,0.755312,0.699181,0.109926,1
78,0.002927,0.000691,0.002100,0.000190,0.00001,cholesky,0.7,"{'tol': 1e-05, 'solver': 'cholesky', 'alpha': ...",0.507498,0.758810,0.774804,0.754712,0.698956,0.110793,2
26,0.006387,0.002696,0.003218,0.000268,0.0,sparse_cg,0.7,"{'tol': 1e-07, 'solver': 'sparse_cg', 'alpha':...",0.507497,0.758810,0.774804,0.754712,0.698955,0.110793,3
83,0.004832,0.003090,0.002633,0.000382,0.00001,cholesky,1,"{'tol': 1e-05, 'solver': 'cholesky', 'alpha': 1}",0.506304,0.760362,0.773696,0.754054,0.698604,0.111251,4
3,0.006130,0.001132,0.004411,0.000591,0.01,auto,13,"{'tol': 0.01, 'solver': 'auto', 'alpha': 13}",0.509946,0.768638,0.764313,0.749020,0.697979,0.108806,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.007073,0.002516,0.004496,0.003076,0.01,lsqr,38,"{'tol': 0.01, 'solver': 'lsqr', 'alpha': 38}",0.505170,0.655216,0.586920,0.617251,0.591139,0.055218,96
90,0.004957,0.002575,0.003594,0.002219,0.01,lsqr,3,"{'tol': 0.01, 'solver': 'lsqr', 'alpha': 3}",0.506423,0.654293,0.587133,0.616083,0.590983,0.054321,97
8,0.010817,0.001071,0.010528,0.004412,0.01,saga,0.9,"{'tol': 0.01, 'solver': 'saga', 'alpha': 0.9}",0.378755,0.669622,0.563691,0.667842,0.569978,0.118440,98
54,0.006273,0.000564,0.002306,0.000278,0.01,saga,0.8,"{'tol': 0.01, 'solver': 'saga', 'alpha': 0.8}",0.378956,0.669501,0.562552,0.668361,0.569842,0.118457,99


In [35]:
search.best_estimator_

Ridge(alpha=0.4, solver='svd', tol=1e-07)

In [36]:
dc_scores[str(ridge).split("(")[0]] = {"model": search.best_estimator_, "score": search.best_score_}

In [37]:
dc_scores

{'Lasso': {'model': Lasso(alpha=2, selection='random', tol=0.01),
  'score': 0.29678944695787396},
 'Ridge': {'model': Ridge(alpha=0.4, solver='svd', tol=1e-07),
  'score': 0.6991812602048931}}

### Elastic Net

In [38]:
elnet = ElasticNet()

#### Cross-Validation

#### Entrenamiento con datos escalados

#### Hiperparametrización

##### Grid Search

In [39]:
param_grid = {
    "alpha": [x for x in range(1, 100)] + [y/10 for y in range(10)],
    "l1_ratio": [x/1_00 for x in range(1, 100)] + [y/10 for y in range(10)],
    "selection": ["cyclic", "random"]
}

In [40]:
np.prod(list(map(len, param_grid.values())))

23762

In [41]:
search = GridSearchCV(elnet, param_grid, cv=4, error_score=-1_000, n_jobs=-1, scoring="r2", verbose=1)

In [42]:
search.fit(X_train, y_train)

Fitting 4 folds for each of 23762 candidates, totalling 95048 fits


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

GridSearchCV(cv=4, error_score=-1000, estimator=ElasticNet(), n_jobs=-1,
             param_grid={'alpha': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                   14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
                                   25, 26, 27, 28, 29, 30, ...],
                         'l1_ratio': [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07,
                                      0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14,
                                      0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21,
                                      0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28,
                                      0.29, 0.3, ...],
                         'selection': ['cyclic', 'random']},
             scoring='r2', verbose=1)

In [43]:
print("Best score: " + str(search.best_score_))

Best score: 0.6977964719438308


In [44]:
search.best_estimator_

ElasticNet(alpha=0.0, l1_ratio=0.34, selection='random')

In [45]:
summary = pd.DataFrame(search.cv_results_)

In [46]:
summary.sort_values(by = "rank_test_score")

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,param_selection,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
21717,0.010210,0.001543,0.011203,0.008647,0.0,0.68,random,"{'alpha': 0.0, 'l1_ratio': 0.68, 'selection': ...",0.513730,0.745402,0.777483,0.754571,0.697796,0.106911,1
21769,0.009128,0.000588,0.004139,0.002243,0.0,0.94,random,"{'alpha': 0.0, 'l1_ratio': 0.94, 'selection': ...",0.513730,0.745402,0.777483,0.754571,0.697796,0.106911,1
21649,0.018649,0.009437,0.003380,0.000786,0.0,0.34,random,"{'alpha': 0.0, 'l1_ratio': 0.34, 'selection': ...",0.513730,0.745402,0.777483,0.754571,0.697796,0.106911,1
21749,0.009487,0.000907,0.003220,0.000151,0.0,0.84,random,"{'alpha': 0.0, 'l1_ratio': 0.84, 'selection': ...",0.513730,0.745402,0.777483,0.754571,0.697796,0.106911,4
21741,0.009244,0.000294,0.003270,0.000399,0.0,0.8,random,"{'alpha': 0.0, 'l1_ratio': 0.8, 'selection': '...",0.513730,0.745402,0.777483,0.754571,0.697796,0.106911,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21342,0.005780,0.002367,0.005930,0.004168,98,0.99,cyclic,"{'alpha': 98, 'l1_ratio': 0.99, 'selection': '...",-0.003532,0.269309,0.185627,0.316971,0.192094,0.122339,23758
21558,0.003457,0.000276,0.003354,0.000995,99,0.98,cyclic,"{'alpha': 99, 'l1_ratio': 0.98, 'selection': '...",-0.003525,0.269306,0.185624,0.316964,0.192092,0.122334,23759
21343,0.004125,0.000960,0.002822,0.000044,98,0.99,random,"{'alpha': 98, 'l1_ratio': 0.99, 'selection': '...",-0.003532,0.269308,0.186050,0.316434,0.192065,0.122197,23760
21560,0.006111,0.002111,0.007834,0.004897,99,0.99,cyclic,"{'alpha': 99, 'l1_ratio': 0.99, 'selection': '...",-0.003269,0.269133,0.185461,0.316266,0.191898,0.122029,23761


In [47]:
dc_scores[str(elnet).split("(")[0]] = {"model": search.best_estimator_, "score": search.best_score_}

In [48]:
dc_scores

{'Lasso': {'model': Lasso(alpha=2, selection='random', tol=0.01),
  'score': 0.29678944695787396},
 'Ridge': {'model': Ridge(alpha=0.4, solver='svd', tol=1e-07),
  'score': 0.6991812602048931},
 'ElasticNet': {'model': ElasticNet(alpha=0.0, l1_ratio=0.34, selection='random'),
  'score': 0.6977964719438308}}

## Preservación y consumo del modelo

In [49]:
pd.to_pickle(dc_scores["Ridge"]["model"], "best_model_boston.diplo")
pd.to_pickle(sc, "scaler_boston.diplo")
#pd.to_pickle(sc_y, "scaler_target_boston.diplo")
pd.to_pickle(X_train.columns, "features_boston.diplo")

In [50]:
model = pd.read_pickle("best_model_boston.diplo")
scaler = pd.read_pickle("scaler_boston.diplo")
#scaler_target = pd.read_pickle("scaler_target_boston.diplo")
features = pd.read_pickle("features_boston.diplo")

In [51]:
#scaler_target.inverse_transform(model.predict(scaler.transform(df[features])))

In [52]:
X["y_hat"] = model.predict(scaler.transform(X[features]))


X does not have valid feature names, but Ridge was fitted with feature names



In [53]:
X

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,y_hat
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,40.911722
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,44.945998
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,49.006062
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,48.193735
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,48.702375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,35.999109
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,33.938143
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,38.383096
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,37.285337


In [54]:
X["y"] = y

In [55]:
X

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,y_hat,y
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,40.911722,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,44.945998,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,49.006062,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,48.193735,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,48.702375,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,35.999109,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,33.938143,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,38.383096,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,37.285337,22.0


In [56]:
from sklearn.metrics import r2_score
for model in dc_scores.values():
    print(model["model"])
    print(model["score"])
    print(r2_score(y_test, model["model"].predict(X_test)))
    

Lasso(alpha=2, selection='random', tol=0.01)
0.29678944695787396
0.6091250714730269
Ridge(alpha=0.4, solver='svd', tol=1e-07)
0.6991812602048931
0.7214351028077954
ElasticNet(alpha=0.0, l1_ratio=0.34, selection='random')
0.6977964719438308
0.7237945767258424


## Búsqueda de la mejor combinación de modelo y escalador
En esta sección se probarán diferentes modelos de regresión, escaladores y combinaciones de hiperparámetros para encontrar la mejor pipeline.

In [57]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso, Ridge, ElasticNet, BayesianRidge, Lars
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import warnings
warnings.filterwarnings('ignore')
scalers = {
    'StandardScaler': StandardScaler(),
    'MinMaxScaler': MinMaxScaler(),
    'RobustScaler': RobustScaler()
}
models = {
    'Lasso': (Lasso(), {
        'model__alpha': [0.01, 0.1, 1, 10],
        'model__tol': [1e-4, 1e-6],
        'model__selection': ['cyclic', 'random']
    }),
    'Ridge': (Ridge(), {
        'model__alpha': [0.01, 0.1, 1, 10],
        'model__tol': [1e-4, 1e-6],
        'model__solver': ['auto', 'svd', 'cholesky', 'lsqr']
    }),
    'ElasticNet': (ElasticNet(), {
        'model__alpha': [0.01, 0.1, 1, 10],
        'model__l1_ratio': [0.1, 0.5, 0.9],
        'model__selection': ['cyclic', 'random']
    }),
    'BayesianRidge': (BayesianRidge(), {
        'model__alpha_1': [1e-6, 1e-4],
        'model__alpha_2': [1e-6, 1e-4]
    }),
    'Lars': (Lars(), {
        'model__n_nonzero_coefs': [1, 2, 3, 4, 5]
    })
}
results = []
for scaler_name, scaler in scalers.items():
    for model_name, (model, param_grid) in models.items():
        pipe = Pipeline([('scaler', scaler), ('model', model)])
        search = GridSearchCV(pipe, param_grid, cv=4, scoring='r2', n_jobs=-1, error_score='raise', verbose=0)
        search.fit(X_train, y_train.values.ravel())
        score = search.best_score_
        results.append({
            'scaler': scaler_name,
            'model': model_name,
            'best_params': search.best_params_,
            'best_score': score,
            'best_estimator': search.best_estimator_
        })
import pandas as pd
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='best_score', ascending=False).reset_index(drop=True)
results_df.to_pickle('all_model_scaler_results.diplo')
results_df.head()

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)


If you wish to scale the data, use Pipeline with a StandardScaler in a

Unnamed: 0,scaler,model,best_params,best_score,best_estimator
0,RobustScaler,Ridge,"{'model__alpha': 10, 'model__solver': 'lsqr', ...",0.701749,"(RobustScaler(), Ridge(alpha=10, solver='lsqr'..."
1,RobustScaler,BayesianRidge,"{'model__alpha_1': 1e-06, 'model__alpha_2': 0....",0.701015,"(RobustScaler(), BayesianRidge(alpha_2=0.0001))"
2,RobustScaler,ElasticNet,"{'model__alpha': 0.01, 'model__l1_ratio': 0.1,...",0.700622,"(RobustScaler(), ElasticNet(alpha=0.01, l1_rat..."
3,MinMaxScaler,BayesianRidge,"{'model__alpha_1': 1e-06, 'model__alpha_2': 0....",0.699633,"(MinMaxScaler(), BayesianRidge(alpha_2=0.0001))"
4,StandardScaler,BayesianRidge,"{'model__alpha_1': 1e-06, 'model__alpha_2': 1e...",0.699472,"(StandardScaler(), BayesianRidge())"


In [62]:
results_df.sort_values(by='best_score', ascending=False).head(10)

Unnamed: 0,scaler,model,best_params,best_score,best_estimator
0,RobustScaler,Ridge,"{'model__alpha': 10, 'model__solver': 'lsqr', ...",0.701749,"(RobustScaler(), Ridge(alpha=10, solver='lsqr'..."
1,RobustScaler,BayesianRidge,"{'model__alpha_1': 1e-06, 'model__alpha_2': 0....",0.701015,"(RobustScaler(), BayesianRidge(alpha_2=0.0001))"
2,RobustScaler,ElasticNet,"{'model__alpha': 0.01, 'model__l1_ratio': 0.1,...",0.700622,"(RobustScaler(), ElasticNet(alpha=0.01, l1_rat..."
3,MinMaxScaler,BayesianRidge,"{'model__alpha_1': 1e-06, 'model__alpha_2': 0....",0.699633,"(MinMaxScaler(), BayesianRidge(alpha_2=0.0001))"
4,StandardScaler,BayesianRidge,"{'model__alpha_1': 1e-06, 'model__alpha_2': 1e...",0.699472,"(StandardScaler(), BayesianRidge())"
5,StandardScaler,Ridge,"{'model__alpha': 10, 'model__solver': 'lsqr', ...",0.699145,"(StandardScaler(), Ridge(alpha=10, solver='lsq..."
6,StandardScaler,ElasticNet,"{'model__alpha': 0.01, 'model__l1_ratio': 0.1,...",0.698728,"(StandardScaler(), ElasticNet(alpha=0.01, l1_r..."
7,MinMaxScaler,Ridge,"{'model__alpha': 0.1, 'model__solver': 'lsqr',...",0.698701,"(MinMaxScaler(), Ridge(alpha=0.1, solver='lsqr..."
8,RobustScaler,Lasso,"{'model__alpha': 0.01, 'model__selection': 'ra...",0.698345,"(RobustScaler(), Lasso(alpha=0.01, selection='..."
9,StandardScaler,Lasso,"{'model__alpha': 0.01, 'model__selection': 'cy...",0.698066,"(StandardScaler(), Lasso(alpha=0.01, tol=1e-06))"


In [58]:
# Seleccionar el mejor pipeline y guardarlo
best_row = results_df.iloc[0]
best_pipeline = best_row['best_estimator']
import joblib
joblib.dump(best_pipeline, 'best_pipeline_boston.diplo')
print('Mejor combinación:')
print('Escalador:', best_row['scaler'])
print('Modelo:', best_row['model'])
print('Mejores hiperparámetros:', best_row['best_params'])
print('Mejor score:', best_row['best_score'])

Mejor combinación:
Escalador: RobustScaler
Modelo: Ridge
Mejores hiperparámetros: {'model__alpha': 10, 'model__solver': 'lsqr', 'model__tol': 1e-06}
Mejor score: 0.7017490011757013


In [60]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Predecir en el conjunto de test con el mejor pipeline
y_pred = best_pipeline.predict(X_test)

# Calcular métricas
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f"R2 en test: {r2:.4f}")
print(f"MSE en test: {mse:.4f}")
print(f"MAE en test: {mae:.4f}")

R2 en test: 0.7283
MSE en test: 24.9688
MAE en test: 3.4629
