# Regresiones

## Preparación de ambiente

### Carga de módulos

In [68]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Lars, ElasticNet, Lasso, Ridge, BayesianRidge

pd.set_option('display.float_format', lambda x: "{:,.5f}".format(x))

## Data Wrangling

In [3]:
boston = load_boston()
dc_scores = {}
df = pd.DataFrame(data = boston["data"], columns=boston["feature_names"])
df["target"] = boston["target"]
tgt = "target"
ls_pred = [x for x in df.columns if x not in [tgt]]
X = df[ls_pred]
y = df[tgt]


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this case special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows:

        from sklearn.datasets import fetch_californi

In [4]:
print(boston["DESCR"])

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [5]:
df.shape

(506, 14)

In [6]:
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.01,18.0,2.31,0.0,0.54,6.58,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.03,0.0,7.07,0.0,0.47,6.42,78.9,4.97,2.0,242.0,17.8,396.9,9.14,21.6
2,0.03,0.0,7.07,0.0,0.47,7.18,61.1,4.97,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03,0.0,2.18,0.0,0.46,7.0,45.8,6.06,3.0,222.0,18.7,394.63,2.94,33.4
4,0.07,0.0,2.18,0.0,0.46,7.15,54.2,6.06,3.0,222.0,18.7,396.9,5.33,36.2


## Modelado

### Regresión Lineal

In [7]:
linreg = LinearRegression()

In [8]:
linreg.fit(X, y)

LinearRegression()

In [12]:
ls_res = cross_val_score(estimator = linreg, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")

In [13]:
ls_res

array([ 0.60217169,  0.60398145,  0.35873597, -1.10867706])

In [14]:
np.mean(ls_res), np.std(ls_res)

(0.11405301290101522, 0.712956573713099)

In [15]:
linreg.intercept_

36.45948838508978

In [21]:
pd.DataFrame(zip(X.columns, linreg.coef_)).sort_values(by=1)

Unnamed: 0,0,1
4,NOX,-17.77
7,DIS,-1.48
10,PTRATIO,-0.95
12,LSTAT,-0.52
0,CRIM,-0.11
9,TAX,-0.01
6,AGE,0.0
11,B,0.01
2,INDUS,0.02
1,ZN,0.05


In [26]:
dc_scores.update({str(linreg).split("(")[0]: np.mean(ls_res)})

In [27]:
str(linreg).split("(")[0]

'LinearRegression'

In [28]:
dc_scores

{'LinearRegression': 0.11405301290101522}

### Regresión LARS

In [29]:
larsreg = Lars()

In [30]:
larsreg = larsreg.fit(X, y)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




Lars()

In [31]:
ls_res = cross_val_score(estimator = larsreg, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")

In [33]:
ls_res

array([ 0.6015054 ,  0.60398145,  0.35873597, -1.10867706])

In [32]:
np.mean(ls_res), np.std(ls_res)

(0.11388643958845374, 0.712842580203554)

In [34]:
larsreg.intercept_

36.95133114391082

In [35]:
larsreg.coef_

array([-1.09921668e-01,  4.77494449e-02,  3.42654195e-02,  2.67396468e+00,
       -1.82501210e+01,  3.80245356e+00,  9.79271355e-04, -1.48628516e+00,
        3.20424027e-01, -1.31267747e-02, -9.60367186e-01,  9.38636165e-03,
       -5.26015958e-01])

In [36]:
dc_scores.update({str(larsreg).split("(")[0]: np.mean(ls_res)})

In [37]:
dc_scores

{'LinearRegression': 0.11405301290101522, 'Lars': 0.11388643958845374}

### Regresión Cresta

In [79]:
ridgereg = Ridge(alpha=170)

In [80]:
ridgereg.fit(X, y)

Ridge(alpha=170)

In [81]:
ls_res = cross_val_score(estimator = ridgereg, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")

In [82]:
ls_res

array([ 0.58979113,  0.58231525,  0.47761264, -0.26179644])

In [42]:
np.mean(ls_res), np.std(ls_res)

(0.34698064785515315, 0.35426457700618036)

In [46]:
for i in range(0, 1000, 5):
    ridgereg = Ridge(alpha=i)
    ridgereg.fit(X, y)
    ls_res = cross_val_score(estimator = ridgereg, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")
    print(i, "{:,.2%}".format(np.mean(ls_res)), "{:,.2f}".format(np.std(ls_res)))

0 11.41% 0.71
5 20.01% 0.61
10 22.43% 0.57
15 24.22% 0.55
20 25.67% 0.53
25 26.89% 0.51
30 27.92% 0.49
35 28.81% 0.48
40 29.58% 0.47
45 30.25% 0.46
50 30.84% 0.45
55 31.35% 0.44
60 31.79% 0.43
65 32.19% 0.42
70 32.53% 0.41
75 32.84% 0.41
80 33.10% 0.40
85 33.34% 0.40
90 33.55% 0.39
95 33.73% 0.39
100 33.89% 0.39
105 34.02% 0.38
110 34.15% 0.38
115 34.25% 0.38
120 34.34% 0.37
125 34.42% 0.37
130 34.48% 0.37
135 34.54% 0.37
140 34.58% 0.36
145 34.62% 0.36
150 34.65% 0.36
155 34.67% 0.36
160 34.69% 0.36
165 34.69% 0.36
170 34.70% 0.35
175 34.70% 0.35
180 34.69% 0.35
185 34.68% 0.35
190 34.67% 0.35
195 34.65% 0.35
200 34.63% 0.35
205 34.61% 0.35
210 34.58% 0.35
215 34.55% 0.35
220 34.52% 0.35
225 34.49% 0.35
230 34.46% 0.34
235 34.42% 0.34
240 34.38% 0.34
245 34.35% 0.34
250 34.31% 0.34
255 34.26% 0.34
260 34.22% 0.34
265 34.18% 0.34
270 34.14% 0.34
275 34.09% 0.34
280 34.05% 0.34
285 34.00% 0.34
290 33.95% 0.34
295 33.91% 0.34
300 33.86% 0.34
305 33.81% 0.34
310 33.76% 0.34
315 33.72% 0.3

In [49]:
ridgereg.intercept_

39.526619339976584

In [52]:
pd.DataFrame(zip(X.columns, ridgereg.coef_)).sort_values(by=1)

Unnamed: 0,0,1
7,DIS,-1.08
10,PTRATIO,-0.83
12,LSTAT,-0.7
4,NOX,-0.15
0,CRIM,-0.1
2,INDUS,-0.05
9,TAX,-0.02
6,AGE,0.01
11,B,0.01
1,ZN,0.06


In [83]:
dc_scores.update({str(ridgereg).split("(")[0]: np.mean(ls_res)})

In [54]:
dc_scores

{'LinearRegression': 0.11405301290101522,
 'Lars': 0.11388643958845374,
 'Ridge': 0.2873317855328939}

### Regresión Lasso

In [73]:
lassreg = Lasso(alpha=170/100)

In [74]:
lassreg.fit(X, y)

Lasso(alpha=1.7)

In [75]:
ls_res = cross_val_score(estimator = lassreg, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")

In [76]:
ls_res

array([ 0.43962247,  0.4881991 ,  0.3038345 , -0.00741578])

In [61]:
np.mean(ls_res), np.std(ls_res)

(0.3060600729140215, 0.19318747453416638)

In [63]:
lassreg.intercept_

43.14580423775246

In [69]:
pd.DataFrame(zip(X.columns, lassreg.coef_)).sort_values(by=1)

Unnamed: 0,0,1
12,LSTAT,-0.82303
10,PTRATIO,-0.61916
7,DIS,-0.25814
0,CRIM,-0.03442
9,TAX,-0.0131
2,INDUS,-0.0
3,CHAS,0.0
4,NOX,-0.0
5,RM,0.0
11,B,0.00725


In [70]:
for i in range(0, 1000, 10):
    lasso = Lasso(alpha=i/100)
    lasso.fit(X, y)
    ls_res = cross_val_score(estimator = lasso, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")
    print(i, "{:,.2%}".format(np.mean(ls_res)), "{:,.2f}".format(np.std(ls_res)))

  This is separate from the ipykernel package so we can avoid doing imports until
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


0 11.41% 0.71
10 19.19% 0.61
20 21.23% 0.58
30 20.97% 0.57
40 20.35% 0.57
50 21.96% 0.53
60 23.64% 0.49
70 25.26% 0.45
80 26.61% 0.41
90 27.71% 0.38
100 28.54% 0.35
110 29.11% 0.32
120 29.58% 0.29
130 29.80% 0.26
140 30.09% 0.24
150 30.35% 0.22
160 30.56% 0.21
170 30.61% 0.19
180 30.28% 0.19
190 29.87% 0.18
200 29.49% 0.17
210 29.15% 0.16
220 28.76% 0.16
230 28.33% 0.15
240 28.03% 0.15
250 27.76% 0.14
260 27.46% 0.14
270 27.14% 0.13
280 26.80% 0.13
290 26.44% 0.12
300 26.05% 0.12
310 25.75% 0.11
320 25.45% 0.11
330 25.11% 0.10
340 24.76% 0.10
350 24.44% 0.10
360 24.17% 0.10
370 24.07% 0.09
380 24.11% 0.09
390 24.15% 0.09
400 24.17% 0.09
410 24.19% 0.09
420 24.19% 0.09
430 24.21% 0.09
440 24.22% 0.09
450 24.22% 0.08
460 24.21% 0.08
470 24.24% 0.08
480 24.28% 0.08
490 24.28% 0.08
500 24.23% 0.08
510 24.17% 0.08
520 24.12% 0.08
530 24.06% 0.08
540 23.99% 0.08
550 23.93% 0.08
560 23.86% 0.08
570 23.78% 0.08
580 23.71% 0.08
590 23.63% 0.08
600 23.54% 0.08
610 23.46% 0.08
620 23.37% 0.08
630

In [77]:
dc_scores.update({str(lassreg).split("(")[0]: np.mean(ls_res)})

dc_scores

### Red elástica

In [101]:
elasnet = ElasticNet(alpha=0.5, l1_ratio=0.000001)

In [102]:
elasnet.fit(X, y)

ElasticNet(alpha=0.5, l1_ratio=1e-06)

In [103]:
ls_res = cross_val_score(estimator = elasnet, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")

In [104]:
ls_res

array([ 0.58582377,  0.57947968,  0.47651412, -0.25495132])

In [96]:
np.mean(ls_res), np.std(ls_res)

(0.3467165616106645, 0.35007233834607887)

In [97]:
df_res = pd.DataFrame(columns=["alpha", "l1_ratio", "score", "std"])
contador = 0
for i in range(0, 1000, 10):
    for j in range(10):
        elasnet = ElasticNet(alpha=i/100, l1_ratio=j/10)
        elasnet.fit(X, y)
        ls_res = cross_val_score(estimator = elasnet, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")
        df_res.loc[contador, "alpha"] = i
        df_res.loc[contador, "l1_ratio"] = j
        df_res.loc[contador, "score"] = np.mean(ls_res)
        df_res.loc[contador, "std"] = np.std(ls_res)
        contador += 1
#         print(i/100, j/10, "{:,.2%}".format(np.mean(ls_res)), "{:,.2f}".format(np.std(ls_res)))

  
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  
  coef_, l1_reg, l2_reg,

In [98]:
contador

1000

In [99]:
df_res

Unnamed: 0,alpha,l1_ratio,score,std
0,0,0,0.11405,0.71296
1,0,1,0.11405,0.71296
2,0,2,0.11405,0.71296
3,0,3,0.11405,0.71296
4,0,4,0.11405,0.71296
...,...,...,...,...
995,990,5,0.21215,0.07710
996,990,6,0.20935,0.07455
997,990,7,0.20326,0.07262
998,990,8,0.19611,0.07142


In [100]:
df_res.sort_values(by = "score")

Unnamed: 0,alpha,l1_ratio,score,std
0,0,0,0.11405,0.71296
1,0,1,0.11405,0.71296
2,0,2,0.11405,0.71296
3,0,3,0.11405,0.71296
4,0,4,0.11405,0.71296
...,...,...,...,...
70,70,0,0.34176,0.34149
30,30,0,0.34234,0.37591
60,60,0,0.34475,0.34471
40,40,0,0.34662,0.35932


In [105]:
elasnet.intercept_

41.42139506351087

In [107]:
pd.DataFrame(zip(X.columns, elasnet.coef_)).sort_values(by=1)

Unnamed: 0,0,1
7,DIS,-1.00841
10,PTRATIO,-0.82171
12,LSTAT,-0.72068
0,CRIM,-0.09995
4,NOX,-0.09624
2,INDUS,-0.05039
9,TAX,-0.01659
11,B,0.00879
6,AGE,0.01231
1,ZN,0.05563


In [108]:
dc_scores.update({str(elasnet).split("(")[0]: np.mean(ls_res)})

In [109]:
dc_scores

{'LinearRegression': 0.11405301290101522,
 'Lars': 0.11388643958845374,
 'Ridge': 0.34698064785515315,
 'Lasso': 0.3060600729140215,
 'ElasticNet': 0.3467165616106645}

### Regresión Bayesiana

In [110]:
bayreg = BayesianRidge()

In [112]:
bayreg.fit(X, y)

BayesianRidge()

In [118]:
ls_res = cross_val_score(estimator = bayreg, X=X, y=y, cv=4, n_jobs=-1, scoring="r2")

In [119]:
ls_res

array([ 0.64182838,  0.61663703,  0.37058843, -0.77508562])

In [120]:
np.mean(ls_res), np.std(ls_res)

(0.213492055305246, 0.5805090395044222)

In [121]:
bayreg.intercept_

27.551854857902597

In [123]:
pd.DataFrame(zip(X.columns, bayreg.coef_)).sort_values(by=1)

Unnamed: 0,0,1
4,NOX,-2.14193
7,DIS,-1.24523
10,PTRATIO,-0.79726
12,LSTAT,-0.56188
0,CRIM,-0.10144
2,INDUS,-0.04384
9,TAX,-0.01406
6,AGE,-0.01062
11,B,0.01004
1,ZN,0.04974


In [124]:
dc_scores.update({str(bayreg).split("(")[0]: np.mean(ls_res)})

In [125]:
dc_scores

{'LinearRegression': 0.11405301290101522,
 'Lars': 0.11388643958845374,
 'Ridge': 0.34698064785515315,
 'Lasso': 0.3060600729140215,
 'ElasticNet': 0.3467165616106645,
 'BayesianRidge': 0.213492055305246}

In [126]:
resul = pd.DataFrame(columns=[])
alfas = pd.DataFrame(columns=[])
for model in [linreg, larsreg, ridgereg, lassreg, elasnet, bayreg]:
    resul[str(model).split("(")[0]] = model.coef_
    alfas[str(model).split("(")[0]] = [model.intercept_]

In [128]:
alfas

Unnamed: 0,LinearRegression,Lars,Ridge,Lasso,ElasticNet,BayesianRidge
0,36.45949,36.95133,39.52662,43.1458,41.4214,27.55185


In [129]:
resul

Unnamed: 0,LinearRegression,Lars,Ridge,Lasso,ElasticNet,BayesianRidge
0,-0.10801,-0.10992,-0.10131,-0.03442,-0.09995,-0.10144
1,0.04642,0.04775,0.0554,0.04128,0.05563,0.04974
2,0.02056,0.03427,-0.05245,-0.0,-0.05039,-0.04384
3,2.68673,2.67396,0.42892,0.0,0.31288,1.89485
4,-17.76661,-18.25012,-0.15001,-0.0,-0.09624,-2.14193
5,3.80987,3.80245,1.82569,0.0,1.46092,3.67401
6,0.00069,0.00098,0.0072,0.03837,0.01231,-0.01062
7,-1.47557,-1.48629,-1.08464,-0.25814,-1.00841,-1.24523
8,0.30605,0.32042,0.32549,0.20861,0.33046,0.28022
9,-0.01233,-0.01313,-0.01632,-0.0131,-0.01659,-0.01406


In [130]:
resul["features"] = ls_pred
resul = resul.set_index("features")
alfas["features"] = ["intercepto"]
alfas = alfas.set_index("features")

In [131]:
resul = alfas.append(resul)

In [133]:
resul.sort_values(by="Ridge")

Unnamed: 0_level_0,LinearRegression,Lars,Ridge,Lasso,ElasticNet,BayesianRidge
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DIS,-1.47557,-1.48629,-1.08464,-0.25814,-1.00841,-1.24523
PTRATIO,-0.95275,-0.96037,-0.83129,-0.61916,-0.82171,-0.79726
LSTAT,-0.52476,-0.52602,-0.69605,-0.82303,-0.72068,-0.56188
NOX,-17.76661,-18.25012,-0.15001,-0.0,-0.09624,-2.14193
CRIM,-0.10801,-0.10992,-0.10131,-0.03442,-0.09995,-0.10144
INDUS,0.02056,0.03427,-0.05245,-0.0,-0.05039,-0.04384
TAX,-0.01233,-0.01313,-0.01632,-0.0131,-0.01659,-0.01406
AGE,0.00069,0.00098,0.0072,0.03837,0.01231,-0.01062
B,0.00931,0.00939,0.00906,0.00725,0.00879,0.01004
ZN,0.04642,0.04775,0.0554,0.04128,0.05563,0.04974


* CRIM: per capita crime rate by town
* ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
* INDUS    proportion of non-retail business acres per town
* CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
* NOX      nitric oxides concentration (parts per 10 million)
* RM       average number of rooms per dwelling
* AGE      proportion of owner-occupied units built prior to 1940
* DIS      weighted distances to five Boston employment centres
* RAD      index of accessibility to radial highways
* TAX      full-value property-tax rate per \$10,000
* PTRATIO  pupil-teacher ratio by town
* B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
* LSTAT    % lower status of the population
* MEDV     Median value of owner-occupied homes in $1000's

In [134]:
df["NOX"].describe()

count   506.00000
mean      0.55470
std       0.11588
min       0.38500
25%       0.44900
50%       0.53800
75%       0.62400
max       0.87100
Name: NOX, dtype: float64

In [136]:
df["RM"].describe()

count   506.00000
mean      6.28463
std       0.70262
min       3.56100
25%       5.88550
50%       6.20850
75%       6.62350
max       8.78000
Name: RM, dtype: float64

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive
  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rn

In [137]:
pd.to_pickle(ridgereg, "modelo.diplo")

In [138]:
otr_modelo = pd.read_pickle("modelo.diplo")

In [139]:
otr_modelo

Ridge(alpha=170)

In [140]:
import pickle

In [141]:
with open("modelo_pickle.diplo", "wb") as f:
    pickle.dump(ridgereg, f)

In [142]:
with open("modelo_pickle.diplo", "rb") as f:
    otro_modelo = pickle.load(f)

In [143]:
otro_modelo

Ridge(alpha=170)

In [144]:
df["y_hat"] = otro_modelo.predict(X)

In [145]:
df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target,y_hat
0,0.00632,18.00000,2.31000,0.00000,0.53800,6.57500,65.20000,4.09000,1.00000,296.00000,15.30000,396.90000,4.98000,24.00000,31.26435
1,0.02731,0.00000,7.07000,0.00000,0.46900,6.42100,78.90000,4.96710,2.00000,242.00000,17.80000,396.90000,9.14000,21.60000,25.12450
2,0.02729,0.00000,7.07000,0.00000,0.46900,7.18500,61.10000,4.96710,2.00000,242.00000,17.80000,392.83000,4.03000,34.70000,29.91116
3,0.03237,0.00000,2.18000,0.00000,0.45800,6.99800,45.80000,6.06220,3.00000,222.00000,18.70000,394.63000,2.94000,33.40000,29.20806
4,0.06905,0.00000,2.18000,0.00000,0.45800,7.14700,54.20000,6.06220,3.00000,222.00000,18.70000,396.90000,5.33000,36.20000,27.89384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.00000,11.93000,0.00000,0.57300,6.59300,69.10000,2.47860,1.00000,273.00000,21.00000,391.99000,9.67000,22.40000,23.88824
502,0.04527,0.00000,11.93000,0.00000,0.57300,6.12000,76.70000,2.28750,1.00000,273.00000,21.00000,396.90000,9.08000,20.60000,23.74358
503,0.06076,0.00000,11.93000,0.00000,0.57300,6.97600,91.00000,2.16750,1.00000,273.00000,21.00000,396.90000,5.64000,23.90000,27.93233
504,0.10959,0.00000,11.93000,0.00000,0.57300,6.79400,89.30000,2.38890,1.00000,273.00000,21.00000,393.45000,6.48000,22.00000,26.72679


In [None]:
df