# Lasso, Ridge, ElasticNet

In [1]:
import pandas as pd

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet

In [3]:
boston=load_boston()


In [5]:
df=pd.DataFrame(boston.data, columns=boston.feature_names)
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [6]:
df['price']=boston.target
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [7]:
X=df.drop('price', axis=1)
y=df.price

In [8]:
X_train, X_test, y_train, y_test=train_test_split(X,y, test_size=0.2)

In [9]:
#modelos

linreg=LinearRegression()
linreg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [10]:
lasso=Lasso()
lasso.fit(X_train, y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [11]:
ridge=Ridge()
ridge.fit(X_train, y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [12]:
elastic=ElasticNet()
elastic.fit(X_train, y_train)

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [13]:
train_score=linreg.score(X_train, y_train)    # R2
test_score=linreg.score(X_test, y_test)
print (train_score, test_score)

0.720587035208675 0.7750368224678431


In [14]:
train_score=lasso.score(X_train, y_train)
test_score=lasso.score(X_test, y_test)
print (train_score, test_score)

0.6550679482210249 0.6370548162738527


In [15]:
train_score=ridge.score(X_train, y_train)
test_score=ridge.score(X_test, y_test)
print (train_score, test_score)

0.7179823115349706 0.773783924687844


In [16]:
train_score=elastic.score(X_train, y_train)
test_score=elastic.score(X_test, y_test)
print (train_score, test_score)

0.670763147899029 0.6674443935383241


In [18]:
# SVR
import warnings
warnings.simplefilter('ignore')

from sklearn.svm import SVR

svr=SVR()
svr.fit(X_train, y_train)

train_score=svr.score(X_train, y_train)
test_score=svr.score(X_test, y_test)
print (train_score, test_score)

0.15557322924925565 -0.116632472950426


In [19]:
# random forest regressor

from sklearn.ensemble import RandomForestRegressor as RFR

rf=RFR()
rf.fit(X_train, y_train)

train_score=rf.score(X_train, y_train)
test_score=rf.score(X_test, y_test)
print (train_score, test_score)

0.9685342658969299 0.8895110278018619


In [21]:
#help(RFR())

In [22]:
# GridSearching

import time

from sklearn.model_selection import GridSearchCV


def grid(modelo, param, cv=5):
    grid=GridSearchCV(modelo, param, cv=cv, 
                      iid=True, return_train_score=True, n_jobs=-1)
    
    grid.fit(X_train, y_train)
    
    print('Acierto test: {:.2f}'.format(grid.score(X_test, y_test)))
    print('Acierto train: {:.2f}'.format(grid.score(X_train, y_train)))
    print('Mejores parametros: {}'.format(grid.best_params_))
    print('Mejor acierto cv: {:.2f}'.format(grid.best_score_))
    
    return grid.best_estimator_.fit(X_train, y_train)

In [23]:
param={'max_leaf_nodes':[5,10,20], 'n_estimators':[10,50,100,500]}

In [24]:
grid(rf, param)

Acierto test: 0.91
Acierto train: 0.94
Mejores parametros: {'max_leaf_nodes': 20, 'n_estimators': 50}
Mejor acierto cv: 0.82


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=20,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=50,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

In [25]:
modelo=grid(rf, param)

Acierto test: 0.91
Acierto train: 0.93
Mejores parametros: {'max_leaf_nodes': 20, 'n_estimators': 100}
Mejor acierto cv: 0.82


In [26]:
modelo.predict(X_test)

array([17.01216494, 43.69377832, 33.24848263, 13.39055944, 20.59528625,
       27.5681412 , 21.24525113, 42.52350608, 22.54510166, 24.1524939 ,
       22.40312389, 19.24554236, 25.98670672, 26.26136559, 29.77292083,
       45.33752894, 20.6745306 , 14.69578621, 22.64591979, 20.8546063 ,
       28.47448325, 25.98940554, 38.51934787,  9.0066527 , 15.16915515,
       23.90366292, 23.74104243, 20.6008961 , 20.11675536, 18.79240107,
       43.97858514, 22.52198982, 19.4771734 , 19.73855197, 26.43432452,
       22.66922748, 33.07634735, 24.33359005, 19.61950119, 23.98277685,
       19.95158214, 45.20306813, 46.0716324 , 35.10018375, 45.9226921 ,
       23.86452006,  9.98257318, 22.38903048, 32.98991985, 15.46674256,
       19.86138068, 23.97206733, 25.9386355 , 25.03195824, 20.95576707,
       17.57286281, 33.53771076, 19.99797608, 13.76630271, 19.2975693 ,
       19.06784898, 45.23485014, 20.75151628, 14.02579388, 20.28678185,
       15.21886868, 23.77187433, 22.67104304, 23.84373582, 44.99

In [29]:
# Extratrees

from sklearn.tree import ExtraTreeRegressor as ETR

etr=ETR()
etr.fit(X_train, y_train)

train_score=etr.score(X_train, y_train)
test_score=etr.score(X_test, y_test)
print (train_score, test_score)

1.0 0.840286635853755
