# Regresion

## Regresion lineal y regularizacion

In [2]:
import pandas as pd

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet

boston=load_boston()
df=pd.DataFrame(boston.data, columns=boston.feature_names)
df['Price']=boston.target

df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [5]:
?load_boston

In [4]:
X=df.drop('Price', axis=1)
y=df.Price

In [6]:
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2)

In [7]:
# modelos

linreg=LinearRegression()
linreg.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [8]:
lasso=Lasso()
lasso.fit(X_train, y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [9]:
ridge=Ridge()
ridge.fit(X_train, y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [23]:
elastic=ElasticNet(alpha=0.01)
elastic.fit(X_train, y_train)

ElasticNet(alpha=0.01, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [11]:
train_score=linreg.score(X_train, y_train)
test_score=linreg.score(X_test, y_test)
print (train_score, test_score)

0.748100021394221 0.6990862349812188


In [12]:
train_score=lasso.score(X_train, y_train)
test_score=lasso.score(X_test, y_test)
print (train_score, test_score)

0.6860427119916477 0.6879857530739206


In [13]:
train_score=ridge.score(X_train, y_train)
test_score=ridge.score(X_test, y_test)
print (train_score, test_score)

0.745064329004498 0.7046320018888819


In [24]:
train_score=elastic.score(X_train, y_train)
test_score=elastic.score(X_test, y_test)
print (train_score, test_score)

0.7417335555773477 0.7050253678250442


## SVR

In [28]:
import warnings
warnings.simplefilter('ignore')

from sklearn.svm import SVR

svr=SVR()
svr.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
    gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
    tol=0.001, verbose=False)

In [29]:
train_score=svr.score(X_train, y_train)
test_score=svr.score(X_test, y_test)
print (train_score, test_score)

0.13870643771270086 -0.007770532133630548


## Random Forest

In [30]:
from sklearn.ensemble import RandomForestRegressor as RFR

rf=RFR()
rf.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=10,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

In [31]:
train_score=rf.score(X_train, y_train)
test_score=rf.score(X_test, y_test)
print (train_score, test_score)

0.9641054425304734 0.9100261155630658


In [41]:
# gridsearching
from sklearn.model_selection import GridSearchCV

def grid(modelo, param, cv=5):
    grid=GridSearchCV(modelo, param, cv=cv, iid=True, n_jobs=-1)
    grid.fit(X, y)
    
    print (grid.score(X_train, y_train), grid.score(X_test, y_test))
    print (grid.best_params_)
    print (grid.best_score_)
    
    return grid.best_estimator_.fit(X_train, y_train)

In [51]:
param={'max_leaf_nodes':[5,10,20, 50], 'n_estimators':[i+1 for i in range(50) if i%5==0]}

%time grid(rf, param)

0.9269079993664873 0.9272174727628295
{'max_leaf_nodes': 20, 'n_estimators': 26}
0.6405917428227192
CPU times: user 682 ms, sys: 109 ms, total: 791 ms
Wall time: 3.3 s


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=20,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=26,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

In [52]:
rf_gs= grid(rf, param)

0.9654656824155339 0.9664968397541917
{'max_leaf_nodes': 50, 'n_estimators': 41}
0.6346123658088841


In [53]:
y_pred=rf_gs.predict(X_test)
y_pred

array([15.10215993, 33.0976862 , 22.80993398, 10.12511922, 14.75229614,
       10.56161799, 33.63494325, 20.54940274, 48.36117605, 21.3134652 ,
       16.71350856, 19.77906526, 21.72803598, 23.75965768, 22.98082279,
       19.76476245, 22.64803555, 19.32675558, 34.33969717, 22.28035147,
       34.05006957, 21.10632131, 24.4240702 , 36.36910439, 17.16896342,
       37.34199808,  7.98149981, 11.69909834, 20.2113765 , 10.96988706,
       17.83996375, 20.1229443 , 20.71704645, 44.88258619, 24.45327293,
       15.15320484, 20.54703638, 17.49988364, 24.20755377, 19.78584049,
       11.8572506 , 45.22734086, 22.21725151, 15.93101502, 15.91644934,
       27.16267701, 22.59956982, 20.63193348, 26.41028385, 21.94139486,
       14.77402711, 32.83274907, 20.12205549, 14.95785312, 22.76852036,
       33.34084544, 17.35174604, 30.92975437, 15.24520842, 29.95071108,
       36.33306706, 31.83956688, 29.25225602, 12.39146735, 23.15366757,
       19.78878778, 20.94032828, 23.82392618, 19.10213747, 23.86

In [54]:
from sklearn.tree import ExtraTreeRegressor as ETR

etr=ETR(min_samples_leaf=2)
etr.fit(X_train, y_train)

ExtraTreeRegressor(criterion='mse', max_depth=None, max_features='auto',
                   max_leaf_nodes=None, min_impurity_decrease=0.0,
                   min_impurity_split=None, min_samples_leaf=2,
                   min_samples_split=2, min_weight_fraction_leaf=0.0,
                   random_state=None, splitter='random')

In [48]:
train_score=etr.score(X_train, y_train)
test_score=etr.score(X_test, y_test)
print (train_score, test_score)

0.9552322876768367 0.8096757903783823


# Clasificacion