<center><u><H1>Modelos Avanzados para Regresión</H1></u></center>

In [1]:
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
from sklearn.metrics import r2_score
import numpy as np
%matplotlib inline
#pip install --upgrade joblib

## Loading Data

In [2]:
# Load the bundled Boston housing dataset and unpack the feature matrix and
# the target vector from the returned container.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = load_boston()
X, y = boston.data, boston.target

## Preprocessing data

In [3]:
# Standardise features and target with StandardScaler.
# The target is turned into a single column first. reshape(-1, 1) is used
# instead of y[:, np.newaxis] so the cell is idempotent: re-running it leaves
# an (n, 1) array unchanged rather than stacking on another axis.
y = np.reshape(y, (-1, 1))

sc_X = StandardScaler()
sc_y = StandardScaler()

# NOTE(review): both scalers are fitted on the full dataset, before the
# train/test split, so test-set statistics influence the scaling.
X_std = sc_X.fit_transform(X)
y_std = sc_y.fit_transform(y)

In [4]:
# Hold out 30% of the standardised samples for evaluation; the fixed seed
# keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_std,
    y_std,
    test_size=0.3,
    random_state=2019,
)

In [5]:
# Flatten the single-column targets to 1-D vectors.
y_train, y_test = np.ravel(y_train), np.ravel(y_test)

## Creating models

In [6]:
# RBF-kernel support vector regressor.
svr = SVR(kernel='rbf', C=10.0, gamma='scale')

# 3-nearest-neighbour regressor; closer neighbours get a larger weight.
knn_r = KNeighborsRegressor(n_neighbors=3, weights='distance', algorithm='auto')

# Random forest of 1000 trees, each limited to depth 8. Seeded with the same
# value as the split and the grid searches so its score is reproducible.
rf_r = RandomForestRegressor(max_depth=8, n_estimators=1000, random_state=2019)

## Support Vector Machines for Regression:

In [7]:
# Fit on the training split and predict the held-out samples in one chain
# (fit returns the fitted estimator).
svr_pred = svr.fit(X_train, y_train).predict(X_test)

# Coefficient of determination on the test split.
print("R2 = {:.2}".format(r2_score(y_true=y_test, y_pred=svr_pred)))

R2 = 0.82


## K-Nearest Neighbors for Regression:

In [8]:
# Fit on the training split and predict the held-out samples in one chain
# (fit returns the fitted estimator).
knn_pred = knn_r.fit(X_train, y_train).predict(X_test)

# Coefficient of determination on the test split.
print("R2 = {:.2}".format(r2_score(y_true=y_test, y_pred=knn_pred)))

R2 = 0.71


## Random Forest for Regression:

In [9]:
# Fit on the training split and predict the held-out samples in one chain
# (fit returns the fitted estimator).
rf_pred = rf_r.fit(X_train, y_train).predict(X_test)

# Coefficient of determination on the test split.
print("R2 = {:.2}".format(r2_score(y_true=y_test, y_pred=rf_pred)))

R2 = 0.84


## Gradient Boosting Regressor

In [10]:
%%time
gb_model = GradientBoostingRegressor()
parameters = {'learning_rate': [0.01,0.05],
                  'subsample'    : [0.2, 0.5],
                  'n_estimators' : [500,1000],
                  'max_depth'    : [5,10],
                  'random_state':[2019]
                 }
grid = GridSearchCV(estimator=gb_model, param_grid = parameters, iid=True, cv = 5)
#iid If True, return the average score across folds.
grid.fit(X_train, y_train)  

Wall time: 37.8 s


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=GradientBoostingRegressor(alpha=0.9,
                                                 criterion='friedman_mse',
                                                 init=None, learning_rate=0.1,
                                                 loss='ls', max_depth=3,
                                                 max_features=None,
                                                 max_leaf_nodes=None,
                                                 min_impurity_decrease=0.0,
                                                 min_impurity_split=None,
                                                 min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=100,
                                                 n_iter...=None,
                       

In [11]:
# Show the estimator built from the best-scoring parameter combination of the
# gradient-boosting grid search.
grid.best_estimator_

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
                          learning_rate=0.01, loss='ls', max_depth=10,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=1000,
                          n_iter_no_change=None, presort='auto',
                          random_state=2019, subsample=0.5, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [12]:
# GridSearchCV refits the winning configuration on the whole training split
# (refit=True is the default), so best_estimator_ is already fitted. Fitting it
# again with the same fixed seed would only repeat that work.
gb = grid.best_estimator_
gb

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
                          learning_rate=0.01, loss='ls', max_depth=10,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=1000,
                          n_iter_no_change=None, presort='auto',
                          random_state=2019, subsample=0.5, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

### Metrics

In [13]:
# Predict the held-out samples with the tuned gradient-boosting model and
# report the coefficient of determination.
gb_pred = gb.predict(X_test)
print(
    "R2 = {:.2}".format(
        r2_score(y_true=y_test, y_pred=gb_pred)
    )
)

R2 = 0.86


## XGBoost Regressor

In [14]:
%%time
xgb_model = xgb.XGBRegressor()
parameters = {'objective':["reg:squarederror"],
                  'colsample_bytree':[0.6],
                  'learning_rate': [0.01],
                  'subsample'    : [0.3, 0.5],
                  'n_estimators' : [5000],
                  'max_depth'    : [3,5,8],
                  'reg_alpha':[0.01, 0.1],
                  'reg_lambda':[0.1, 0.5],
                  'random_state':[2019]
                 }
grid_ = GridSearchCV(estimator=xgb_model, param_grid = parameters, iid=True, cv = 5)
grid_.fit(X_train, y_train)  

Wall time: 5min 42s


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=XGBRegressor(base_score=0.5, booster='gbtree',
                                    colsample_bylevel=1, colsample_bynode=1,
                                    colsample_bytree=1, gamma=0,
                                    importance_type='gain', learning_rate=0.1,
                                    max_delta_step=0, max_depth=3,
                                    min_child_weight=1, missing=None,
                                    n_estimators=100, n_jobs=1, nthread=None,
                                    objective='reg:linear', random_state=0,...
                                    subsample=1, verbosity=1),
             iid=True, n_jobs=None,
             param_grid={'colsample_bytree': [0.6], 'learning_rate': [0.01],
                         'max_depth': [3, 5, 8], 'n_estimators': [5000],
                         'objective': ['reg:squarederror'],
                         'random_state': [2019], 'reg_

In [15]:
# Show the estimator built from the best-scoring parameter combination of the
# XGBoost grid search.
grid_.best_estimator_

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.6, gamma=0,
             importance_type='gain', learning_rate=0.01, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=5000,
             n_jobs=1, nthread=None, objective='reg:squarederror',
             random_state=2019, reg_alpha=0.01, reg_lambda=0.1,
             scale_pos_weight=1, seed=None, silent=None, subsample=0.5,
             verbosity=1)

In [16]:
# GridSearchCV refits the winning configuration on the whole training split
# (refit=True is the default), so best_estimator_ is already fitted. The extra
# fit of the 5000-tree model with the same fixed seed was redundant and is
# dropped.
xgb_model = grid_.best_estimator_
xgb_pred = xgb_model.predict(X_test)

In [17]:
# Coefficient of determination of the tuned XGBoost model on the test split.
print(
    "R2 = {:.2}".format(
        r2_score(y_true=y_test, y_pred=xgb_pred)
    )
)

R2 = 0.86


In [18]:
# Score the tuned XGBoost model on the data it was trained on, for comparison
# with the test-split score.
xgb_pred_train = xgb_model.predict(X_train)
print(
    "R2 = {:.2}".format(
        r2_score(y_true=y_train, y_pred=xgb_pred_train)
    )
)

R2 = 1.0


## References

http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html

http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html

http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

https://xgboost.readthedocs.io/en/latest/parameter.html