b) Showcase gradient boost regression techniques XGBoost, Catboost, LightGBM

To showcase the regression techniques I am using the California house prices dataset available from sklearn library.

In [None]:
!pip install xgboost
!pip install catboost

In [29]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing

from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix

import lightgbm as lgb

In [3]:
chp = fetch_california_housing()

In [4]:
X_train, X_test, y_train, y_test = train_test_split(chp.data, chp.target, random_state=42, test_size=0.1)

In [5]:
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

In [6]:
gbr_params = {'n_estimators': 1000,
          'max_depth': 3,
          'min_samples_split': 5,
          'learning_rate': 0.01,
          'loss': 'ls'}

In [7]:
gbr = GradientBoostingRegressor(**gbr_params)

In [8]:
gbr.fit(X_train_std, y_train)



GradientBoostingRegressor(learning_rate=0.01, loss='ls', min_samples_split=5,
                          n_estimators=1000)

In [9]:
print("Model Accuracy: %.3f" % gbr.score(X_test_std, y_test))
mse = mean_squared_error(y_test, gbr.predict(X_test_std))
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))

Model Accuracy: 0.778
The mean squared error (MSE) on test set: 0.2963


**XGBoost**

In [10]:
model = XGBRegressor()

In [11]:
model = XGBRegressor(n_estimators=1000, max_depth=7, eta=0.1, subsample=0.7, colsample_bytree=0.8)

In [12]:
model.fit(X_train_std, y_train)



XGBRegressor(colsample_bytree=0.8, eta=0.1, max_depth=7, n_estimators=1000,
             subsample=0.7)

In [17]:
print("Model Accuracy: %.3f" % model.score(X_test_std, y_test))
predictions = model.predict(X_test_std)
mse = mean_squared_error(y_test, predictions)
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))

Model Accuracy: 0.860
The mean squared error (MSE) on test set: 0.1862


**CatBoost**

In [21]:
model = CatBoostRegressor()

In [24]:
model = CatBoostRegressor(loss_function='RMSE')

In [25]:
model.fit(X_train_std, y_train)

Learning rate set to 0.064964
0:	learn: 1.1118365	total: 52.2ms	remaining: 52.2s
1:	learn: 1.0745729	total: 57.1ms	remaining: 28.5s
2:	learn: 1.0394318	total: 61.7ms	remaining: 20.5s
3:	learn: 1.0069017	total: 66.7ms	remaining: 16.6s
4:	learn: 0.9764213	total: 71.7ms	remaining: 14.3s
5:	learn: 0.9488672	total: 76.4ms	remaining: 12.7s
6:	learn: 0.9220938	total: 81ms	remaining: 11.5s
7:	learn: 0.8977221	total: 86ms	remaining: 10.7s
8:	learn: 0.8755553	total: 91.1ms	remaining: 10s
9:	learn: 0.8537168	total: 96ms	remaining: 9.5s
10:	learn: 0.8347515	total: 101ms	remaining: 9.05s
11:	learn: 0.8166573	total: 108ms	remaining: 8.87s
12:	learn: 0.8009991	total: 112ms	remaining: 8.54s
13:	learn: 0.7845642	total: 117ms	remaining: 8.27s
14:	learn: 0.7714426	total: 122ms	remaining: 8.02s
15:	learn: 0.7588320	total: 127ms	remaining: 7.81s
16:	learn: 0.7457128	total: 132ms	remaining: 7.62s
17:	learn: 0.7338124	total: 137ms	remaining: 7.46s
18:	learn: 0.7243138	total: 141ms	remaining: 7.3s
19:	learn: 

<catboost.core.CatBoostRegressor at 0x7f63d4195c90>

In [26]:
print("Model Accuracy: %.3f" % model.score(X_test_std, y_test))
predictions = model.predict(X_test_std)
mse = mean_squared_error(y_test, predictions)
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))

Model Accuracy: 0.856
The mean squared error (MSE) on test set: 0.1921


**LightGBM**

In [41]:
hyper_params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': ['l1','l2'],
    'learning_rate': 0.005,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.7,
    'bagging_freq': 10,
    'verbose': 0,
    "max_depth": 8,
    "num_leaves": 128,  
    "max_bin": 512,
    "num_iterations": 10000
}

In [42]:
model = lgb.LGBMRegressor(**hyper_params)

In [43]:
model.fit(X_train_std, y_train)



LGBMRegressor(bagging_fraction=0.7, bagging_freq=10, feature_fraction=0.9,
              learning_rate=0.005, max_bin=512, max_depth=8,
              metric=['l1', 'l2'], num_iterations=10000, num_leaves=128,
              objective='regression', task='train', verbose=0)

In [44]:
print("Model Accuracy: %.3f" % model.score(X_test_std, y_test))
predictions = model.predict(X_test_std)
mse = mean_squared_error(y_test, predictions)
print("The mean squared error (MSE) on test set: {:.4f}".format(mse))

Model Accuracy: 0.863
The mean squared error (MSE) on test set: 0.1824
