In [27]:
from sklearn.datasets import make_classification ,make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, r2_score


import warnings
warnings.filterwarnings("ignore")

In [28]:
# Binary-classification baseline on a synthetic dataset (1000 samples, 20 features).
X, y = make_classification(n_samples=1000, n_classes=2, n_features=20, random_state=1)

# 80/20 train/test split.
# NOTE: the misspelled name `X_tain` is kept on purpose -- the grid-search fit cell
# further down refers to it, so renaming it here alone would break that cell.
X_tain, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)

# Pin the estimator's seed (the data generation and the split are already seeded)
# so that re-running the notebook from a fresh kernel yields the same model.
clf = GradientBoostingClassifier(random_state=1)
clf.fit(X_tain, y_train)
y_pred = clf.predict(X_test)

# Hold-out metrics.
print("accuracy", accuracy_score(y_test,y_pred))
print("clf-report",classification_report(y_test, y_pred))
print("confusion matrix", confusion_matrix(y_test, y_pred))

accuracy 0.865
clf-report               precision    recall  f1-score   support

           0       0.85      0.84      0.85        90
           1       0.87      0.88      0.88       110

    accuracy                           0.86       200
   macro avg       0.86      0.86      0.86       200
weighted avg       0.86      0.86      0.86       200

confusion matrix [[76 14]
 [13 97]]


### Hyperparameter tuning for GradientBoostingClassifier

In [4]:
from sklearn.model_selection import GridSearchCV
import numpy as np

# Exhaustive search space for the classifier. The trailing number on each line is
# how many values that key contributes:
#   2 * 3 * 2 * 2 * 3 * 2 * 3 = 432 candidates -> 2160 fits with cv=5.
param_grid = {"n_estimators":[50,100], # 2
              "learning_rate":[0.001, 0.1, 1], # 3
              "loss": ["log_loss", "exponential"], # 2
              "criterion":["friedman_mse","squared_error"], # 2
              "max_depth":[1,2,3], # 3
              "max_features":['sqrt','log2'], # 2
              "ccp_alpha":[0,.1,.2]} # 3

# Seed the base estimator: with max_features='sqrt'/'log2' each split considers a
# random subset of features, so an unseeded search can pick different "best"
# parameters from one run to the next.
g_clf = GradientBoostingClassifier(random_state=1)

# verbose=3 prints one line per (candidate, fold) fit.
clf1 = GridSearchCV(estimator=g_clf, param_grid=param_grid, cv=5, verbose=3)
clf1

In [None]:
# Run the classification grid search on the training split.
# Ordering matters: `y_train` and `clf1` are both rebound by the regression section
# further down, so this cell has to be executed before those cells.
clf1.fit(X=X_tain, y=y_train)

In [6]:
# Display the estimator selected by the classification grid search.
clf1.best_estimator_

In [7]:
# Display the parameter combination of the best-scoring candidate.
clf1.best_params_

{'ccp_alpha': 0,
 'criterion': 'squared_error',
 'learning_rate': 0.1,
 'loss': 'exponential',
 'max_depth': 3,
 'max_features': 'sqrt',
 'n_estimators': 100}

In [8]:
# Display the best candidate's mean cross-validated score. No `scoring` was passed
# to GridSearchCV, so the estimator's own `score` method is what gets averaged.
clf1.best_score_

0.8662500000000002

---------------------------------------------
### Gradient Boosting Regression
-------------------------------------------

In [35]:
# Synthetic single-target regression problem: 2000 samples, 2 features, light noise.
# NOTE: this rebinds X, y, X_test, y_train and y_test from the classification section.
X, y = make_regression(
    n_samples=2000,
    n_features=2,
    n_targets=1,
    noise=0.1,
    random_state=1,
)

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=1
)

In [36]:
# Sanity check: shapes of the training features and targets.
(X_train.shape, y_train.shape)

((1400, 2), (1400,))

In [37]:
# Baseline regressor: 50 boosting stages with squared-error loss and split criterion.
# The seed is pinned so the fitted model (and the score below) is repeatable,
# matching the seeded data generation and train/test split.
reg = GradientBoostingRegressor(
    n_estimators=50,
    learning_rate=0.1,
    loss='squared_error',
    criterion='squared_error',
    random_state=1,
)
reg.fit(X_train,y_train)
y_pred = reg.predict(X_test)

# Last expression of the cell: R^2 on the held-out test split.
r2_score(y_test, y_pred)

0.9971182448577355

### Hyperparameter tuning for GradientBoostingRegressor

In [38]:
from sklearn.model_selection import GridSearchCV

# Search space for the regressor: 2 * 2 * 2 * 1 = 8 candidates -> 40 fits with cv=5.
param_grid = {"n_estimators":[50,100],
              "learning_rate":[0.001, 0.1],
              "loss": ["squared_error", "absolute_error"],
              "criterion":["squared_error"]}

# Seed the base estimator so repeated searches produce the same fitted models.
gra_reg = GradientBoostingRegressor(random_state=1)

# NOTE: this rebinds `param_grid` and `clf1`, replacing the classification search
# objects defined earlier in the notebook.
clf1 = GridSearchCV(estimator=gra_reg, param_grid=param_grid, cv=5, verbose=3)
clf1

In [40]:
# Run the regression grid search on the regression training split.
clf1.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV 1/5] END criterion=squared_error, learning_rate=0.001, loss=squared_error, n_estimators=50;, score=0.090 total time=   0.0s
[CV 2/5] END criterion=squared_error, learning_rate=0.001, loss=squared_error, n_estimators=50;, score=0.087 total time=   0.0s
[CV 3/5] END criterion=squared_error, learning_rate=0.001, loss=squared_error, n_estimators=50;, score=0.086 total time=   0.0s
[CV 4/5] END criterion=squared_error, learning_rate=0.001, loss=squared_error, n_estimators=50;, score=0.083 total time=   0.0s
[CV 5/5] END criterion=squared_error, learning_rate=0.001, loss=squared_error, n_estimators=50;, score=0.092 total time=   0.0s
[CV 1/5] END criterion=squared_error, learning_rate=0.001, loss=squared_error, n_estimators=100;, score=0.171 total time=   0.0s
[CV 2/5] END criterion=squared_error, learning_rate=0.001, loss=squared_error, n_estimators=100;, score=0.168 total time=   0.0s
[CV 3/5] END criterion=squared_error, lear

In [41]:
# Display the best candidate's mean cross-validated score for the regression search.
# No `scoring` was passed, so the regressor's own `score` method is what gets averaged.
clf1.best_score_

0.9983583559704995

In [42]:
# Display the parameter combination of the best-scoring regression candidate.
clf1.best_params_

{'criterion': 'squared_error',
 'learning_rate': 0.1,
 'loss': 'squared_error',
 'n_estimators': 100}