In [1]:
from sklearn.datasets import  make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Gradient Dataset

In [3]:
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=1)

#split dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [4]:
X_train

array([[-0.61855767,  0.52885125, -1.88296138, ...,  1.14413655,
        -0.1606244 ,  1.04841912],
       [-1.18862211, -1.84599655,  0.75481673, ..., -0.2068231 ,
         0.00762745, -1.15608593],
       [ 2.72175361, -0.53291851,  0.15911922, ...,  0.29478576,
         0.32873813,  0.03001774],
       ...,
       [-0.00383186,  0.09203615, -0.70121505, ..., -2.16249308,
        -0.33068834,  0.07266646],
       [ 2.45888615, -0.35941149,  0.02597242, ..., -0.056518  ,
        -0.59516384, -0.51703825],
       [ 1.84222699, -0.46434485, -0.05381296, ...,  0.80643863,
         0.40912571, -0.19642545]])

In [5]:
clf = GradientBoostingClassifier()
clf.fit(X_train, y_train)

# Make Predictions 

In [6]:
y_pred = clf.predict(X_test)

# Evaluate the model
print('Current model performance:')
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print('Classification Report:')
print(classification_report(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))


Current model performance:
Accuracy: 0.8545454545454545
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.85       154
           1       0.88      0.85      0.86       176

    accuracy                           0.85       330
   macro avg       0.85      0.86      0.85       330
weighted avg       0.86      0.85      0.85       330

Confusion Matrix:
[[133  21]
 [ 27 149]]


# Hyperparameter tunning 

In [7]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

# Define the parameter grid suitable for GradientBoostingClassifier
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2, 0.5, 1.0],
    'max_depth': [3, 4, 5, 6],
    'subsample': [0.8, 0.9, 1.0]
}

# Initialize the GradientBoostingClassifier
gbc = GradientBoostingClassifier()

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=gbc, param_grid=param_grid, cv=5, verbose=3)

# Print grid_search to confirm setup
print(grid_search)

# Assuming X_train and y_train are defined
grid_search.fit(X_train, y_train)

# Get the best parameters and best score
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_}")


GridSearchCV(cv=5, estimator=GradientBoostingClassifier(),
             param_grid={'learning_rate': [0.01, 0.1, 0.2, 0.5, 1.0],
                         'max_depth': [3, 4, 5, 6],
                         'n_estimators': [50, 100, 200],
                         'subsample': [0.8, 0.9, 1.0]},
             verbose=3)
Fitting 5 folds for each of 180 candidates, totalling 900 fits
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.881 total time=   0.1s
[CV 2/5] END learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.888 total time=   0.1s
[CV 3/5] END learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.843 total time=   0.1s
[CV 4/5] END learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.866 total time=   0.1s
[CV 5/5] END learning_rate=0.01, max_depth=3, n_estimators=50, subsample=0.8;, score=0.821 total time=   0.1s
[CV 1/5] END learning_rate=0.01, max_depth=3, n_estimators=50, subsam

[CV 2/5] END learning_rate=0.01, max_depth=4, n_estimators=100, subsample=1.0;, score=0.896 total time=   0.6s
[CV 3/5] END learning_rate=0.01, max_depth=4, n_estimators=100, subsample=1.0;, score=0.836 total time=   0.6s
[CV 4/5] END learning_rate=0.01, max_depth=4, n_estimators=100, subsample=1.0;, score=0.836 total time=   0.8s
[CV 5/5] END learning_rate=0.01, max_depth=4, n_estimators=100, subsample=1.0;, score=0.821 total time=   0.8s
[CV 1/5] END learning_rate=0.01, max_depth=4, n_estimators=200, subsample=0.8;, score=0.881 total time=   1.3s
[CV 2/5] END learning_rate=0.01, max_depth=4, n_estimators=200, subsample=0.8;, score=0.896 total time=   1.4s
[CV 3/5] END learning_rate=0.01, max_depth=4, n_estimators=200, subsample=0.8;, score=0.836 total time=   1.2s
[CV 4/5] END learning_rate=0.01, max_depth=4, n_estimators=200, subsample=0.8;, score=0.858 total time=   1.2s
[CV 5/5] END learning_rate=0.01, max_depth=4, n_estimators=200, subsample=0.8;, score=0.821 total time=   1.2s
[

[CV 2/5] END learning_rate=0.01, max_depth=6, n_estimators=50, subsample=1.0;, score=0.843 total time=   0.5s
[CV 3/5] END learning_rate=0.01, max_depth=6, n_estimators=50, subsample=1.0;, score=0.769 total time=   0.5s
[CV 4/5] END learning_rate=0.01, max_depth=6, n_estimators=50, subsample=1.0;, score=0.791 total time=   0.4s
[CV 5/5] END learning_rate=0.01, max_depth=6, n_estimators=50, subsample=1.0;, score=0.806 total time=   0.5s
[CV 1/5] END learning_rate=0.01, max_depth=6, n_estimators=100, subsample=0.8;, score=0.896 total time=   0.9s
[CV 2/5] END learning_rate=0.01, max_depth=6, n_estimators=100, subsample=0.8;, score=0.888 total time=   0.9s
[CV 3/5] END learning_rate=0.01, max_depth=6, n_estimators=100, subsample=0.8;, score=0.843 total time=   1.0s
[CV 4/5] END learning_rate=0.01, max_depth=6, n_estimators=100, subsample=0.8;, score=0.851 total time=   0.9s
[CV 5/5] END learning_rate=0.01, max_depth=6, n_estimators=100, subsample=0.8;, score=0.813 total time=   0.9s
[CV 1

[CV 2/5] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.873 total time=   1.2s
[CV 3/5] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.836 total time=   1.2s
[CV 4/5] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.828 total time=   1.3s
[CV 5/5] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0;, score=0.821 total time=   1.4s
[CV 1/5] END learning_rate=0.1, max_depth=4, n_estimators=50, subsample=0.8;, score=0.873 total time=   0.3s
[CV 2/5] END learning_rate=0.1, max_depth=4, n_estimators=50, subsample=0.8;, score=0.851 total time=   0.3s
[CV 3/5] END learning_rate=0.1, max_depth=4, n_estimators=50, subsample=0.8;, score=0.843 total time=   0.3s
[CV 4/5] END learning_rate=0.1, max_depth=4, n_estimators=50, subsample=0.8;, score=0.843 total time=   0.3s
[CV 5/5] END learning_rate=0.1, max_depth=4, n_estimators=50, subsample=0.8;, score=0.813 total time=   0.3s
[CV 1/5] END le

[CV 2/5] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.873 total time=   0.9s
[CV 3/5] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.821 total time=   0.9s
[CV 4/5] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.843 total time=   1.0s
[CV 5/5] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0;, score=0.828 total time=   1.1s
[CV 1/5] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.881 total time=   1.8s
[CV 2/5] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.873 total time=   1.8s
[CV 3/5] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.813 total time=   1.7s
[CV 4/5] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.851 total time=   1.7s
[CV 5/5] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8;, score=0.828 total time=   1.6s
[CV 1/5] E

[CV 2/5] END learning_rate=0.2, max_depth=3, n_estimators=50, subsample=1.0;, score=0.881 total time=   0.2s
[CV 3/5] END learning_rate=0.2, max_depth=3, n_estimators=50, subsample=1.0;, score=0.821 total time=   0.3s
[CV 4/5] END learning_rate=0.2, max_depth=3, n_estimators=50, subsample=1.0;, score=0.821 total time=   0.3s
[CV 5/5] END learning_rate=0.2, max_depth=3, n_estimators=50, subsample=1.0;, score=0.813 total time=   0.2s
[CV 1/5] END learning_rate=0.2, max_depth=3, n_estimators=100, subsample=0.8;, score=0.858 total time=   0.5s
[CV 2/5] END learning_rate=0.2, max_depth=3, n_estimators=100, subsample=0.8;, score=0.866 total time=   0.5s
[CV 3/5] END learning_rate=0.2, max_depth=3, n_estimators=100, subsample=0.8;, score=0.813 total time=   0.5s
[CV 4/5] END learning_rate=0.2, max_depth=3, n_estimators=100, subsample=0.8;, score=0.813 total time=   0.4s
[CV 5/5] END learning_rate=0.2, max_depth=3, n_estimators=100, subsample=0.8;, score=0.836 total time=   0.4s
[CV 1/5] END l

[CV 2/5] END learning_rate=0.2, max_depth=4, n_estimators=200, subsample=1.0;, score=0.873 total time=   1.7s
[CV 3/5] END learning_rate=0.2, max_depth=4, n_estimators=200, subsample=1.0;, score=0.821 total time=   1.6s
[CV 4/5] END learning_rate=0.2, max_depth=4, n_estimators=200, subsample=1.0;, score=0.836 total time=   1.6s
[CV 5/5] END learning_rate=0.2, max_depth=4, n_estimators=200, subsample=1.0;, score=0.828 total time=   1.6s
[CV 1/5] END learning_rate=0.2, max_depth=5, n_estimators=50, subsample=0.8;, score=0.881 total time=   0.4s
[CV 2/5] END learning_rate=0.2, max_depth=5, n_estimators=50, subsample=0.8;, score=0.873 total time=   0.3s
[CV 3/5] END learning_rate=0.2, max_depth=5, n_estimators=50, subsample=0.8;, score=0.836 total time=   0.3s
[CV 4/5] END learning_rate=0.2, max_depth=5, n_estimators=50, subsample=0.8;, score=0.791 total time=   0.3s
[CV 5/5] END learning_rate=0.2, max_depth=5, n_estimators=50, subsample=0.8;, score=0.843 total time=   0.4s
[CV 1/5] END le

[CV 2/5] END learning_rate=0.2, max_depth=6, n_estimators=100, subsample=1.0;, score=0.873 total time=   1.3s
[CV 3/5] END learning_rate=0.2, max_depth=6, n_estimators=100, subsample=1.0;, score=0.851 total time=   1.2s
[CV 4/5] END learning_rate=0.2, max_depth=6, n_estimators=100, subsample=1.0;, score=0.851 total time=   1.7s
[CV 5/5] END learning_rate=0.2, max_depth=6, n_estimators=100, subsample=1.0;, score=0.836 total time=   1.5s
[CV 1/5] END learning_rate=0.2, max_depth=6, n_estimators=200, subsample=0.8;, score=0.866 total time=   2.4s
[CV 2/5] END learning_rate=0.2, max_depth=6, n_estimators=200, subsample=0.8;, score=0.896 total time=   2.4s
[CV 3/5] END learning_rate=0.2, max_depth=6, n_estimators=200, subsample=0.8;, score=0.821 total time=   2.2s
[CV 4/5] END learning_rate=0.2, max_depth=6, n_estimators=200, subsample=0.8;, score=0.836 total time=   2.6s
[CV 5/5] END learning_rate=0.2, max_depth=6, n_estimators=200, subsample=0.8;, score=0.821 total time=   2.0s
[CV 1/5] E

[CV 2/5] END learning_rate=0.5, max_depth=4, n_estimators=50, subsample=1.0;, score=0.843 total time=   0.3s
[CV 3/5] END learning_rate=0.5, max_depth=4, n_estimators=50, subsample=1.0;, score=0.821 total time=   0.4s
[CV 4/5] END learning_rate=0.5, max_depth=4, n_estimators=50, subsample=1.0;, score=0.851 total time=   0.4s
[CV 5/5] END learning_rate=0.5, max_depth=4, n_estimators=50, subsample=1.0;, score=0.813 total time=   0.4s
[CV 1/5] END learning_rate=0.5, max_depth=4, n_estimators=100, subsample=0.8;, score=0.851 total time=   0.7s
[CV 2/5] END learning_rate=0.5, max_depth=4, n_estimators=100, subsample=0.8;, score=0.873 total time=   0.8s
[CV 3/5] END learning_rate=0.5, max_depth=4, n_estimators=100, subsample=0.8;, score=0.791 total time=   0.6s
[CV 4/5] END learning_rate=0.5, max_depth=4, n_estimators=100, subsample=0.8;, score=0.828 total time=   0.6s
[CV 5/5] END learning_rate=0.5, max_depth=4, n_estimators=100, subsample=0.8;, score=0.821 total time=   0.6s
[CV 1/5] END l

[CV 2/5] END learning_rate=0.5, max_depth=5, n_estimators=200, subsample=1.0;, score=0.881 total time=   1.7s
[CV 3/5] END learning_rate=0.5, max_depth=5, n_estimators=200, subsample=1.0;, score=0.813 total time=   1.7s
[CV 4/5] END learning_rate=0.5, max_depth=5, n_estimators=200, subsample=1.0;, score=0.858 total time=   1.8s
[CV 5/5] END learning_rate=0.5, max_depth=5, n_estimators=200, subsample=1.0;, score=0.828 total time=   1.8s
[CV 1/5] END learning_rate=0.5, max_depth=6, n_estimators=50, subsample=0.8;, score=0.851 total time=   0.4s
[CV 2/5] END learning_rate=0.5, max_depth=6, n_estimators=50, subsample=0.8;, score=0.851 total time=   0.4s
[CV 3/5] END learning_rate=0.5, max_depth=6, n_estimators=50, subsample=0.8;, score=0.828 total time=   0.4s
[CV 4/5] END learning_rate=0.5, max_depth=6, n_estimators=50, subsample=0.8;, score=0.813 total time=   0.4s
[CV 5/5] END learning_rate=0.5, max_depth=6, n_estimators=50, subsample=0.8;, score=0.836 total time=   0.3s
[CV 1/5] END le

[CV 2/5] END learning_rate=1.0, max_depth=3, n_estimators=100, subsample=1.0;, score=0.836 total time=   0.5s
[CV 3/5] END learning_rate=1.0, max_depth=3, n_estimators=100, subsample=1.0;, score=0.806 total time=   0.4s
[CV 4/5] END learning_rate=1.0, max_depth=3, n_estimators=100, subsample=1.0;, score=0.813 total time=   0.4s
[CV 5/5] END learning_rate=1.0, max_depth=3, n_estimators=100, subsample=1.0;, score=0.799 total time=   0.4s
[CV 1/5] END learning_rate=1.0, max_depth=3, n_estimators=200, subsample=0.8;, score=0.851 total time=   0.7s
[CV 2/5] END learning_rate=1.0, max_depth=3, n_estimators=200, subsample=0.8;, score=0.866 total time=   0.9s
[CV 3/5] END learning_rate=1.0, max_depth=3, n_estimators=200, subsample=0.8;, score=0.761 total time=   0.9s
[CV 4/5] END learning_rate=1.0, max_depth=3, n_estimators=200, subsample=0.8;, score=0.813 total time=   0.9s
[CV 5/5] END learning_rate=1.0, max_depth=3, n_estimators=200, subsample=0.8;, score=0.784 total time=   1.1s
[CV 1/5] E

[CV 2/5] END learning_rate=1.0, max_depth=5, n_estimators=50, subsample=1.0;, score=0.866 total time=   0.3s
[CV 3/5] END learning_rate=1.0, max_depth=5, n_estimators=50, subsample=1.0;, score=0.784 total time=   0.4s
[CV 4/5] END learning_rate=1.0, max_depth=5, n_estimators=50, subsample=1.0;, score=0.813 total time=   0.3s
[CV 5/5] END learning_rate=1.0, max_depth=5, n_estimators=50, subsample=1.0;, score=0.806 total time=   0.4s
[CV 1/5] END learning_rate=1.0, max_depth=5, n_estimators=100, subsample=0.8;, score=0.851 total time=   0.6s
[CV 2/5] END learning_rate=1.0, max_depth=5, n_estimators=100, subsample=0.8;, score=0.843 total time=   0.7s
[CV 3/5] END learning_rate=1.0, max_depth=5, n_estimators=100, subsample=0.8;, score=0.776 total time=   0.6s
[CV 4/5] END learning_rate=1.0, max_depth=5, n_estimators=100, subsample=0.8;, score=0.806 total time=   0.6s
[CV 5/5] END learning_rate=1.0, max_depth=5, n_estimators=100, subsample=0.8;, score=0.806 total time=   0.7s
[CV 1/5] END l

[CV 2/5] END learning_rate=1.0, max_depth=6, n_estimators=200, subsample=1.0;, score=0.858 total time=   1.1s
[CV 3/5] END learning_rate=1.0, max_depth=6, n_estimators=200, subsample=1.0;, score=0.843 total time=   1.4s
[CV 4/5] END learning_rate=1.0, max_depth=6, n_estimators=200, subsample=1.0;, score=0.806 total time=   1.7s
[CV 5/5] END learning_rate=1.0, max_depth=6, n_estimators=200, subsample=1.0;, score=0.821 total time=   1.3s
Best parameters: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Best score: 0.8626865671641791


# Gradient Boosting Regression 

In [8]:
from sklearn.datasets import  make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

#   Genarate Dataset

In [9]:
X, y = make_regression(n_samples=1000, n_features=2, noise=10, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)


NameError: name 'make_regression' is not defined

In [None]:
regressor = GradientBoostingRegressor()

# Fit the regressor to the training data
regressor.fit(X_train, y_train)
