Q1. What is Gradient Boosting Regression?
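Gradient Boosting Regression is a machine learning technique for regression tasks that builds an ensemble of weak learners, typically shallow decision trees, in a stage-wise manner. Each new learner is fitted to the negative gradient of the loss function with respect to the current ensemble's predictions (for squared-error loss, this is simply the residuals), so that it corrects the errors made by the previous learners; in effect, this is gradient descent in function space. The final model is the initial prediction plus the sum of all weak learners, each scaled by a learning rate, with the aim of minimizing the overall prediction error.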

In [1]:
#Q2

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Toy 1-D regression data: ten samples with a single feature taking the values 1..10
X = np.arange(1, 11).reshape(-1, 1)
y = np.array([3, 6, 2, 8, 7, 3, 5, 8, 7, 10])

# Gradient Boosting implementation
class GradientBoostingRegressor:
    """Gradient boosting for squared-error regression with decision trees.

    The ensemble starts from the mean of the training targets and then adds
    ``n_estimators`` regression trees one at a time. Each tree is fitted to
    the residuals left by the ensemble so far, and its output is scaled by
    ``learning_rate`` before being added.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting stages (trees) to fit.
    learning_rate : float, default=0.1
        Shrinkage factor applied to every tree's contribution.
    max_depth : int, default=3
        Maximum depth passed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    models : list
        Fitted trees, in the order they were added.
    initial_prediction : float
        Mean of the training targets; only set once ``fit`` has been called.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []

    def fit(self, X, y):
        """Fit the ensemble on ``X`` and ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,)
            Training targets.

        Returns
        -------
        self : GradientBoostingRegressor
            The fitted estimator, so calls can be chained.
        """
        X = np.asarray(X)
        y = np.asarray(y, dtype=float)

        # Start from scratch: without this, calling fit() a second time would
        # stack the new trees on top of the ones from the previous run.
        self.models = []

        # Initialize model predictions with the mean of the target values
        self.initial_prediction = float(np.mean(y))
        y_pred = np.full(y.shape, self.initial_prediction)

        for _ in range(self.n_estimators):
            # For squared-error loss the residual is the direction in which
            # the current predictions have to move.
            residual = y - y_pred
            model = DecisionTreeRegressor(max_depth=self.max_depth)
            model.fit(X, residual)
            y_pred += self.learning_rate * model.predict(X)
            self.models.append(model)
        return self

    def predict(self, X):
        """Predict targets for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Inputs to predict for.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            The initial prediction plus the shrunken contribution of every
            fitted tree.
        """
        X = np.asarray(X)
        y_pred = np.full(X.shape[0], self.initial_prediction)
        for model in self.models:
            y_pred += self.learning_rate * model.predict(X)
        return y_pred

# Fit a small ensemble (10 depth-2 trees) and predict on the training inputs
gbr = GradientBoostingRegressor(max_depth=2, learning_rate=0.1, n_estimators=10)
gbr.fit(X, y)
y_pred = gbr.predict(X)

# In-sample metrics: computed on the same data the model was fitted on
mse, r2 = mean_squared_error(y, y_pred), r2_score(y, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)


Mean Squared Error: 2.4518736631688953
R-squared: 0.5973934871643851


In [3]:
#Q3

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, RegressorMixin

# Toy 1-D regression data: the feature column holds 1..10, one value per sample
X = np.arange(1, 11)[:, np.newaxis]
y = np.array([3, 6, 2, 8, 7, 3, 5, 8, 7, 10])

# Gradient Boosting implementation
class GradientBoostingRegressor(RegressorMixin, BaseEstimator):
    """Scikit-learn compatible gradient boosting regressor (squared error).

    The ensemble starts from the mean of the training targets and then adds
    ``n_estimators`` regression trees one at a time. Each tree is fitted to
    the residuals left by the ensemble so far, and its output is scaled by
    ``learning_rate`` before being added.

    ``get_params`` and ``set_params`` are inherited from ``BaseEstimator``,
    which derives the parameter names from the ``__init__`` signature and
    rejects unknown names instead of silently setting arbitrary attributes.
    The mixin is listed before ``BaseEstimator``, as scikit-learn recommends.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of boosting stages (trees) to fit.
    learning_rate : float, default=0.1
        Shrinkage factor applied to every tree's contribution.
    max_depth : int, default=3
        Maximum depth passed to each ``DecisionTreeRegressor``.

    Attributes
    ----------
    models : list
        Fitted trees, in the order they were added.
    initial_prediction : float or None
        Mean of the training targets; ``None`` until ``fit`` is called.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []
        self.initial_prediction = None

    def fit(self, X, y):
        """Fit the ensemble on ``X`` and ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,)
            Training targets.

        Returns
        -------
        self : GradientBoostingRegressor
            The fitted estimator, following the scikit-learn convention.
        """
        X = np.asarray(X)
        y = np.asarray(y, dtype=float)

        # Discard trees from any earlier fit so refits do not accumulate.
        self.models = []
        self.initial_prediction = float(np.mean(y))
        y_pred = np.full(y.shape, self.initial_prediction)

        for _ in range(self.n_estimators):
            # For squared-error loss the residual is the direction in which
            # the current predictions have to move.
            residual = y - y_pred
            model = DecisionTreeRegressor(max_depth=self.max_depth)
            model.fit(X, residual)
            y_pred += self.learning_rate * model.predict(X)
            self.models.append(model)
        return self

    def predict(self, X):
        """Predict targets for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Inputs to predict for.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            The initial prediction plus the shrunken contribution of every
            fitted tree.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        if self.initial_prediction is None:
            # Without this guard np.full(..., None) would quietly produce an
            # object array of None values instead of failing.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This GradientBoostingRegressor instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )

        X = np.asarray(X)
        y_pred = np.full(X.shape[0], self.initial_prediction)
        for model in self.models:
            y_pred += self.learning_rate * model.predict(X)
        return y_pred

# Candidate hyperparameter values; every combination is evaluated
param_grid = dict(
    n_estimators=[10, 50, 100],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[1, 2, 3, 4],
)

# Exhaustive search with 3-fold cross-validation, scored by negated MSE
gbr = GradientBoostingRegressor()
grid_search = GridSearchCV(
    gbr,
    param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
)
grid_search.fit(X, y)

# The scorer is negated MSE, so flip the sign to report a plain MSE
best_score = -grid_search.best_score_
best_params = grid_search.best_params_

print("Best parameters:", best_params)
print("Best score (MSE):", best_score)


Best parameters: {'learning_rate': 0.01, 'max_depth': 1, 'n_estimators': 50}
Best score (MSE): 8.818599999166432
