# In [None]:
# Q1. What is Gradient Boosting Regression?
# Gradient Boosting Regression is a machine learning technique used for regression tasks.
# It builds an ensemble of weak learners (e.g., decision trees) sequentially, where each tree corrects
# the residual errors of the previous trees. The algorithm minimizes a loss function (e.g., mean squared error)
# by gradient descent in function space: each new tree is fit to the negative gradient of the loss.

# Q2. Implement a simple gradient boosting algorithm from scratch using Python and NumPy.
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Build a reproducible toy regression problem: y = 3x + Gaussian noise
np.random.seed(42)
X = 10 * np.random.rand(100, 1)  # one feature, uniform on [0, 10)
y = 3 * X[:, 0] + 2 * np.random.randn(100)  # noisy linear target

# Hold out the last 20% of the samples for testing (no shuffling)
split_index = int(len(X) * 0.8)
X_train, X_test = np.split(X, [split_index])
y_train, y_test = np.split(y, [split_index])

# Gradient Boosting Regressor from scratch
class GradientBoostingRegressor:
    """Gradient boosting for squared-error regression.

    The ensemble starts from a constant prediction (the mean of the training
    targets) and then repeatedly fits a weak learner to the current residuals,
    adding a ``learning_rate``-scaled copy of that learner's output to the
    running prediction.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        """
        Initialize the Gradient Boosting Regressor.

        Parameters:
        n_estimators: Number of weak learners (boosting rounds) to fit.
        learning_rate: Shrinkage factor applied to every weak learner's output.
        max_depth: Forwarded to each weak learner's constructor.
        """
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []
        # Constant starting prediction f_0. It stays 0.0 until fit() runs, so
        # an unfitted model still predicts zeros.
        self.initial_prediction = 0.0

    def fit(self, X, y):
        """
        Train the Gradient Boosting Regressor.

        Parameters:
        X: 2-D array of shape (n_samples, n_features).
        y: 1-D sequence of n_samples target values. It is never modified.
        """
        targets = np.asarray(y, dtype=float)

        # Refitting starts from scratch instead of stacking new learners on
        # top of the ones left over from a previous fit.
        self.models = []
        self.initial_prediction = float(targets.mean()) if targets.size else 0.0

        # The subtraction allocates a fresh float array, so the in-place
        # updates below can never write into the caller's `y`.
        residual = targets - self.initial_prediction

        for _ in range(self.n_estimators):
            model = DecisionStump(max_depth=self.max_depth)
            model.fit(X, residual)

            # For squared error the residual is the negative gradient, so the
            # shrunken learner output is one gradient-descent step.
            residual -= self.learning_rate * model.predict(X)

            # Store the trained model
            self.models.append(model)

    def predict(self, X):
        """
        Predict target values for input data X.

        Returns a 1-D float array with one prediction per row of X: the
        initial constant plus the shrunken output of every stored learner.
        """
        X = np.asarray(X)
        predictions = np.full(X.shape[0], self.initial_prediction, dtype=float)
        for model in self.models:
            predictions += self.learning_rate * model.predict(X)
        return predictions

class DecisionStump:
    """One-split regression tree used as a weak learner.

    The stump compares a single feature against a single threshold and
    predicts one constant for rows at or below the threshold and another
    constant for rows above it.
    """

    def __init__(self, max_depth=1):
        """
        Initialize a simple decision stump.

        Parameters:
        max_depth: Stored for interface compatibility only; the stump always
            makes exactly one split.
        """
        self.max_depth = max_depth
        self.threshold = None
        self.feature_index = None
        self.left_value = None
        self.right_value = None

    def fit(self, X, y):
        """
        Fit the decision stump to the data.

        Every distinct value of every feature is tried as a threshold and the
        split with the lowest mean squared error is kept.

        Parameters:
        X: 2-D array of shape (n_samples, n_features).
        y: 1-D sequence of n_samples target values.

        Raises:
        ValueError: if y contains NaN or infinite values.
        """
        X = np.asarray(X)
        y = np.asarray(y, dtype=float)

        # Non-finite targets would make every candidate loss NaN and silently
        # leave the stump unfitted, so reject them up front.
        if not np.isfinite(y).all():
            raise ValueError("y must contain only finite values")

        # Leaf value for a side that receives no training rows (e.g. the
        # largest threshold of a feature): use the overall mean rather than
        # an arbitrary 0 so unseen inputs get a sensible prediction.
        fallback = y.mean() if y.size else 0.0

        best_loss = float('inf')
        for feature_index in range(X.shape[1]):
            column = X[:, feature_index]
            for threshold in np.unique(column):
                left_mask = column <= threshold
                right_mask = ~left_mask

                left_value = y[left_mask].mean() if left_mask.any() else fallback
                right_value = y[right_mask].mean() if right_mask.any() else fallback

                predictions = np.where(left_mask, left_value, right_value)
                # Plain NumPy MSE avoids per-candidate validation overhead
                # inside this hot loop.
                loss = np.mean((y - predictions) ** 2)

                # Strict comparison keeps the first of several equally good splits.
                if loss < best_loss:
                    best_loss = loss
                    self.threshold = threshold
                    self.feature_index = feature_index
                    self.left_value = left_value
                    self.right_value = right_value

    def predict(self, X):
        """
        Predict using the fitted decision stump.

        Returns a 1-D array with ``left_value`` for rows whose split feature
        is <= the threshold and ``right_value`` for all other rows.

        Raises:
        RuntimeError: if no split has been learned yet.
        """
        if self.threshold is None or self.feature_index is None:
            raise RuntimeError("DecisionStump must be fitted before calling predict")
        X = np.asarray(X)
        left_mask = X[:, self.feature_index] <= self.threshold
        return np.where(left_mask, self.left_value, self.right_value)

# Fit a 50-round ensemble on the training split and predict the held-out rows
gbr = GradientBoostingRegressor(learning_rate=0.1, n_estimators=50)
gbr.fit(X=X_train, y=y_train)
y_pred = gbr.predict(X=X_test)

# Report held-out error and goodness of fit
print("Mean Squared Error:", mean_squared_error(y_true=y_test, y_pred=y_pred))
print("R-Squared:", r2_score(y_true=y_test, y_pred=y_pred))

# Q3. Experiment with different hyperparameters to optimize performance
# Use Grid Search or Random Search to test combinations of hyperparameters:
# - Learning rate
# - Number of estimators
# - Maximum depth
# Evaluate each combination using cross-validation and metrics such as MSE or R-squared.

# Q4. What is a weak learner in Gradient Boosting?
# A weak learner is a simple model (e.g., a shallow decision tree) that performs only slightly better than a trivial
# baseline (random guessing in classification, predicting the mean in regression).
# Gradient Boosting combines these weak learners to form a strong predictive model.

# Q5. What is the intuition behind the Gradient Boosting algorithm?
# Gradient Boosting sequentially trains models to correct the errors made by previous models.
# Each new learner minimizes the residual errors, optimizing the overall model's performance.

# Q6. How does Gradient Boosting build an ensemble of weak learners?
# 1. Initialize the model with a constant prediction (e.g., the mean of the target variable).
# 2. Add weak learners iteratively:
#    - Compute pseudo-residuals (the negative gradient of the loss function with respect to the current predictions).
#    - Train a weak learner on the residuals.
#    - Update the ensemble with the new learner.
# 3. Repeat until the desired number of iterations is reached.

# Q7. What are the steps involved in constructing the mathematical intuition of Gradient Boosting?
# 1. Define a loss function L(y, f(x)) to measure model performance.
# 2. Initialize the model with a constant prediction f_0(x).
# 3. For each iteration t:
#    - Compute the gradient of the loss function with respect to predictions g_t.
#    - Train a weak learner h_t(x) to approximate the negative gradient -g_t.
#    - Update the model: f_t(x) = f_{t-1}(x) + η * h_t(x), where η is the learning rate.
# 4. Output the final ensemble model after T iterations.
