#**AdaBoost Random Forest - Regression (Sklearn)**

**Import Libraries**

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from sklearn.datasets import load_diabetes, load_iris, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.preprocessing import MaxAbsScaler, PowerTransformer
from sklearn.linear_model import LinearRegression, LogisticRegression, RidgeClassifier, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble import BaggingClassifier, BaggingRegressor
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.ensemble import StackingClassifier, StackingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor
from sklearn.ensemble import IsolationForest
from sklearn.svm import SVC, SVR
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import precision_score, recall_score, f1_score, explained_variance_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc

##**Uisng Sklearn**

In [2]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.metrics import mean_squared_error

# Load the diabetes dataset for regression
data = load_diabetes()
X, y = data.data, data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Regressor
random_forest = RandomForestRegressor(n_estimators=5, max_depth=10, random_state=42)

# Create an AdaBoost Regressor with Random Forest as the base estimator
adaboost = AdaBoostRegressor(estimator=random_forest, n_estimators=3, learning_rate=1.0, random_state=42)

# Fit the AdaBoost model
adaboost.fit(X_train, y_train)

# Make predictions on the test set
y_pred = adaboost.predict(X_test)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")

# Summary of the number of trees and forests
print(f"Total number of Random Forests used: {3}")
print(f"Each Random Forest contains {5} trees.")

Mean Squared Error: 3199.36
Total number of Random Forests used: 3
Each Random Forest contains 5 trees.


##**Using AdaBoost Class**

In [3]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

class AdaBoost:
    def __init__(self, n_estimators=5, learning_rate=1, n_trees_per_forest=5):
        self.n_estimators = n_estimators      # Number of forests (weak learners)
        self.learning_rate = learning_rate
        self.n_trees_per_forest = n_trees_per_forest  # Trees per forest
        self.models = []
        self.alphas = []

    def fit(self, X, y):
        n_samples = len(y)
        # Initialize weights
        weights = np.ones(n_samples) / n_samples

        for _ in range(self.n_estimators):
            # Create a random forest regressor as the weak learner
            model = RandomForestRegressor(n_estimators=self.n_trees_per_forest, max_depth=3, random_state=42)
            model.fit(X, y, sample_weight=weights)
            y_pred = model.predict(X)

            # Compute the error
            error = np.mean((y_pred - y) ** 2)
            if error >= 1:  # If the error is too high, skip this estimator
                continue

            alpha = self.learning_rate * 0.5 * np.log((1 - error) / (error + 1e-10))  # Avoid division by zero

            # Update weights
            weights *= np.exp(-alpha * (y_pred - y) ** 2)  # Adjust this for regression
            weights /= np.sum(weights)  # Normalize weights

            self.models.append(model)
            self.alphas.append(alpha)

    def predict(self, X):
        # Compute the weighted sum of the predictions from all models
        final_prediction = np.zeros(X.shape[0])  # Initialize with zeros
        for alpha, model in zip(self.alphas, self.models):
            final_prediction += alpha * model.predict(X)
        return final_prediction

    def evaluate(self, X, y):
        predictions = self.predict(X)
        mse = np.mean((y - predictions) ** 2)
        return mse

    def report_forest_info(self):
        # Reporting the number of forests (weak learners) and trees per forest
        print(f"Number of forests (weak learners): {len(self.models)}")
        if len(self.models) > 0:
            print(f"Number of trees per forest: {self.n_trees_per_forest}")

# Load the diabetes dataset for regression
data = load_diabetes()
X, y = data.data, data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the AdaBoost regressor with Random Forest as the base learner
adaboost_regressor = AdaBoost(n_estimators=3, learning_rate=1, n_trees_per_forest=5)  # 3 forests with 5 trees each
adaboost_regressor.fit(X_train, y_train)

# Predictions
y_pred = adaboost_regressor.predict(X_test)
print("First 5 Predictions:", y_pred[:5])

# Evaluate
mse = adaboost_regressor.evaluate(X_test, y_test)
print(f"MSE: {mse:.2f}")

# Report the number of forests and trees per forest
adaboost_regressor.report_forest_info()

First 5 Predictions: [0. 0. 0. 0. 0.]
MSE: 26548.58
Number of forests (weak learners): 0
