#**AdaBoost Decision Tree - Regression (Sklearn)**

**Import Libraries**

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from sklearn.datasets import load_diabetes, load_iris, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.preprocessing import MaxAbsScaler, PowerTransformer
from sklearn.linear_model import LinearRegression, LogisticRegression, RidgeClassifier, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble import BaggingClassifier, BaggingRegressor
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.ensemble import StackingClassifier, StackingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor
from sklearn.ensemble import IsolationForest
from sklearn.svm import SVC, SVR
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import precision_score, recall_score, f1_score, explained_variance_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc

**Code**

##**Using Sklearn**

In [2]:
# Simple dataset
X = np.array([[1], [2], [3], [4], [5], [6], [7], [8], [9], [10]])  # Features
y = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])  # Target values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a DecisionTreeRegressor
base_model = DecisionTreeRegressor(criterion='squared_error', max_depth=5, min_samples_split=3)

# Create an AdaBoost regressor
adaboost_model = AdaBoostRegressor(estimator=base_model, n_estimators=50, random_state=42)

# Train the AdaBoost regressor
adaboost_model.fit(X_train, y_train)

# Make predictions
y_pred = adaboost_model.predict(X_test)

# Evaluate the regression performance
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"R^2 Score: {r2}")


Mean Squared Error: 0.5
Mean Absolute Error: 0.5
R^2 Score: 0.9591836734693877


##**Using AdaBoost Class**

In [3]:
class AdaBoost:
    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators

    def fit(self, X, y, model):
        n_samples = len(y)
        self.models = []
        self.alphas = []
        w = np.ones(n_samples) / n_samples  # Initialize weights

        for _ in range(self.n_estimators):
            model.fit(X, y)  # Fit the model
            y_pred = model.predict(X)

            # Calculate the error
            error = np.abs(y - y_pred)
            error_rate = np.dot(w, error) / np.sum(w)  # Weighted error rate

            # Handle perfect predictions
            if error_rate <= 0:
                print("Perfect prediction encountered. Stopping training.")
                break

            # Calculate alpha
            alpha = 0.5 * np.log((1 - error_rate) / (error_rate + 1e-10))  # Avoid division by zero
            self.models.append(model)
            self.alphas.append(alpha)

            # Update weights
            w *= np.exp(-alpha * (y - y_pred))
            w /= np.sum(w)  # Normalize weights

            # Check if all errors are very small
            if np.all(error < 1e-10):
                print("All errors are very small. Stopping training.")
                break

        print(f"Number of trees used: {len(self.models)}")

    def predict(self, X):
        y_pred = np.zeros(X.shape[0])
        for model, alpha in zip(self.models, self.alphas):
            y_pred += alpha * model.predict(X)
        return y_pred

# Simple dataset
X = np.array([[1], [2], [3], [4], [5], [6], [7], [8], [9], [10]])  # Features
y = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])  # Target values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a DecisionTreeRegressor
base_model = DecisionTreeRegressor(criterion='squared_error', max_depth=5, min_samples_split=3)

# Create and train the AdaBoost model
adaboost_model = AdaBoost(n_estimators=50)
adaboost_model.fit(X_train, y_train, base_model)

# Make predictions
y_pred = adaboost_model.predict(X_test)

# Evaluate the regression performance
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"R^2 Score: {r2}")


Number of trees used: 50
Mean Squared Error: 1162.5003896661963
Mean Absolute Error: 26.739721508241985
R^2 Score: -93.89799099315888
