#**AdaBoost Random Forest - Classification (Sklearn)**

**Import Libraries**

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from sklearn.datasets import load_diabetes, load_iris, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.preprocessing import MaxAbsScaler, PowerTransformer
from sklearn.linear_model import LinearRegression, LogisticRegression, RidgeClassifier, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble import BaggingClassifier, BaggingRegressor
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.ensemble import StackingClassifier, StackingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor
from sklearn.ensemble import IsolationForest
from sklearn.svm import SVC, SVR
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import precision_score, recall_score, f1_score, explained_variance_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc

##**Using Sklearn**

In [2]:
# Generate a synthetic dataset for demonstration
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_classes=3, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Number of Random Forest classifiers to create
num_forests = 3
# Create multiple Random Forest classifiers
random_forests = [(f'rf_{i}', RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42 + i)) for i in range(num_forests)]

# Initialize AdaBoost
ada_boost = AdaBoostClassifier(n_estimators=50, random_state=42)

# Create a VotingClassifier to combine them
ensemble_model = VotingClassifier(estimators=random_forests + [('ada', ada_boost)], voting='hard')

# Train the ensemble model
ensemble_model.fit(X_train, y_train)

# Make predictions
y_pred_ensemble = ensemble_model.predict(X_test)
print(y_pred_ensemble[:5])

# Evaluate the ensemble model
accuracy_ensemble = accuracy_score(y_test, y_pred_ensemble)
print(f"Ensemble Model Accuracy: {accuracy_ensemble * 100:.2f}%")

# Retrieve information about the Random Forest models
total_forests = len(random_forests)  # Total number of Random Forests used
trees_per_forest = random_forests[0][1].n_estimators  # Number of trees in each Random Forest

print(f"Total Random Forests used: {total_forests}")
print(f"Each Random Forest consists of {trees_per_forest} trees.")



[1 2 0 0 1]
Ensemble Model Accuracy: 74.00%
Total Random Forests used: 3
Each Random Forest consists of 100 trees.


##**Using AdaBoost Class**

In [3]:
class AdaBoostForest:
    def __init__(self, n_estimators=5, n_trees=10, max_depth=3, min_samples_split=2):
        self.n_estimators = n_estimators  # Number of boosting iterations
        self.n_trees = n_trees  # Number of trees in each random forest
        self.max_depth = max_depth  # Max depth of each tree
        self.min_samples_split = min_samples_split  # Min samples to split a node
        self.forests = []  # List to store Random Forest models
        self.tree_weights = []  # Weights for each forest

    def fit(self, X, y):
        n_samples = X.shape[0]
        sample_weights = np.full(n_samples, (1 / n_samples))  # Initial sample weights

        for _ in range(self.n_estimators):
            # Create a Random Forest model for this iteration
            forest = RandomForestClassifier(n_estimators=self.n_trees,
                                             max_depth=self.max_depth,
                                             min_samples_split=self.min_samples_split,
                                             random_state=42)
            forest.fit(X, y, sample_weight=sample_weights)  # Fit forest with sample weights

            predictions = forest.predict(X)
            error = np.sum(sample_weights * (predictions != y)) / np.sum(sample_weights)

            # Calculate weight for the forest
            tree_weight = 0.5 * np.log((1 - error) / (error + 1e-10))  # Avoid division by zero
            self.forests.append(forest)
            self.tree_weights.append(tree_weight)

            # Update sample weights
            sample_weights *= np.exp(-tree_weight * y * (2 * (predictions == y) - 1))
            sample_weights /= np.sum(sample_weights)  # Normalize weights

    def predict(self, X):
        weighted_predictions = np.zeros(X.shape[0])
        for forest, tree_weight in zip(self.forests, self.tree_weights):
            predictions = forest.predict(X)
            weighted_predictions += tree_weight * predictions  # Accumulate weighted predictions

        return np.sign(weighted_predictions).astype(int)  # Return class predictions

    def get_info(self):
        return {
            "total_forests": len(self.forests),
            "trees_per_forest": self.n_trees
        }

# Generate a synthetic dataset for demonstration
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_classes=2, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert labels from {0, 1} to {-1, 1} for compatibility with the AdaBoostForest implementation
y_train = np.where(y_train == 0, -1, 1)
y_test = np.where(y_test == 0, -1, 1)

# Initialize and fit the AdaBoostForest model
ada_boost_forest = AdaBoostForest(n_estimators=5, n_trees=10, max_depth=3)
ada_boost_forest.fit(X_train, y_train)

# Print information about the model
info = ada_boost_forest.get_info()
print(f"Total Random Forests used: {info['total_forests']}")
print(f"Each Random Forest consists of {info['trees_per_forest']} trees.")

# Make predictions
y_pred = ada_boost_forest.predict(X_test)
print(y_pred[:5])

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"AdaBoostForest Accuracy: {accuracy * 100:.2f}%")

Total Random Forests used: 5
Each Random Forest consists of 10 trees.
[-1 -1 -1 -1 -1]
AdaBoostForest Accuracy: 59.50%
