#**AdaBoost Decision Tree - Classification (Sklearn)**

**Import Libraries**

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from sklearn.datasets import load_diabetes, load_iris, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.preprocessing import MaxAbsScaler, PowerTransformer
from sklearn.linear_model import LinearRegression, LogisticRegression, RidgeClassifier, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble import BaggingClassifier, BaggingRegressor
from sklearn.ensemble import VotingClassifier, VotingRegressor
from sklearn.ensemble import StackingClassifier, StackingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor
from sklearn.ensemble import IsolationForest
from sklearn.svm import SVC, SVR
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import precision_score, recall_score, f1_score, explained_variance_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc

**Code**

In [2]:
class AdaBoost:
    def __init__(self, model, n_estimators=50):
        self.model = model  # The weak learner passed as a parameter
        self.n_estimators = n_estimators
        self.models = []  # List to hold weak learners
        self.alphas = []  # List to hold the weights of the weak learners

    def fit(self, X, y):
        n_samples = X.shape[0]
        w = np.ones(n_samples) / n_samples  # Initialize weights

        for _ in range(self.n_estimators):
            # Clone the model to ensure a new instance is created for each fit
            model = self.model
            model.fit(X, y, sample_weight=w)  # Fit model with sample weights
            y_pred = model.predict(X)

            # Calculate error
            error = np.sum(w * (y_pred != y)) / np.sum(w)
            alpha = 0.5 * np.log((1 - error) / (error + 1e-10))

            # Update weights
            w *= np.exp(-alpha * (y_pred == y) * 2 - 1)  # Adjusted for binary labels
            w /= np.sum(w)  # Normalize weights

            self.models.append(model)
            self.alphas.append(alpha)

    def predict(self, X):
        y_pred = np.zeros(X.shape[0])
        for alpha, model in zip(self.alphas, self.models):
            y_pred += alpha * model.predict(X)
        return (y_pred > 0).astype(int)

    def get_n_estimators(self):
        return len(self.models)


**Load Dataset**

In [3]:
# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

In [4]:
# Convert to binary classification for AdaBoost
y = np.where(y == 2, 1, 0)  # Convert to 0 and 1 for AdaBoost

In [5]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((120, 4), (30, 4), (120,), (30,))

In [6]:
# Create a DecisionTreeClassifier instance
decision_tree_model = DecisionTreeClassifier(max_depth=1)

In [7]:
# Train AdaBoost with the decision tree model
adaboost = AdaBoost(model=decision_tree_model, n_estimators=50)
adaboost.fit(X_train, y_train)

In [8]:
# Get number of decision trees used
num_trees = adaboost.get_n_estimators()
print(f"Number of Decision Trees Used: {num_trees}")

Number of Decision Trees Used: 50


In [9]:
# Predictions
predictions = adaboost.predict(X_test)
print(predictions[:5])

[0 0 0 0 0]


In [10]:
# Accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"AdaBoost Accuracy: {accuracy:.2f}")

AdaBoost Accuracy: 0.60


In [11]:
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:\n", conf_matrix)

Confusion Matrix:
 [[18  1]
 [11  0]]
