In [16]:
# Bagging (Bootstrap Aggregating): implementation of the core concept.
# A synthetic dataset is generated, and a random-guess classifier is used as the base estimator
# instead of a decision tree (DT), SVM, etc.
# Accuracy will be low because of the nature of this base classifier; it can be improved by using
# a DT, an SVM, or another more powerful classifier as the base estimator.
# Author: Muhammad Humayun Khan

import numpy as np    # linear algebra
from sklearn.datasets import make_classification    # using sklearn dataset for data generation
from sklearn.model_selection import train_test_split    # model training and testing of data
from collections import Counter

# Single seed shared by every source of randomness so that Restart & Run All reproduces the results
SEED = 42
# The classifiers in this notebook draw from NumPy's global random state (np.random.choice),
# so seed it explicitly in addition to passing the seed to scikit-learn
np.random.seed(SEED)

# Step 1: Generate a synthetic dataset: 1000 samples, 20 features (15 informative + 5 redundant)
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=SEED)

# Step 2: Split the dataset into training (70%) and testing (30%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

In [17]:
# Define a simple random guessing classifier
class RandomGuessClassifier:
    """Baseline classifier that ignores the features and guesses labels at random.

    Guesses are drawn according to the class frequencies observed in the
    targets passed to ``fit``.
    """

    def fit(self, X, y):
        """Record the distinct class labels in ``y`` and their relative frequencies.

        Parameters
        ----------
        X : any
            Accepted for interface compatibility with other estimators; not used.
        y : array-like
            Target labels (NumPy array or plain Python sequence).
        """
        # np.unique returns two arrays: the unique class labels and how often each occurs in y
        # e.g. y = [0,1,0,0,1,1,2] then classes = [0,1,2], counts = [3,3,1]
        self.classes_, self.class_counts_ = np.unique(y, return_counts=True)
        # Normalise by the total count (instead of y.shape[0]) so that plain lists,
        # which have no .shape attribute, are accepted as well
        self.class_probabilities_ = self.class_counts_ / self.class_counts_.sum()

    def predict(self, X):
        """Return one randomly drawn class label per row of ``X``.

        Only the number of rows of ``X`` is used. Labels are sampled from
        NumPy's global random state with the probabilities learned in ``fit``.
        """
        # np.shape also works for nested lists, not only for objects exposing .shape
        n_rows = np.shape(X)[0]
        return np.random.choice(self.classes_, size=n_rows, p=self.class_probabilities_)

In [18]:
# Define the Bagging Classifier
class BaggingClassifier:
    """Bootstrap-aggregating (bagging) ensemble with majority-vote prediction.

    ``fit`` trains ``n_estimators`` independent models, each on a bootstrap
    resample (sampling rows with replacement) of the training data.
    ``predict`` returns, for every sample, the label predicted by the largest
    number of models.

    Parameters
    ----------
    base_estimator : callable
        Called with no arguments to create each model; the returned object
        must provide ``fit(X, y)`` and ``predict(X)``.
    n_estimators : int, default 10
        Number of models in the ensemble.
    """

    def __init__(self, base_estimator, n_estimators=10):
        self.base_estimator = base_estimator    # factory for the individual models
        self.n_estimators = n_estimators        # number of models in the ensemble
        self.models = []                        # fitted models, (re)built by fit()

    def fit(self, X, y):
        """Train a fresh ensemble on bootstrap resamples of ``(X, y)``.

        ``X`` and ``y`` must support integer-array indexing (e.g. NumPy arrays).
        Calling ``fit`` again replaces the previously trained models.
        """
        n_samples = X.shape[0]
        # Build into a new list so that refitting replaces the ensemble instead of
        # appending to it (which would leave more models than n_estimators)
        models = []
        for _ in range(self.n_estimators):
            # Bootstrap sample: n_samples row indices drawn with replacement
            indices = np.random.choice(n_samples, n_samples, replace=True)
            model = self.base_estimator()
            model.fit(X[indices], y[indices])
            models.append(model)
        self.models = models

    def predict(self, X):
        """Return the majority-vote label for every row of ``X``.

        Raises
        ------
        RuntimeError
            If no models have been fitted yet.
        """
        if not self.models:
            raise RuntimeError("BaggingClassifier has no fitted models; call fit() first.")
        # One row per model, one column per sample; the dtype follows the predicted
        # labels, so non-integer labels (e.g. strings) are supported
        predictions = np.array([model.predict(X) for model in self.models])
        # Majority vote per sample; on a tie Counter keeps the label that was seen first
        final_predictions = [
            Counter(predictions[:, i]).most_common(1)[0][0]
            for i in range(predictions.shape[1])
        ]
        return np.array(final_predictions)


In [19]:
# Fit an ensemble of 100 random-guess models, each trained on its own bootstrap resample of the training split
bagging_model = BaggingClassifier(
    base_estimator=RandomGuessClassifier,
    n_estimators=100,
)
bagging_model.fit(X_train, y_train)

In [20]:
# Predict a label for every test sample by majority vote over the fitted ensemble
y_pred = bagging_model.predict(X_test)

In [21]:
# Accuracy = fraction of test samples whose predicted label equals the true label
accuracy = (y_pred == y_test).mean()
print(f'Accuracy of Bagging Classifier: {accuracy:.2f}')

Accuracy of Bagging Classifier: 0.52
