Adaboost.M2 for neural networks as base learners
This code implements a neural network-based AdaBoost.M2 algorithm for binary classification. AdaBoost.M2 is an ensemble learning method that combines multiple weak learners to create a strong classifier. The weak learners in this implementation are neural networks trained on the Breast Cancer Wisconsin dataset.

In [1]:
import numpy as np
import pandas as pd

ANN

In [2]:
# Activation function (sigmoid) and its derivative
def logistic(x):
    return 1 / (1 + np.exp(-x))

def logistic_derivative(x):
    return np.multiply(x, (1 - x))

# Single layer forward propagation
def single_layer_forward_propagation(W, B, A_prev):
    Z = np.dot(W, A_prev) + B
    return logistic(Z)

# Set parameters in the parameter dictionary
def set_parameters(parameters, l, W, B):
    parameters["W" + str(l + 1)] = W
    parameters["b" + str(l + 1)] = B
    return parameters

# Get parameters from the parameter dictionary
def get_parameters(parameters, l):
    return parameters["W" + str(l + 1)], parameters["b" + str(l + 1)]

# Forward propagation through the entire network
def vectorized_forward_propagation(X, parameters):
    cache = {}
    L = len(parameters) // 2
    A = np.transpose(X)
    cache["A0"] = A
    for l in range(L):
        W, B = get_parameters(parameters, l)
        A = single_layer_forward_propagation(W, B, A)
        cache["A" + str(l + 1)] = A
    return cache

# Backward propagation through the entire network
def vectorized_backward_propagation(X, y, cache, parameters, alpha, W):
    m = X.shape[0]
    L = len(parameters) // 2
    A_last = cache["A" + str(L)]
    error = A_last - y

    for l in reversed(range(L)):
        A_prev = cache["A" + str(l)]
        W_curr, B_curr = get_parameters(parameters, l)

        delta = error * logistic_derivative(cache["A" + str(l + 1)])
        dW = np.dot(delta, A_prev.T) / m
        dB = np.sum(delta, axis=1, keepdims=True) / m

        W_curr -= alpha * dW
        B_curr -= alpha * dB

        parameters = set_parameters(parameters, l, W_curr, B_curr)

        if l > 0:
            error = np.dot(W_curr.T, delta)

    return parameters

# Xavier/He initialization for weights
def get_a(n_prev, n_next):
    return np.sqrt(2 / (n_prev + n_next))

# Initialize weights for all layers
def initialize_weights(layer_dims):
    parameters = {}
    for l in range(1, len(layer_dims)):
        a = get_a(layer_dims[l-1], layer_dims[l])
        parameters["W" + str(l)] = np.random.uniform(size=(layer_dims[l], layer_dims[l-1]), low=-a, high=a)
        parameters["b" + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters

Adaboost.M2 Implementation

In [4]:
def train_weak_learner(X, y, W, alpha, epochs, layer_dims):
     """
    Train a weak learner (neural network) using the given weights.
    Args:
        X: Training data features.
        y: Training data labels (binary).
        W: Sample weights.
    """
    parameters = initialize_weights(layer_dims)

    for epoch in range(epochs):
        # intermediate values storage
        cache = vectorized_forward_propagation(X, parameters)
        parameters = vectorized_backward_propagation(X, y.T, cache, parameters, alpha, W)

    return parameters

# Forward pass through the trained network
def forward(X, parameters):
    cache = vectorized_forward_propagation(X, parameters)
    A_last = cache["A" + str(len(parameters) // 2)]
    return A_last


def AdaBoostM2(X, y, T, alpha, epochs, layer_dims):
     """
    AdaBoost.M2 algorithm for binary classification.

    Args:
        X: Training data features.
        y: Training data labels (binary).
        T: Number of boosting iterations.

    Returns:
        List of weak learners and their weights.
    """
    N = len(X)
    W = np.ones(N) / N
    weak_learners = []
    learner_weights = []

    for t in range(T):
        # Get trained parameters
        parameters = train_weak_learner(X, y, W, alpha, epochs, layer_dims)
        predictions = forward(X, parameters).flatten()
        # Calculate Pseudo-loss
        epsilon_t = np.sum(W * (y != (predictions > 0.5).astype(int))) / 2
        print(f"Iteration {t}, Pseudo-loss: {epsilon_t:.4f}")

        # If the error is too high, stop early
        if epsilon_t >= 0.5:
            break

        # Calculate beta
        beta_t = epsilon_t / (1 - epsilon_t)
        print(f"Iteration {t}, Beta: {beta_t:.4f}")

        # Update sample weights
        W *= np.exp(0.5 * (1 - (y == (predictions > 0.5).astype(int))))
        W /= np.sum(W)
        print(f"Iteration {t}, Weights: {W}")

        # Store the weak learner and its weight
        weak_learners.append(parameters)
        learner_weights.append(np.log(1 / beta_t))

    return weak_learners, learner_weights

def predict(X, weak_learners, weights):
     """
    Predict the class labels for a given set of data points.

    Args:
        X: Data points to predict.
        weak_learners: List of weak learners.
        weights: List of weights for the weak learners.

    Returns:
        Predicted class labels.
    """
    M = len(X)
    y_pred = np.zeros(M)

    for i in range(M):
        weighted_sum = np.sum([forward(X[i].reshape(1, -1), wl).flatten() * w for wl, w in zip(weak_learners, weights)], axis=0)
        y_pred[i] = (weighted_sum > 0.5).astype(int)

    return y_pred

# Hyperparameter tuning using grid search
def hyperparameter_tuning(X, y, T, alpha_values, epoch_values, layer_configs):
    best_alpha = None
    best_epochs = None
    best_layer_dims = None
    best_accuracy = 0

    # Split data into training and validation sets
    split_index = int(0.7 * len(X))
    X_train, X_val = X[:split_index], X[split_index:]
    y_train, y_val = y[:split_index], y[split_index:]

    for alpha in alpha_values:
        for epochs in epoch_values:
            for layer_dims in layer_configs:
                weak_learners, weights = AdaBoostM2(X_train, y_train, T, alpha, epochs, layer_dims)
                y_pred_val = predict(X_val, weak_learners, weights)
                accuracy_val = np.mean(y_pred_val == y_val)
                print(f"Alpha: {alpha}, Epochs: {epochs}, Layers: {layer_dims}, Validation Accuracy: {accuracy_val * 100:.2f}%")

                if accuracy_val > best_accuracy:
                    best_alpha = alpha
                    best_epochs = epochs
                    best_layer_dims = layer_dims
                    best_accuracy = accuracy_val

    return best_alpha, best_epochs, best_layer_dims

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 9)

Data upload and preprocessing + Model Training and Evaluation

In [None]:
# Load the Breast Cancer Wisconsin dataset
data = pd.read_csv("breast_cancer_wisconsin.csv")
X = data.iloc[:, 2:].values
y = data.iloc[:, 1].values

# Map string labels to integers
label_mapping = {label: idx for idx, label in enumerate(np.unique(y))}
y = np.array([label_mapping[label] for label in y])

# Normalize features
X = (X - X.mean(axis=0)) / X.std(axis=0)

# Hyperparameters to tune
alpha_values = [0.01, 0.05, 0.1]
epoch_values = [20, 50, 100]
layer_configs = [
    [X.shape[1], 4, 1],
    [X.shape[1], 8, 4, 1],
    [X.shape[1], 16, 8, 4, 1]
]

# Perform hyperparameter tuning
T = 20  # Number of boosting iterations
best_alpha, best_epochs, best_layer_dims = hyperparameter_tuning(X, y, T, alpha_values, epoch_values, layer_configs)
print(f"Best Alpha: {best_alpha}, Best Epochs: {best_epochs}, Best Layer Configuration: {best_layer_dims}")

# Train AdaBoost.M2 with the best hyperparameters
split_index = int(0.7 * len(X))
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

weak_learners, weights = AdaBoostM2(X_train, y_train, T, best_alpha, best_epochs, best_layer_dims)

# Evaluate on the test set
y_pred_train = predict(X_train, weak_learners, weights)
accuracy_train = np.mean(y_pred_train == y_train)
print(f"Training Accuracy: {accuracy_train * 100:.2f}%")

y_pred_test = predict(X_test, weak_learners, weights)
accuracy_test = np.mean(y_pred_test == y_test)
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

Iteration 0, Pseudo-loss: 0.2173
Iteration 0, Beta: 0.2777
Iteration 0, Weights: [0.00323134 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134
 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134
 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134
 0.00323134 0.00195991 0.00195991 0.00195991 0.00323134 0.00323134
 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134
 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134 0.00323134
 0.00323134 0.00195991 0.00323134 0.00323134 0.00323134 0.00323134
 0.00323134 0.00323134 0.00323134 0.00323134 0.00195991 0.00323134
 0.00195991 0.00195991 0.00195991 0.00195991 0.00195991 0.00323134
 0.00323134 0.00195991 0.00323134 0.00323134 0.00195991 0.00195991
 0.00195991 0.00195991 0.00323134 0.00195991 0.00323134 0.00323134
 0.00195991 0.00195991 0.00195991 0.00195991 0.00323134 0.00195991
 0.00323134 0.00323134 0.00195991 0.00323134 0.00195991 0.00323134
 0.00323134 0.00195991 0.00195991 0.00195991 0.0

  y_pred[i] = (weighted_sum > 0.5).astype(int)


Iteration 5, Pseudo-loss: 0.4518
Iteration 5, Beta: 0.8241
Iteration 5, Weights: [0.00542882 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882
 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882
 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882
 0.00542882 0.00027029 0.00027029 0.00027029 0.00542882 0.00542882
 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882
 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882 0.00542882
 0.00542882 0.00027029 0.00542882 0.00542882 0.00542882 0.00542882
 0.00542882 0.00542882 0.00542882 0.00542882 0.00027029 0.00542882
 0.00027029 0.00027029 0.00027029 0.00027029 0.00027029 0.00542882
 0.00542882 0.00027029 0.00542882 0.00542882 0.00027029 0.00027029
 0.00027029 0.00027029 0.00542882 0.00027029 0.00542882 0.00542882
 0.00027029 0.00027029 0.00027029 0.00027029 0.00542882 0.00027029
 0.00542882 0.00542882 0.00027029 0.00542882 0.00027029 0.00542882
 0.00542882 0.00027029 0.00027029 0.00027029 0.0