In [153]:
import numpy as np
from numpy import sqrt, e, log
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split

In [194]:
def point_margin(x, alpha, h):
    """
    Calculate the margin of a point
    Args:
        x:      point
        alpha:  weights
        h:      classifiers
    """
    alpha_l1 = np.linalg.norm(alpha, ord=1)
    T = len(h)
    f = 0
    for i in range(T):
        f = alpha[i] * h[i].predict(x)
    margin = np.abs(f) / alpha_l1
    return margin

def sample_margin(X, alpha, h):
    """
    Calculate the margin of a sample
    Args:
        X:      sample
        alpha:  weights
        h:      classifiers
    """
    m = X.shape[0]
    margin = float('+inf')
    for i in range(m):
        x = X[i].reshape(1, -1)
        margin = min(margin, point_margin(x, alpha, h))
    return margin

def get_error_bound(rho, d, m, delta):
    """
    Calculate the error bound
    Args:
        rho:    margin
        d:      VC dimension
        m:      sample size
        delta:  confidence
    """
    first_term = sqrt((2 * d * log(e * m / d)) / m) * 2 / rho
    second_term = sqrt(log(1 / delta) / (2 * m))
    return first_term + second_term

In [199]:
d = 10
m = 1000000
delta = 0.3

# print(get_error_bound(0.14, d, m, delta))

def create_dataset(m, d):
    """
    Create a dataset for the experiment
    Args:
        m: sample size
        d: VC dimension
    """
    X, y = make_classification(
        n_samples=2*m,
        n_features=d-1,
        n_classes=2,
        random_state=42,
        shuffle=False,
        class_sep=0.2,
    )
    # Changle class labels to +1 and -1
    y = 2*y - 1
    # Split the dataset into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=42
    )
    return X_train, X_test, y_train, y_test


def evaluate_error(m, d, delta):
    """
    Evaluate the error of the AdaBoost classifier
    Args:
        m:      sample size
        d:      VC dimension
        delta:  confidence
        T:      number of iterations
    """
    # Create the dataset
    X_train, X_test, y_train, y_test = create_dataset(m, d)

    # Train the AdaBoostClassifier with Perceptron base estimator
    clf = AdaBoostClassifier(
        base_estimator=Perceptron(penalty=None),
        # n_estimators=T,
        algorithm='SAMME',
        random_state=20,
    )
    clf.fit(X_train, y_train)

    alpha = clf.estimator_weights_
    h = clf.estimators_
    rho = sample_margin(X_train, alpha, h)

    R_test = 1 - clf.score(X_test, y_test)
    R_train = 1 - clf.score(X_train, y_train)
    error_bound = get_error_bound(rho, d, m, delta)

    return R_test, R_train, error_bound, rho

evaluate_error(m, d, delta)

0.22676960597684473


(0.487819, 0.48818700000000004, array([0.03954692]), array([0.81605038]))