# Ada boost
- https://www.analyticsvidhya.com/blog/2021/09/adaboost-algorithm-a-complete-guide-for-beginners/

In [16]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [17]:
models = []
alphas = []
def my_adaboost(X,y,n_estimators=5):
    n_samples, n_features = X.shape
    weights = np.ones(n_samples) / n_samples

    for _ in range(n_estimators):
        model = DecisionTreeClassifier(max_depth=1)
        model.fit(X, y, sample_weight=weights)
        predictions = model.predict(X)

        error = np.sum(weights * (predictions != y))

        alpha = 0.5 * np.log((1 - error) / (error + 1e-10))
        alphas.append(alpha)

        weights *= np.exp(-alpha * y * predictions)
        weights /= np.sum(weights)

        models.append(model)
        
def adaboost_predict(X):
    n_samples = X.shape[0]
    predictions = np.zeros(n_samples)

    for alpha, model in zip(alphas, models):
        predictions += alpha * model.predict(X)

    return np.sign(predictions)

In [18]:
    # Create synthetic data
X, Y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Create an AdaBoost classifier
my_adaboost(X_train,Y_train,n_estimators=5)


# Make predictions on the testing set
y_pred = adaboost_predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(Y_test, y_pred)
print("Test Accuracy:", accuracy)

# Make predictions on the testing set
y_train_pred = adaboost_predict(X_train)
accuracy = accuracy_score(Y_train, y_train_pred)
print("Train Accuracy:", accuracy)

test Accuracy: 0.86
Train Accuracy: 0.87625
