# Adaboost with a Custom Sklearn Classifier
This is a toy example of using scikit-learn's AdaBoost meta-classifier with a custom scikit-learn-compatible base classifier.

## Imports

In [8]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import math

## Training and Test Datasets

In [9]:
# Synthetic classification data: 1000 samples with 50 features, 20 of them informative.
X_all, y_all = make_classification(
    n_samples=1000,
    n_features=50,
    n_informative=20,
    n_redundant=0,
    class_sep=1.4,
    shuffle=False,
    random_state=0,
)

# Hold out 33% of the samples for evaluation; the seed keeps the split reproducible.
X, X_test, y, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.33,
    random_state=42,
)

## AdaBoost Classifier with a Decision Stump as the Base Classifier

In [10]:
# No base estimator is passed, so AdaBoost falls back to its default weak learner.
clf = AdaBoostClassifier(n_estimators=50, random_state=0).fit(X, y)

# NOTE(review): this is accuracy on the training split (X, y); the cells further
# down score on (X_test, y_test), so the numbers are not directly comparable.
clf.score(X, y)

0.9776119402985075

## AdaBoostClassifier with a Custom Base Classifier

In [11]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import precision_score

class SillyBinaryClassifier(BaseEstimator, ClassifierMixin):
  """Single-feature threshold classifier for the labels 0 and 1.

  ``fit`` tries every feature column and, for each one, ``steps`` evenly
  spaced thresholds starting at the column minimum. It keeps the
  (column, threshold) pair with the highest (optionally weighted) training
  accuracy. ``predict`` returns 0 where the chosen feature is below the
  threshold and 1 otherwise.

  Parameters
  ----------
  steps : int, default=100
    Number of candidate thresholds evaluated per feature column.

  Attributes
  ----------
  classes_ : ndarray
    Always ``[0, 1]``; the labels are hard-coded, not derived from ``y``.
  n_classes_ : int
    Always 2.
  row_ : int
    Index of the selected feature column (despite the name, it indexes
    columns of ``X``).
  threshold_ : float
    Selected decision threshold on column ``row_``.
  """

  def __init__(self, steps=100):
    self.steps = steps

  def fit(self, X, y, sample_weight=None):
    """Search all columns and candidate thresholds for the best split.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
    y : array-like of shape (n_samples,)
      Labels; predictions are compared against them directly, so they are
      expected to be 0 or 1.
    sample_weight : array-like of shape (n_samples,), optional
      Per-sample weights used when computing the accuracy of a candidate.

    Returns
    -------
    self
    """
    X = np.asarray(X)
    y = np.asarray(y)
    weights = None if sample_weight is None else np.asarray(sample_weight)

    self.n_classes_ = 2
    self.classes_ = np.array([0, 1])

    # Fallback when no candidate scores above 0: column 0 with a threshold
    # of -inf, i.e. every sample is predicted as class 1.
    best_row = 0
    best_threshold = -np.inf
    best_score = 0

    for row in range(X.shape[1]):
      column = X[:, row]
      min_x = column.min()
      step_width = (column.max() - min_x) / self.steps

      threshold = min_x
      for _ in range(self.steps):
        # Weighted accuracy of the candidate, computed on local values so the
        # estimator's fitted attributes are not used as scratch space.
        pred = np.where(column < threshold, 0, 1)
        score = np.average(pred == y, weights=weights)
        # Strict comparison: on ties the earliest candidate is kept.
        if score > best_score:
          best_score = score
          best_threshold = threshold
          best_row = row
        threshold += step_width

    # Publish the result once, after the whole search has finished.
    self.threshold_ = best_threshold
    self.row_ = best_row
    return self

  def predict(self, X):
    """Return 0 where column ``row_`` of ``X`` is below ``threshold_``, else 1."""
    X = np.asarray(X)
    return np.where(X[:, self.row_] < self.threshold_, 0, 1)

In [12]:
# Baseline: one SillyBinaryClassifier on its own, without boosting.
silly_classifier = SillyBinaryClassifier(steps=10).fit(X, y)

# Accuracy on the held-out split.
silly_classifier.score(X_test, y_test)

0.5909090909090909

In [14]:
# The base estimator is passed positionally on purpose: scikit-learn renamed
# the keyword from `base_estimator` to `estimator`, and the old name was later
# removed, whereas the first positional argument works across releases.
# algorithm="SAMME" is the discrete variant that only needs `predict`, which
# is all SillyBinaryClassifier implements (it has no `predict_proba`).
silly_ada = AdaBoostClassifier(
    SillyBinaryClassifier(steps=10),
    n_estimators=100,
    random_state=0,
    algorithm="SAMME",
)
silly_ada.fit(X, y)

# Accuracy of the boosted ensemble on the held-out split.
silly_ada.score(X_test, y_test)

0.7