In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split
from sklearn import datasets

import warnings
warnings.filterwarnings("ignore")

  return f(*args, **kwds)


In [8]:
brc = datasets.load_breast_cancer()

In [9]:
x = brc.data
y = brc.target

In [14]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y, random_state=9)

In [15]:
rf = RandomForestClassifier()
lr = LogisticRegression()
sv = SVC()
dt = DecisionTreeClassifier()

In [16]:
from sklearn.metrics import accuracy_score

In [17]:
for clf in (rf, lr, sv, dt):
    clf.fit(xtrain, ytrain)
    ypred = clf.predict(xtest)
    print(clf.__class__.__name__, accuracy_score(ytest, ypred))

RandomForestClassifier 0.965034965034965
LogisticRegression 0.958041958041958
SVC 0.9090909090909091
DecisionTreeClassifier 0.9440559440559441


# Addaboost using RandomForest

In [24]:
adda = AdaBoostClassifier(
    base_estimator=rf,
    n_estimators=50,
    learning_rate=1.0,
    algorithm='SAMME.R',
    random_state=None,
)

In [25]:
adda.fit(xtrain,ytrain)

AdaBoostClassifier(algorithm='SAMME.R',
                   base_estimator=RandomForestClassifier(bootstrap=True,
                                                         ccp_alpha=0.0,
                                                         class_weight=None,
                                                         criterion='gini',
                                                         max_depth=None,
                                                         max_features='auto',
                                                         max_leaf_nodes=None,
                                                         max_samples=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=1,
                                                         min_samples_split=2,
                                       

In [26]:
ypred = adda.predict(xtest)

In [27]:
accuracy_score(ytest, ypred)

0.972027972027972

# Addaboost using LogisticRegression

In [28]:
adda = AdaBoostClassifier(
    base_estimator=lr,
    n_estimators=50,
    learning_rate=1.0,
    algorithm='SAMME.R',
    random_state=None,
)

In [29]:
adda.fit(xtrain,ytrain)

AdaBoostClassifier(algorithm='SAMME.R',
                   base_estimator=LogisticRegression(C=1.0, class_weight=None,
                                                     dual=False,
                                                     fit_intercept=True,
                                                     intercept_scaling=1,
                                                     l1_ratio=None,
                                                     max_iter=100,
                                                     multi_class='auto',
                                                     n_jobs=None, penalty='l2',
                                                     random_state=None,
                                                     solver='lbfgs', tol=0.0001,
                                                     verbose=0,
                                                     warm_start=False),
                   learning_rate=1.0, n_estimators=50, random_state=None)

In [30]:
ypred = adda.predict(xtest)
accuracy_score(ytest, ypred)

0.965034965034965

# Addaboost using SVM

In [37]:
adda = AdaBoostClassifier(
    base_estimator=sv,
    n_estimators=50,
    learning_rate=1.0,
    algorithm='SAMME',
    random_state=None,
)

In [38]:
adda.fit(xtrain,ytrain)

AdaBoostClassifier(algorithm='SAMME',
                   base_estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                                      class_weight=None, coef0=0.0,
                                      decision_function_shape='ovr', degree=3,
                                      gamma='scale', kernel='rbf', max_iter=-1,
                                      probability=False, random_state=None,
                                      shrinking=True, tol=0.001,
                                      verbose=False),
                   learning_rate=1.0, n_estimators=50, random_state=None)

In [39]:
ypred = adda.predict(xtest)
accuracy_score(ytest, ypred)

0.6293706293706294

# Addaboost using DecisionTree

In [34]:
adda = AdaBoostClassifier(
    base_estimator=dt,
    n_estimators=50,
    learning_rate=1.0,
    algorithm='SAMME.R',
    random_state=None,
)

In [35]:
adda.fit(xtrain,ytrain)

AdaBoostClassifier(algorithm='SAMME.R',
                   base_estimator=DecisionTreeClassifier(ccp_alpha=0.0,
                                                         class_weight=None,
                                                         criterion='gini',
                                                         max_depth=None,
                                                         max_features=None,
                                                         max_leaf_nodes=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=1,
                                                         min_samples_split=2,
                                                         min_weight_fraction_leaf=0.0,
                                                         presort='deprecated',
                       

In [36]:
ypred = adda.predict(xtest)
accuracy_score(ytest, ypred)

0.965034965034965

# Let's calculate the HyperParameter using GridSearch

In [41]:
param = {
    "learning_rate": [.1,.5,1,1.5,2,5],
    "algorithm": ["SAMME", "SAMME.R"],
    "n_estimators": [10,50,100,200,500]
}

In [42]:
from sklearn.model_selection import GridSearchCV

In [50]:
grid = GridSearchCV(
    estimator=adda,
    param_grid= param,
    scoring=None,
    n_jobs=None,
    iid='deprecated',
    refit=True,
    cv=None,
    verbose=0,
    pre_dispatch='2*n_jobs',
    return_train_score=False,
)

In [51]:
grid.fit(xtrain,ytrain)

GridSearchCV(cv=None, error_score=nan,
             estimator=AdaBoostClassifier(algorithm='SAMME',
                                          base_estimator=SVC(C=1.0,
                                                             break_ties=False,
                                                             cache_size=200,
                                                             class_weight=None,
                                                             coef0=0.0,
                                                             decision_function_shape='ovr',
                                                             degree=3,
                                                             gamma='scale',
                                                             kernel='rbf',
                                                             max_iter=-1,
                                                             probability=False,
                                                        

In [52]:
grid.best_params_

{'algorithm': 'SAMME', 'learning_rate': 0.1, 'n_estimators': 10}

# Let's use them for Adda

In [53]:
adda = AdaBoostClassifier(
    base_estimator=dt,
    n_estimators=10,
    learning_rate=0.1,
    algorithm='SAMME',
    random_state=None,
)

In [54]:
adda.fit(xtrain,ytrain)

AdaBoostClassifier(algorithm='SAMME',
                   base_estimator=DecisionTreeClassifier(ccp_alpha=0.0,
                                                         class_weight=None,
                                                         criterion='gini',
                                                         max_depth=None,
                                                         max_features=None,
                                                         max_leaf_nodes=None,
                                                         min_impurity_decrease=0.0,
                                                         min_impurity_split=None,
                                                         min_samples_leaf=1,
                                                         min_samples_split=2,
                                                         min_weight_fraction_leaf=0.0,
                                                         presort='deprecated',
                         

In [55]:
ypred = adda.predict(xtest)
accuracy_score(ytest, ypred)

0.951048951048951