In [1]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.ensemble import AdaBoostClassifier
import plotly.plotly     as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

In [2]:
# set parameters
n_iterations = 20
n_samples    = 300
noise        = 0.2

In [3]:
# create dataset
X, y = make_moons(n_samples=n_samples, noise=noise)

In [4]:
# fit classifier
adaboost = AdaBoostClassifier(n_estimators=n_iterations)
adaboost.fit(X, y)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=20, random_state=None)

In [5]:
# get estimators in the ensemble
estimators = adaboost.estimators_

In [6]:
# get sample weights
staged_classification = np.array(list(adaboost.staged_predict(X)))
staged_missclassified = staged_classification != y
staged_sample_weights = np.ones(shape=(n_iterations+1, len(X))) / len(X)
for istage in range(1, n_iterations+1):
    estimator_weight = adaboost.estimator_weights_[istage-1]
    sample_weight = staged_sample_weights[istage-1].copy()
    incorrect = staged_missclassified[istage-1]
    ############ code snippets from sklearn AdaboostClassifier source ############
    # Only boost positive weights
    sample_weight *= np.exp(estimator_weight * incorrect *
                                    ((sample_weight > 0) |
                                     (estimator_weight < 0)))
    ##############################################################################
    sample_weight /= np.sum(sample_weight)
    staged_sample_weights[istage] = sample_weight

In [7]:
# prepare to plot decision boundary
h = .02
xrange = np.max(X[:, 0]) - np.min(X[:, 0])
yrange = np.max(X[:, 1]) - np.min(X[:, 1])
xs = np.arange(np.min(X[:, 0])-xrange*0.1, np.max(X[:, 0])+xrange*0.1, h)
ys = np.arange(np.min(X[:, 1])-xrange*0.1, np.max(X[:, 1])+xrange*0.1, h)
xx, yy = np.meshgrid(xs, ys)
staged_zz = np.array(list(adaboost.staged_predict(np.c_[xx.ravel(), yy.ravel()])))
staged_zz = staged_zz.reshape(len(staged_zz), xx.shape[0], xx.shape[1])

In [8]:
selected_iter = 20

data   = [go.Scatter(x=X[:, 0], y=X[:, 1], mode='markers', 
                     marker=dict(color=y, colorscale='RdBu', size=5)),
          go.Heatmap(x=xs, y=ys, z=staged_zz[selected_iter-1], 
                     colorscale='RdBu', opacity=0.3, showscale=False)]
layout = go.Layout(title='Decision Boundary', autosize=False, width=500, height=500)
fig = go.Figure(data=data, layout=layout)

iplot(fig)

data   = [go.Scatter(x=X[:, 0], y=X[:, 1], mode='markers', 
                     marker=dict(color=y, colorscale='RdBu', line=dict(width=0), 
                                 size=np.sqrt(staged_sample_weights[selected_iter]*3000))),
          go.Heatmap(x=xs, y=ys, z=staged_zz[selected_iter-1], 
                     colorscale='RdBu', opacity=0.3, showscale=False)]
layout = go.Layout(title='Sample Weights', autosize=False, width=500, height=500)
fig = go.Figure(data=data, layout=layout)

iplot(fig)

try:
    next_estimator = estimators[selected_iter]
    next_zz = next_estimator.predict(np.c_[xx.ravel(), yy.ravel()])
    next_zz = next_zz.reshape(xx.shape)

    data   = [go.Scatter(x=X[:, 0], y=X[:, 1], mode='markers', 
                         marker=dict(color=y, colorscale='RdBu', line=dict(width=0), 
                                     size=np.sqrt(staged_sample_weights[selected_iter]*3000))),
              go.Heatmap(x=xs, y=ys, z=next_zz, 
                         colorscale='RdBu', opacity=0.3, showscale=False)]
except:
    data   = [go.Scatter(x=X[:, 0], y=X[:, 1], mode='markers', 
                         marker=dict(color=y, colorscale='RdBu', line=dict(width=0), 
                                     size=np.sqrt(staged_sample_weights[selected_iter]*3000)))]
layout = go.Layout(title='S', autosize=False, width=500, height=500)
fig = go.Figure(data=data, layout=layout)

print("next estimator")
iplot(fig)

decision boundary


sample weights


next estimator
