### Bootstrap

In [1]:
import numpy as np
x = np.random.normal(loc=50.0, scale=10.0, size=100)
nsamples = 500
bs_samples = np.random.choice(x, (nsamples, x.size), replace=True)
bs_distn = np.mean(bs_samples, axis=1)
print("Bootstrap CI: (%.4f, %.4f)"%(bs_distn[int(0.025*nsamples)], bs_distn[int(0.975*nsamples)]))

Bootstrap CI: (51.4287, 50.3141)


### Simple bagging example

In [3]:
import numpy as np
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

np.random.seed(0)
iris = load_iris()
X, y = iris.data, iris.target
indices = np.arange(y.shape[0])
np.random.shuffle(indices)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

In [5]:
bm_name = ['KNN','DT ','SVM']
for bm, basemodel in enumerate([KNeighborsClassifier(), DecisionTreeClassifier(), SVC(kernel='rbf')]):
    clf = BaggingClassifier(basemodel, n_estimators=10,
                            max_samples=0.5, max_features=0.5)

    pipe  = Pipeline(steps=[('scaler', StandardScaler()),
                            ('bagged_clf', clf)])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    print(bm_name[bm], "f1_score", round(f1_score(y_test, y_pred,average='weighted'), 3))

KNN f1_score 0.933
DT  f1_score 0.967
SVM f1_score 0.967
