In [150]:
import numpy as np
import pandas as pd
from sklearn import metrics

# classifier
from sklearn.tree import DecisionTreeClassifier

In [151]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [152]:
def bagging(X, y):
    n_samples = X.shape[0]
    indices = np.random.choice(n_samples, size=n_samples, replace=True) # random sampling with replacement
    return X.iloc[indices], y.iloc[indices]


In [153]:
class BaggedClassifier:
    def __init__(self, n_estimators, n_neighbours = 5): 
        self.n_estimators = n_estimators
        self.n_neighbours = n_neighbours
        self.classifiers = []

    def fit(self, X, y):
        for _ in range(self.n_estimators):
            clf = DecisionTreeClassifier(max_depth=3)
            X_sample, y_sample = bagging(X, y)
            clf.fit(X_sample, y_sample)

            self.classifiers.append(clf)


    def predict(self, X):
        preds = np.array([clf.predict(X) for clf in self.classifiers])
        preds = np.swapaxes(preds, 0 , 1)

        # majority vote
        y_pred = [np.argmax(np.bincount(pred)) for pred in preds]
        return y_pred


In [154]:

dataset = load_iris()
df = pd.DataFrame({
    'sepal length': dataset.data[:,0],
    'sepal width': dataset.data[:,1],
    'petal length': dataset.data[:,2],
    'petal width': dataset.data[:,3],
    'species': dataset.target
})


In [155]:
print('-----------DATASET-----------')
print(df.sample(5))

X = df.iloc[:,:4]
y = df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

-----------DATASET-----------
     sepal length  sepal width  petal length  petal width  species
39            5.1          3.4           1.5          0.2        0
105           7.6          3.0           6.6          2.1        2
59            5.2          2.7           3.9          1.4        1
57            4.9          2.4           3.3          1.0        1
13            4.3          3.0           1.1          0.1        0


In [156]:
print('\nBuilding random forest classifier')
clf = BaggedClassifier(n_estimators=100)
clf.fit(X_train, y_train)
print('number of classifiers:', clf.n_estimators)



Building random forest classifier
number of classifiers: 100


In [157]:
y_pred = clf.predict(X_test)
print()
print('accuracy:', metrics.accuracy_score(y_test, y_pred))
print('confusion matrix:\n', metrics.confusion_matrix(y_test, y_pred))
print("Classification Report: \n", metrics.classification_report(y_test, y_pred))


accuracy: 0.9333333333333333
confusion matrix:
 [[15  0  0]
 [ 0 13  2]
 [ 0  1 14]]
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.93      0.87      0.90        15
           2       0.88      0.93      0.90        15

    accuracy                           0.93        45
   macro avg       0.93      0.93      0.93        45
weighted avg       0.93      0.93      0.93        45

