In [166]:
import numpy as np
import pandas as pd
from sklearn import metrics

# classifier
from sklearn.tree import DecisionTreeClassifier

In [167]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [168]:
def bagging(X, y):
    n_samples = X.shape[0]
    # doing random sampling with replacement
    indices = np.random.choice(n_samples, size=n_samples, replace=True)
    return X.iloc[indices], y.iloc[indices]



In [169]:
class BaggedClassifier:
    def __init__(self, n_estimators, n_neighbours=5):
        self.n_estimators = n_estimators
        self.n_neighbours = n_neighbours
        self.classifiers = []

    def fit(self, X, y):
        for _ in range(self.n_estimators):
            clf = DecisionTreeClassifier(max_depth=4)

            # getting random sample for the given input
            X_sample, y_sample = bagging(X, y)

            # fitting the data on the given input
            clf.fit(X_sample, y_sample)

            self.classifiers.append(clf)

    def predict(self, X):
        preds = np.array([clf.predict(X) for clf in self.classifiers])
        preds = np.swapaxes(preds, 0, 1)

        # majority vote
        y_pred = [np.argmax(np.bincount(pred)) for pred in preds]
        return y_pred


In [170]:

dataset = load_iris()
df = pd.DataFrame({
    'sepal length': dataset.data[:,0],
    'sepal width': dataset.data[:,1],
    'petal length': dataset.data[:,2],
    'petal width': dataset.data[:,3],
    'species': dataset.target
})


In [171]:
print('-----------DATASET-----------')
print(df.sample(5))

X = df.iloc[:,:4]
y = df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

-----------DATASET-----------
     sepal length  sepal width  petal length  petal width  species
38            4.4          3.0           1.3          0.2        0
4             5.0          3.6           1.4          0.2        0
27            5.2          3.5           1.5          0.2        0
109           7.2          3.6           6.1          2.5        2
78            6.0          2.9           4.5          1.5        1


In [172]:
print('\nBuilding random forest classifier')
clf = BaggedClassifier(n_estimators=50)
clf.fit(X_train, y_train)
print('number of classifiers:', clf.n_estimators)



Building random forest classifier
number of classifiers: 50


In [173]:
y_pred = clf.predict(X_test)
print()
print('accuracy:', metrics.accuracy_score(y_test, y_pred))
print('confusion matrix:\n', metrics.confusion_matrix(y_test, y_pred))
print("Classification Report: \n", metrics.classification_report(y_test, y_pred))


accuracy: 0.9111111111111111
confusion matrix:
 [[12  0  0]
 [ 0 14  3]
 [ 0  1 15]]
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.93      0.82      0.87        17
           2       0.83      0.94      0.88        16

    accuracy                           0.91        45
   macro avg       0.92      0.92      0.92        45
weighted avg       0.92      0.91      0.91        45

