# Bagging
#### Importation of resources:

In [13]:
import numpy as np 
from sklearn.datasets import make_classification
from sklearn.model_selection import learning_curve, train_test_split, cross_val_score, RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from imblearn.ensemble import BalancedRandomForestClassifier, EasyEnsembleClassifier, BalancedBaggingClassifier
from collections import Counter

seed=123


#### Making a random binary classification problem which contains 10000 observations and 90% belonging to one class. This can be viewed using the Counter function

In [14]:
X, y = make_classification(n_samples=10000, weights=[0.90])
Counter(y)

Counter({0: 8962, 1: 1038})

#### The various bagging classifiers along with the repeated stratified k fold validation method being used with the cv variable. 

In [139]:
bagClf = BaggingClassifier(random_state=seed)
bbc = BalancedBaggingClassifier(random_state = seed)
brf = BalancedRandomForestClassifier(n_estimators=10, random_state=seed)
bootrf = RandomForestClassifier(n_estimators=10, class_weight='balanced_subsample', random_state=seed)
eec = EasyEnsembleClassifier(n_estimators=10, random_state=seed)
rf = RandomForestClassifier(random_state=seed)

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=seed)

# Standard Bagging

In [140]:
scores = cross_val_score(bagClf, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.3f' % np.mean(scores),"%")

Accuracy: 0.968 %


# Bagging with random undersampling

In [141]:
scores = cross_val_score(bbc, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.3f' % np.mean(scores),"%")

Accuracy: 0.945 %


# Original Random Forest

In [142]:
scores = cross_val_score(rf, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.3f' % np.mean(scores),"%")

Accuracy: 0.968 %


# Random Forest with Bootstrap class weighting

In [144]:
scores = cross_val_score(bootrf, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.3f' % np.mean(scores),"%")

Accuracy: 0.964 %


# Random Forest with random undersampling

In [145]:
scores = cross_val_score(brf, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.3f' % np.mean(scores),"%")

Accuracy: 0.931 %


# Easy Ensemble

In [129]:
scores = cross_val_score(eec, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
print('Accuracy: %.3f' % np.mean(scores),"%")

Accuracy: 0.918 %
