## This example compares the ``SelfPacedEnsembleClassifier`` with other methods

In [1]:
# Single seed for the whole notebook: passed to the dataset loader and to
# every ensemble constructor below.
RANDOM_STATE = 42

## Preparation
First, we will import the necessary packages and load the **covtype** dataset.

In [2]:
from self_paced_ensemble import SelfPacedEnsembleClassifier
from self_paced_ensemble.canonical_ensemble import *
from self_paced_ensemble.utils import load_covtype_dataset
from self_paced_ensemble.self_paced_ensemble.base import sort_dict_by_key

from time import time
from collections import Counter
import matplotlib.pyplot as plt

from sklearn.decomposition import KernelPCA
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import average_precision_score

In [3]:
# Load a 10% subset of covtype, already split into train / test partitions.
X_train, X_test, y_train, y_test = load_covtype_dataset(
    subset=0.1,
    random_state=RANDOM_STATE,
)

# Per-class sample counts of each partition, ordered by class label.
origin_distr, test_distr = (
    sort_dict_by_key(Counter(labels)) for labels in (y_train, y_test)
)
print('Original training dataset shape %s' % origin_distr)
print('Original test dataset shape     %s' % test_distr)


Dataset used: 		Forest covertypes from UCI (10.0% random subset)
Positive target:	7
Imbalance ratio:	27.328
Original training dataset shape {0: 44840, 1: 1640}
Original test dataset shape     {0: 11211, 1: 411}


## Train all ensemble classifiers

In [4]:
# Constructor arguments shared by every ensemble in the comparison.
init_kwargs = {
    'n_estimators': 10,
    'random_state': RANDOM_STATE,
}
# Training data handed to each classifier's fit(); the loop below reads from
# this dict so that changing it here actually changes what is trained on.
fit_kwargs = {
    'X': X_train,
    'y': y_train,
}

# Display name -> ensemble class to benchmark.
ensembles = {
    'SelfPacedEnsemble': SelfPacedEnsembleClassifier,
    'SMOTEBagging': SMOTEBaggingClassifier,
    'SMOTEBoost': SMOTEBoostClassifier,
    'UnderBagging': UnderBaggingClassifier,
    'RUSBoost': RUSBoostClassifier,
    'BalanceCascade': BalanceCascadeClassifier,
}

# Fitted classifiers keyed by display name, kept so they remain available
# after the loop instead of being overwritten on each iteration.
fit_ensembles = {}
for ensemble_name, ensemble_class in ensembles.items():
    ensemble_clf = ensemble_class(**init_kwargs)
    print('Training {:^20s} '.format(ensemble_name), end='')
    # Only the fit() call is timed; prediction and scoring are excluded.
    start_time = time()
    # Passed positionally so this does not depend on each class naming its
    # fit() parameters exactly `X` and `y`.
    ensemble_clf.fit(fit_kwargs['X'], fit_kwargs['y'])
    fit_time = time() - start_time
    fit_ensembles[ensemble_name] = ensemble_clf
    # Column 1 of predict_proba is used as the score for AUPRC
    # (presumably the positive class, label 1 -- confirm against classes_).
    y_pred = ensemble_clf.predict_proba(X_test)[:, 1]
    score = average_precision_score(y_test, y_pred)
    print('| AUPRC {:.3f} | Time {:.3f}s'.format(score, fit_time))

Training  SelfPacedEnsemble   | AUPRC 0.907 | Time 0.482s
Training     SMOTEBagging     | AUPRC 0.895 | Time 14.080s
Training      SMOTEBoost      | AUPRC 0.479 | Time 3.110s
Training     UnderBagging     | AUPRC 0.769 | Time 0.583s
Training       RUSBoost       | AUPRC 0.531 | Time 0.196s
Training    BalanceCascade    | AUPRC 0.871 | Time 0.464s
