In [1]:
import warnings

import numpy as np
import pandas as pd
# Original order kept below: the star imports run before the explicit sklearn
# imports, so the sklearn names win if a star import exports the same name.
from decorate import DecorateClassifier
from data_generation_methods import *
from datasets import *
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

np.warnings.filterwarnings('ignore')

In [2]:
# Benchmark datasets to evaluate; each class is instantiated with no
# arguments, in the order listed here.
datasets = [
    dataset_cls()
    for dataset_cls in (
        LymphographyDataset,
        AbaloneDataset,
        BalanceScaleDataset,
        LetterRecognitionDataset,
        ContraceptiveMethodChoiceDataset,
        ChessDataset,
        CarDataset,
        GlassDataset,
        NurseryDataset,
        HayesRothDataset,
    )
]

In [3]:
# Stratified k-fold evaluation of DecorateClassifier on every dataset.
N_SPLITS = 10  # number of stratified folds
acc_avgs = {}  # dataset short name -> mean accuracy over the folds
skf = StratifiedKFold(n_splits=N_SPLITS)

for dataset in datasets:
    ds_name = dataset.__class__.__name__.replace('Dataset', '')
    X, y = dataset.get_X_y()
    fold_scores = []  # one accuracy value per fold

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        # skf.split yields positional indices, so select y by position too.
        # Plain y[...] on a pandas Series with an integer index selects by
        # label, which only matches positions for a default 0..n-1 index.
        if hasattr(y, 'iloc'):
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        else:
            y_train, y_test = y[train_index], y[test_index]

        # Build a new classifier for every fold so that no fitted state can
        # carry over from a previous fold, whose training rows include this
        # fold's test rows.
        dec = DecorateClassifier(n_estimators=100, n_iter=50, art_factor=0.1)
        dec.fit(X_train, y_train)
        y_pred = dec.predict(X_test)
        # accuracy_score is documented as (y_true, y_pred).
        fold_scores.append(accuracy_score(y_test, y_pred))

    # Average over the folds actually run instead of a hard-coded 10.
    acc_avgs[ds_name] = sum(fold_scores) / len(fold_scores)
    print(f"Done {ds_name}, accuracy = {acc_avgs[ds_name]}")

print()
print(acc_avgs)

Done Lymphography, accuracy = 0.9153846153846154
Done Abalone, accuracy = 0.920988893349391
Done BalanceScale, accuracy = 0.25333333333333335
Done LetterRecognition, accuracy = 0.9903625687504383
Done ContraceptiveMethodChoice, accuracy = 0.8946763639447415
Done Chess, accuracy = 0.9794645135911105
Done Car, accuracy = 0.9988505747126437
Done Glass, accuracy = 1.0
Done Nursery, accuracy = 0.9996144949884348
Done HayesRoth, accuracy = 0.9933333333333334

{'Lymphography': 0.9153846153846154, 'Abalone': 0.920988893349391, 'BalanceScale': 0.25333333333333335, 'LetterRecognition': 0.9903625687504383, 'ContraceptiveMethodChoice': 0.8946763639447415, 'Chess': 0.9794645135911105, 'Car': 0.9988505747126437, 'Glass': 1.0, 'Nursery': 0.9996144949884348, 'HayesRoth': 0.9933333333333334}


In [5]:
# Summarise every dataset: class name with the 'Dataset' part removed,
# mapped to the object's `shape` attribute.
shapes = dict(
    (dataset_obj.__class__.__name__.replace('Dataset', ''), dataset_obj.shape)
    for dataset_obj in datasets
)
print(shapes)
    

{'Lymphography': (148, 19), 'Abalone': (4177, 9), 'BalanceScale': (625, 5), 'LetterRecognition': (20000, 17), 'ContraceptiveMethodChoice': (1473, 10), 'Chess': (28056, 7), 'Car': (1728, 7), 'Glass': (214, 11), 'Nursery': (12960, 9), 'HayesRoth': (132, 6)}
