In [13]:
import numpy as np
import pandas as pd
from decorate import DecorateClassifier 
from data_generation_methods import *
from datasets import *
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
import time

# Silence library warnings for the whole notebook so the benchmark output stays readable.
# `np.warnings` was only an accidental re-export of the standard-library module and is
# no longer available in NumPy >= 1.24, so the stdlib module is used directly.
import warnings
warnings.filterwarnings('ignore')

In [14]:
# Benchmark datasets, in the order they are evaluated and reported below.
datasets = [
    LymphographyDataset(),
    AbaloneDataset(),
    BalanceScaleDataset(),
    LetterRecognitionDataset(),
    ContraceptiveMethodChoiceDataset(),
    ChessDataset(),
    CarDataset(),
    GlassDataset(),
    NurseryDataset(),
    HayesRothDataset(),
]

# Short dataset name (class name with the "Dataset" suffix stripped) -> value of
# the dataset object's `shape` attribute.
shapes = dict(
    (ds.__class__.__name__.replace('Dataset', ''), ds.shape)
    for ds in datasets
)
print(shapes)

{'Lymphography': (148, 19), 'Abalone': (4177, 9), 'BalanceScale': (625, 5), 'LetterRecognition': (20000, 17), 'ContraceptiveMethodChoice': (1473, 10), 'Chess': (28056, 7), 'Car': (1728, 7), 'Glass': (214, 11), 'Nursery': (12960, 9), 'HayesRoth': (132, 6)}


In [15]:
# Per-dataset results, keyed by the dataset's short name
# (class name with the "Dataset" suffix stripped).
acc_avgs = {}           # dataset -> mean accuracy over the stratified k-fold evaluation
fit_time_avgs = {}      # dataset -> mean seconds spent in fit() per fold
predict_time_avgs = {}  # dataset -> mean seconds spent in predict() per fold

skf = StratifiedKFold(n_splits=10)
# Average over the splitter's own fold count instead of a second hard-coded 10,
# so changing n_splits above cannot silently skew the reported averages.
n_folds = skf.get_n_splits()

for dataset in datasets:
    ## init sums
    acc_avg = 0
    fit_avg = 0
    predict_avg = 0

    ## data
    X, y = dataset.get_X_y()
    # X is sliced positionally with .iloc below, so y must be sliced positionally too.
    # Plain `y[idx]` is label-based on a pandas Series with a non-default index.
    # NOTE(review): get_X_y() is defined outside this notebook; y is assumed to be
    # either a pandas object or a NumPy array -- confirm.
    y_is_pandas = hasattr(y, "iloc")

    ## k-fold loop
    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        if y_is_pandas:
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        else:
            y_train, y_test = y[train_index], y[test_index]

        ## classifier
        # A fresh estimator per fold guarantees that no fitted state can carry over
        # from a previous fold, independent of how fit() is implemented.
        # Construction is deliberately kept outside the timed region.
        dec = DecorateClassifier(n_estimators=100, n_iter=50, art_factor=0.1)

        # perf_counter() is monotonic and high-resolution, unlike the wall clock
        # returned by time.time(), so it is the right tool for measuring intervals.
        fit_tic = time.perf_counter()
        dec.fit(X_train, y_train)
        fit_tac = time.perf_counter()

        predict_tic = time.perf_counter()
        y_pred = dec.predict(X_test)
        predict_tac = time.perf_counter()

        # scikit-learn's documented argument order is (y_true, y_pred).
        acc_avg += accuracy_score(y_test, y_pred)
        fit_avg += (fit_tac - fit_tic)
        predict_avg += (predict_tac - predict_tic)

    ds_name = dataset.__class__.__name__.replace('Dataset', '')
    acc_avgs[ds_name] = round(acc_avg / n_folds, 3)
    fit_time_avgs[ds_name] = round(fit_avg / n_folds, 3)
    predict_time_avgs[ds_name] = round(predict_avg / n_folds, 3)
    print(f"Done {ds_name}, accuracy = {acc_avgs[ds_name]}")

print()
print("acc_avgs:\n\t ", acc_avgs)
print()
print("fit_time_avgs:\n\t ", fit_time_avgs)
print()
print("predict_time_avgs:\n\t ", predict_time_avgs)

Done Lymphography, accuracy = 0.923
Done Abalone, accuracy = 0.922
Done BalanceScale, accuracy = 0.253
Done LetterRecognition, accuracy = 0.993
Done ContraceptiveMethodChoice, accuracy = 0.908
Done Chess, accuracy = 0.98
Done Car, accuracy = 0.997
Done Glass, accuracy = 1.0
Done Nursery, accuracy = 1.0
Done HayesRoth, accuracy = 0.987

acc_avgs:
	  {'Lymphography': 0.923, 'Abalone': 0.922, 'BalanceScale': 0.253, 'LetterRecognition': 0.993, 'ContraceptiveMethodChoice': 0.908, 'Chess': 0.98, 'Car': 0.997, 'Glass': 1.0, 'Nursery': 1.0, 'HayesRoth': 0.987}

fit_time_avgs:
	  {'Lymphography': 0.421, 'Abalone': 1.376, 'BalanceScale': 0.467, 'LetterRecognition': 5.621, 'ContraceptiveMethodChoice': 0.537, 'Chess': 8.809, 'Car': 0.583, 'Glass': 0.422, 'Nursery': 2.184, 'HayesRoth': 0.369}

predict_time_avgs:
	  {'Lymphography': 0.028, 'Abalone': 0.049, 'BalanceScale': 0.038, 'LetterRecognition': 0.077, 'ContraceptiveMethodChoice': 0.031, 'Chess': 0.112, 'Car': 0.031, 'Glass': 0.033, 'Nursery': 