In [1]:
import pandas as pd
from scipy.stats import friedmanchisquare
from utils import string_supporting_mean, print_rules
from experiments_utils.results.tables import Tables

def _read_metrics(results_dir: str) -> pd.DataFrame:
    """Point ``Tables`` at ``results_dir`` and return all queried metrics as one frame.

    ``Tables.configure`` is called on every invocation, so the most recent
    call determines which results directory ``Tables`` is bound to.

    The per-table frames are concatenated with pandas' default settings, so
    each source table keeps its own index labels (labels may repeat in the
    result).

    NOTE(review): the ('*', '*', '*', 'metrics') query presumably matches every
    ``metrics`` table three levels below ``results_dir`` — confirm against
    ``experiments_utils``.
    """
    Tables.configure(results_dir)
    return pd.concat(Tables.query('*', '*', '*', 'metrics', as_pandas=True))


def read_classification_results() -> pd.DataFrame:
    """Load the classification metrics with ``dataset`` and ``model_type`` first.

    Columns whose name contains ``'Unnamed'`` are dropped; the remaining
    columns keep their original relative order after the two leading ones.

    Raises:
        ValueError: if ``model_type`` or ``dataset`` is not among the columns.
    """
    metrics = _read_metrics('../classification/min_supp_20')
    columns = [c for c in metrics.columns.tolist() if 'Unnamed' not in c]
    # list.remove raises ValueError when a key column is missing, which
    # surfaces a malformed metrics table before any reordering happens.
    columns.remove('model_type')
    columns.remove('dataset')
    return metrics[['dataset', 'model_type'] + columns]


def read_regression_results() -> pd.DataFrame:
    """Load the regression metrics exactly as returned by the query (no column filtering)."""
    return _read_metrics('../regression/min_supp_20')


def read_survival_results() -> pd.DataFrame:
    """Load the survival metrics exactly as returned by the query (no column filtering)."""
    return _read_metrics('../survival/min_supp_20')

In [2]:
# Load the metrics tables for the classification, regression and survival experiments.
# Each reader calls Tables.configure with its own results directory before querying,
# so the calls are kept as separate, sequential statements.
classification_results = read_classification_results()
regression_results = read_regression_results()
survival_results = read_survival_results()



../classification/min_supp_20\*\*\*\metrics.csv




../regression/min_supp_20\*\*\*\metrics.csv




../survival/min_supp_20\*\*\*\metrics.csv


In [3]:
# Inspect which metric columns are available in the survival results.
survival_results.columns

Index(['Unnamed: 0', 'dataset', 'M-of-N count',
       'integrated_brier_score (train)', 'integrated_brier_score (test)',
       'rules', 'conditions_count', 'plain conditions count',
       'complex conditions count', 'avg conditions per rule',
       'avg rule quality', 'avg rule precision', 'avg rule coverage',
       'training time total (s)', 'training time growing (s)',
       'training time pruning (s)', 'pvalue significance fdr',
       'pvalue significance fwer', 'induction measure', 'pruning measure',
       'voting measure', 'variant', 'model_type', 'M-of-N count (std)',
       'integrated_brier_score (train) (std)',
       'integrated_brier_score (test) (std)', 'rules (std)',
       'conditions_count (std)', 'plain conditions count (std)',
       'complex conditions count (std)', 'avg conditions per rule (std)',
       'avg rule quality (std)', 'avg rule precision (std)',
       'avg rule coverage (std)', 'training time total (s) (std)',
       'training time growing (s) (s

In [4]:
def get_times_table(metrics: pd.DataFrame) -> pd.DataFrame:
    """Return only the identifying and training-time columns of ``metrics``.

    Args:
        metrics: frame containing ``dataset``, ``model_type`` and the
            total/growing/pruning training-time columns with their
            ``(std)`` counterparts.

    Returns:
        A new frame with the eight selected columns, in the order listed below.

    Raises:
        KeyError: if any of the selected columns is missing from ``metrics``.
    """
    id_columns = ['dataset', 'model_type']
    # NOTE(review): for "total" the value precedes its (std) column, while for
    # "growing" and "pruning" the (std) column comes first. The order is kept
    # as-is because it determines the column order of the exported CSV files.
    time_columns = [
        'training time total (s)',
        'training time total (s) (std)',
        'training time growing (s) (std)',
        'training time growing (s)',
        'training time pruning (s) (std)',
        'training time pruning (s)',
    ]
    return metrics[id_columns + time_columns]

### Classification

In [5]:
# Per-dataset, per-model training times for classification, rounded to three
# decimals, written to CSV (index included) and displayed.
df = get_times_table(classification_results)
df = df.round(3)
df.to_csv('./times/classification_full.csv', index=True)
df

Unnamed: 0,dataset,model_type,training time total (s),training time total (s) (std),training time growing (s) (std),training time growing (s),training time pruning (s) (std),training time pruning (s)
0,anneal,at_least_M-of-N,0.214,0.014,0.012,0.102,0.001,0.009
0,anneal,complex,0.278,0.071,0.064,0.408,0.005,0.013
0,anneal,exact_M-of-N,0.247,0.062,0.011,0.098,0.004,0.011
0,anneal,plain,0.082,0.014,0.023,0.168,0.002,0.014
0,auto-mpg,at_least_M-of-N,2.127,0.607,0.480,1.413,0.007,0.036
...,...,...,...,...,...,...,...,...
0,wine,plain,0.045,0.010,0.019,0.103,0.001,0.006
0,zoo,at_least_M-of-N,0.050,0.008,0.031,0.162,0.002,0.004
0,zoo,complex,0.245,0.110,0.111,0.443,0.005,0.030
0,zoo,exact_M-of-N,0.046,0.005,0.026,0.160,0.002,0.004


In [6]:
# Classification training times aggregated per model type.
# string_supporting_mean comes from utils; presumably a mean that tolerates
# string columns — confirm there. The aggregated 'dataset' column is dropped.
df = (
    get_times_table(classification_results)
    .groupby('model_type')
    .agg(string_supporting_mean)
    .drop(columns='dataset')
    .round(3)
)
df.to_csv('./times/classification_mean.csv', index=True)
df

Unnamed: 0_level_0,training time total (s),training time total (s) (std),training time growing (s) (std),training time growing (s),training time pruning (s) (std),training time pruning (s)
model_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
at_least_M-of-N,3.053,0.673,0.367,2.59,0.012,0.094
complex,3.21,0.339,0.544,4.218,0.015,0.14
exact_M-of-N,3.041,0.72,0.343,2.479,0.015,0.098
plain,2.428,0.154,0.231,4.117,0.009,0.095


### Regression

In [7]:
# Per-dataset, per-model training times for regression, rounded to three
# decimals, written to CSV (index included) and displayed.
df = get_times_table(regression_results)
df = df.round(3)
df.to_csv('./times/regression_full.csv', index=True)
df

Unnamed: 0,dataset,model_type,training time total (s),training time total (s) (std),training time growing (s) (std),training time growing (s),training time pruning (s) (std),training time pruning (s)
0,auto-mpg,at_least_M-of-N,1022.540,2875.831,17.735,9.463,53.624,23.560
0,auto-mpg,complex,884.694,514.851,510.328,876.833,4.709,7.847
0,auto-mpg,exact_M-of-N,1020.735,2874.341,14.089,8.001,36.662,16.760
0,auto-mpg,plain,2.239,2.266,1.867,1.845,0.434,0.390
0,auto-price,at_least_M-of-N,4.991,6.248,0.726,0.798,0.531,0.386
...,...,...,...,...,...,...,...,...
0,servo,plain,0.039,0.009,0.008,0.034,0.001,0.002
0,veteran,at_least_M-of-N,1.696,1.628,0.552,0.665,0.238,0.187
0,veteran,complex,2.326,0.955,0.904,2.210,0.051,0.103
0,veteran,exact_M-of-N,1.600,1.530,0.492,0.586,0.179,0.156


In [8]:
# Regression training times aggregated per model type.
# string_supporting_mean comes from utils; presumably a mean that tolerates
# string columns — confirm there. The aggregated 'dataset' column is dropped.
df = (
    get_times_table(regression_results)
    .groupby('model_type')
    .agg(string_supporting_mean)
    .drop(columns='dataset')
    .round(3)
)
df.to_csv('./times/regression_mean.csv', index=True)
df

Unnamed: 0_level_0,training time total (s),training time total (s) (std),training time growing (s) (std),training time growing (s),training time pruning (s) (std),training time pruning (s)
model_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
at_least_M-of-N,242.451,297.052,3.812,4.403,43.606,32.121
complex,352.48,121.477,119.819,346.54,1.817,5.916
exact_M-of-N,241.37,298.223,3.423,4.108,42.564,31.514
plain,81.655,81.251,58.171,60.296,23.246,21.355


### Survival

In [9]:
# Per-dataset, per-model training times for survival analysis, rounded to
# three decimals, written to CSV (index included) and displayed.
df = get_times_table(survival_results)
df = df.round(3)
df.to_csv('./times/survival_full.csv', index=True)
df

Unnamed: 0,dataset,model_type,training time total (s),training time total (s) (std),training time growing (s) (std),training time growing (s),training time pruning (s) (std),training time pruning (s)
0,bmt-ch,at_least_M-of-N,0.453,0.205,0.076,0.114,0.02,0.024
0,bmt-ch,complex,2547.357,456.827,450.192,2498.58,7.148,48.75
0,bmt-ch,exact_M-of-N,0.619,0.536,0.325,0.254,0.086,0.059
0,bmt-ch,plain,28.098,9.707,7.292,21.248,2.452,6.839
0,cancer,at_least_M-of-N,21.015,25.214,0.803,0.723,0.494,0.349
0,cancer,complex,43.196,20.663,19.346,40.463,1.326,2.719
0,cancer,exact_M-of-N,20.698,24.824,0.374,0.542,0.171,0.214
0,cancer,plain,1.163,0.603,0.467,0.942,0.138,0.217
0,follic,at_least_M-of-N,189.703,263.268,2.666,1.424,46.646,19.229
0,follic,complex,3285.325,2597.392,2574.801,3253.628,22.616,31.678


In [10]:
# Survival training times aggregated per model type.
# string_supporting_mean comes from utils; presumably a mean that tolerates
# string columns — confirm there. The aggregated 'dataset' column is dropped.
df = (
    get_times_table(survival_results)
    .groupby('model_type')
    .agg(string_supporting_mean)
    .drop(columns='dataset')
    .round(3)
)
df.to_csv('./times/survival_mean.csv', index=True)
df

Unnamed: 0_level_0,training time total (s),training time total (s) (std),training time growing (s) (std),training time growing (s),training time pruning (s) (std),training time pruning (s)
model_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
at_least_M-of-N,18.52,24.474,0.598,0.662,4.172,2.427
complex,4752.398,2394.628,2364.754,4688.277,30.269,64.099
exact_M-of-N,24.255,31.535,1.044,1.268,7.579,7.217
plain,52.753,34.793,24.895,37.768,10.039,14.977
