In [6]:
import pandas as pd
from scipy.stats import friedmanchisquare
from utils import string_supporting_mean, print_rules
from experiments_utils.results.tables import Tables

def read_classification_results() -> pd.DataFrame:
    """Load the classification metrics with the identifying columns first.

    Points ``Tables`` at ``'../classification/min_supp_20'``, concatenates
    whatever the wildcard ``'metrics'`` query returns (presumably one
    DataFrame per matching results file -- confirm against ``Tables.query``)
    and reorders the columns.

    Returns:
        A DataFrame whose first two columns are ``'dataset'`` and
        ``'model_type'``, followed by the remaining columns in their
        original order. Columns whose name contains ``'Unnamed'`` are dropped.

    Raises:
        ValueError: If ``'model_type'`` or ``'dataset'`` is not among the
            retained columns.
    """
    Tables.configure('../classification/min_supp_20')
    metric_tables = Tables.query('*', '*', '*', 'metrics', as_pandas=True)
    metrics = pd.concat(metric_tables)

    # Skip columns carrying an 'Unnamed' label.
    remaining = [name for name in metrics.columns.tolist() if 'Unnamed' not in name]
    # Pull the identifying columns out so they can be placed at the front.
    for key_column in ('model_type', 'dataset'):
        remaining.remove(key_column)
    return metrics[['dataset', 'model_type'] + remaining]

def read_regression_results() -> pd.DataFrame:
    """Load the regression metrics as a single DataFrame.

    Points ``Tables`` at ``'../regression/min_supp_20'`` and concatenates
    everything the wildcard ``'metrics'`` query returns, without reordering
    or filtering columns.
    """
    Tables.configure('../regression/min_supp_20')
    metric_tables = Tables.query('*', '*', '*', 'metrics', as_pandas=True)
    return pd.concat(metric_tables)

def read_survival_results() -> pd.DataFrame:
    """Load the survival metrics as a single DataFrame.

    Points ``Tables`` at ``'../survival/min_supp_20'`` and concatenates
    everything the wildcard ``'metrics'`` query returns, without reordering
    or filtering columns.
    """
    Tables.configure('../survival/min_supp_20')
    metric_tables = Tables.query('*', '*', '*', 'metrics', as_pandas=True)
    return pd.concat(metric_tables)

In [7]:
# Load the metrics tables for the three problem types. Each reader calls
# Tables.configure with its own results directory before querying, so the
# calls do not depend on one another's configuration.
classification_results = read_classification_results()
regression_results = read_regression_results()
survival_results = read_survival_results()



../classification/min_supp_20\*\*\*\metrics.csv




../regression/min_supp_20\*\*\*\metrics.csv




../survival/min_supp_20\*\*\*\metrics.csv


In [8]:
def perform_friedman_test(
    df: pd.DataFrame,
    score_column: str,
    model_type_column: str,
):
    """Run the Friedman test on one score column, one sample per model type.

    Args:
        df: Results table holding at least ``score_column`` and
            ``model_type_column``.
        score_column: Name of the column with the measurements to compare.
        model_type_column: Name of the column whose distinct values define
            the groups being compared.

    Returns:
        Whatever ``scipy.stats.friedmanchisquare`` returns for the samples.

    Note:
        Samples are taken in row order and are not aligned on any block
        key here. NOTE(review): the Friedman test is a repeated-measures
        test, so this presumably relies on every model type listing the
        same blocks (e.g. datasets) in the same order -- confirm.
    """
    samples = []
    # unique() yields the groups in order of first appearance.
    for model_type in df[model_type_column].unique():
        belongs_to_model = df[model_type_column] == model_type
        samples.append(df.loc[belongs_to_model, score_column])
    return friedmanchisquare(*samples)


In [14]:

# Friedman test per metric group and problem type. Each case is
# (problem name, results table, column to test); groups are always
# defined by the 'model_type' column.
friedman_results = {
    metric_group: {
        problem: perform_friedman_test(
            results,
            score_column=tested_column,
            model_type_column='model_type',
        )
        for problem, results, tested_column in cases
    }
    for metric_group, cases in (
        ('rules', (
            ('classification', classification_results, 'rules'),
            ('regression', regression_results, 'rules'),
            ('survival', survival_results, 'rules'),
        )),
        ('score', (
            ('classification', classification_results, 'BAcc (test)'),
            ('regression', regression_results, 'rRMSE (test)'),
            ('survival', survival_results, 'integrated_brier_score (test)'),
        )),
    )
}

In [34]:
def correct_p_values_fdr(pvalues: list):
    """Adjust p-values with the Benjamini-Hochberg (FDR) step-up procedure.

    Each p-value is scaled by ``N / rank``, where ``rank`` is its 1-based
    position in ascending order and ``N`` is the number of p-values. The
    scaled values are then made monotone by taking a running minimum from
    the largest p-value downwards, capped at 1.0, so a smaller raw p-value
    never ends up with a larger adjusted value.

    Args:
        pvalues: Raw p-values in any order. Duplicates are allowed.

    Returns:
        A list of adjusted p-values in the same order as ``pvalues``
        (an empty list for empty input).
    """
    n_tests = len(pvalues)
    # Rank by position, not by value: keying on the value itself would make
    # tied p-values share one output slot and leave the others unfilled.
    ascending = sorted(range(n_tests), key=lambda idx: pvalues[idx])
    adjusted_p_values = [None] * n_tests

    # Walk from the largest p-value to the smallest, carrying the minimum
    # seen so far; this enforces monotonicity and the upper bound of 1.0.
    running_min = 1.0
    for rank in range(n_tests, 0, -1):
        position = ascending[rank - 1]
        running_min = min(running_min, pvalues[position] * n_tests / rank)
        adjusted_p_values[position] = running_min
    return adjusted_p_values

In [35]:
# Summary table: raw and FDR-adjusted Friedman p-values, one row per
# problem type. The correction is applied separately within each metric
# group of friedman_results. The 'Problem' labels are listed by hand and
# must match the insertion order of the inner dicts.
table_columns = {'Problem': ['classification', 'regression', 'survival']}
for column_name, problems_results in friedman_results.items():
    pvalues = [result.pvalue for result in problems_results.values()]
    table_columns[f'{column_name} (Before correction)'] = pvalues
    table_columns[f'{column_name} (After correction)'] = correct_p_values_fdr(pvalues)
df = pd.DataFrame(table_columns)

df

Unnamed: 0,Problem,rules (Before correction),rules (After correction),score (Before correction),score (After correction)
0,classification,0.00032,0.00032,0.216915,0.216915
1,regression,3e-06,1e-05,0.079657,0.119485
2,survival,2.7e-05,4.1e-05,0.039261,0.117784


: 