In [147]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

%matplotlib inline

In [153]:
interpretability_bounds = dict(
    fraction_overlap=0.1,
    fraction_classes=1,
    fraction_uncovered=0.15,
    average_rule_width=8,
    ruleset_length=10
)

def is_solution_interpretable(metrics):
    return (
        metrics["fraction_overlap"] <= interpretability_bounds["fraction_overlap"] and
        metrics["fraction_classes"] >= interpretability_bounds["fraction_classes"] and
        metrics["fraction_uncovered"] <= interpretability_bounds["fraction_uncovered"] and
        metrics["average_rule_width"] <= interpretability_bounds["average_rule_width"] and
        metrics["ruleset_length"] <= interpretability_bounds["ruleset_length"]
    )

is_solution_interpretable_np = np.vectorize(is_solution_interpretable)

In [210]:
def process_numbers(x):
    if type(x) == float:
        x_rounded = round(x, 2)
        x_rounded_str = str(x_rounded)
        
        m_d0 = re.search("^(\d\.\d)0$", x_rounded_str)
        
        if x_rounded_str in ["0.00", "0.0"]:
            return "0"
        elif int(x_rounded) == float(x_rounded):
            return str(int(x_rounded))
        elif m_d0:
            return m_d0.group(1)
        else:
            return x_rounded
    
    return x

def swap_comma(x):
    return str(x).replace(".", ",")

In [211]:
def get_interpretable_column(auc_df):
    interpretable_arr = []
    
    for row in auc_df.iterrows():
        interpretable = is_solution_interpretable(row[1])
        interpretable_arr.append(interpretable)
        
    return interpretable_arr
    


In [219]:
def process_auc_dataframe(auc_df):
    auc_df = auc_df[auc_df.columns[1:]]
    dataset_mask = auc_df["dataset_name"].isin(["iris", "lymph", "anneal"])
    
    auc_df_processed = auc_df[dataset_mask].groupby(["dataset_name", "algorithm"]).mean()
    auc_df_processed["Interpretable"] = get_interpretable_column(auc_df_processed)
    
    selected_columns = ["auc", "fraction_classes", "fraction_overlap", "fraction_uncovered", "average_rule_width", "ruleset_length", "Interpretable", "rule_cutoff"]

    auc_df_processed = auc_df_processed[selected_columns]
    auc_df_processed = auc_df_processed.applymap(lambda x: round(x, 2))
    auc_df_processed.columns = [
        "AUC",
        "Fraction Classes",
        "Fraction Overlap",
        "Fraction Uncovered",
        "Average Rule Width",
        "Ruleset Length",
        "Interpretable",
        "Rule Cutoff"
    ]
    auc_df_processed.index.names = ["Dataset", "Algorithm"]
    
    auc_df_processed["Interpretable"]  = auc_df_processed["Interpretable"].apply(lambda x: "Y" if x else "N") 

    auc_df_processed = auc_df_processed.applymap(process_numbers).applymap(swap_comma)
    
    return auc_df_processed

def preprocess_to_latex(df):
    processed_df = df.copy()
    new_cols = [ f"\rot{{90}}{{0em}}{{{col}}}" for col in processed_df.columns ]
    
    processed_df.columns = new_cols
    
    return processed_df

In [None]:
def to_latex(input_path, output_path):
    df = pd.read_csv(input_path)
    processed_df = process_auc_dataframe(df)
    processed_df_prelatex = preprocess_to_latex(processed_df)
    processed_df_latex = processed_df_prelatex.to_latex(escape=False, column_format="ccllllllll")

    open(output_path, "w").write(processed_df_latex)

In [225]:
benchmark_auc_only_df = pd.read_csv("../output_data/auc_only_pyids_benchmark.csv")
processed_auc_only_pyids = process_auc_dataframe(benchmark_auc_only_df)
processed_auc_only_pyids_prelatex = preprocess_to_latex(processed_auc_only_pyids)
processed_auc_only_pyids_latex = processed_auc_only_pyids_prelatex.to_latex(escape=False, column_format="ccllllllll")

open("../output_data/auc_only_pyids_benchmark_table.tex", "w").write(processed_auc_only_pyids_latex)

3838

In [None]:
benchmark_cba_auc_only_df = pd.read_csv("../output_data/cba_auc_interpretability_benchmark.csv")

processed_cba_auc_only = process_auc_dataframe(benchmark_cba_auc_only_df)

processed_cba_auc_only_prelatex = preprocess_to_latex(processed_cba_auc_only)

processed_auc_only_pyids_latex = processed_cba_auc_only_prelatex.to_latex(escape=False, column_format="ccllllllll")