In [155]:
import survivors
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [160]:
def get_best_by_full_name(df_full, by_metric="IAUC", choose="max"):
    df = df_full.copy()
    df["METHOD"] = df.apply(lambda x: x["METHOD"].replace("CRAID", f"Tree({x['CRIT']})"), axis=1)
    if not (by_metric in df.columns):
        return None
    best_table = pd.DataFrame([], columns=df.columns)
    for method in df["METHOD"].unique():
        sub_table = df[df["METHOD"] == method]
        if sub_table.shape[0] == 0:
            continue
        if choose == "max":
            best_row = sub_table.loc[sub_table[by_metric].apply(np.mean).idxmax()]
        else:
            best_row = sub_table.loc[sub_table[by_metric].apply(np.mean).idxmin()]
        best_table = best_table.append(dict(best_row), ignore_index=True)
    return best_table


def plot_boxplot_results(df_full, dir_path=None, metrics=[],
                         dataset_name="", all_best=False,
                         by_metric="IAUC", choose="max"):
    if not (all_best):
        df_ = get_best_by_full_name(df_full, by_metric, choose)
    for m in metrics:
        if all_best:
            df_ = get_best_by_full_name(df_full, m, choose="min" if m == "IBS" else "max")
        plt.rcParams.update({'font.size': 15})
        fig, axs = plt.subplots(1, figsize=(8, 8))

        plt.title(f"{dataset_name} {m}")
        plt.boxplot(df_[m][::-1], labels=df_['METHOD'][::-1], showmeans=True, vert=False)
        if dir_path is None:
            plt.show()
        else:
            plt.savefig(os.path.join(dir_path, f"{dataset_name}_{m}_boxplot.png"))
            plt.close(fig)


def import_tables(dirs):
    dfs = []
    for d in dirs:
        df_ = pd.read_excel(d)
        df_["table_path"] = d
        dfs.append(df_)
    df = pd.concat(dfs, ignore_index=True)
    df = df.drop_duplicates()
    for c in ["IBS", "IAUC", "CI", "CI_CENS"]:
        df[c] = df[c].apply(lambda x: list(map(float, x[1:-1].split())))
    return df


def table_crowler(path_dir, type_postfix="xlsx"):
    path_tables = []
    dirs = [path_dir]
    while len(dirs) > 0:
        dir_ = dirs.pop(0)
        for obj in os.listdir(dir_):
            path = os.path.join(dir_, obj)
            if os.path.isfile(path):
                if path.find(type_postfix) != -1:
                    path_tables.append(path)
            else:
                dirs.append(path)
    return path_tables

In [182]:
def aggreg_experiments(path_dir):
    paths = table_crowler(path_dir, type_postfix="full.xlsx")
    df_full = import_tables(paths)
    df_best_by_metric = get_best_by_full_name(df_full, by_metric="IBS", choose="min")
    
    final_path = os.path.join("AGGREG_RES", path_dir + "_")
    df_full.to_excel(final_path + "FULL_AGGREG.xlsx", index=False)
    df_best_cut = df_best_by_metric.loc[:, ["METHOD", "PARAMS", "table_path", "CI_mean", "IBS_mean", "IAUC_mean"]].round(5)
    df_best_cut.to_excel(final_path + "BEST_AGGREG.xlsx", index=False)
    
    plot_boxplot_results(df_full, metrics=["IBS", "IAUC", "CI"], dataset_name=path_dir, dir_path="AGGREG_RES")

In [187]:
aggreg_experiments("PBC")

  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)
  best_table = best_table.append(dict(best_row), ignore_index=True)


In [167]:
res

Unnamed: 0,METHOD,PARAMS,table_path,CI_mean,IBS_mean,IAUC_mean
0,BoostingTree(weights),"{'aggreg_func': 'wei', 'categ': ['htreat', 'me...",GBSG\sqrt_hist\sqrt_wei_models_GBSG_full.xlsx,0.623081,0.170399,0.721587
1,BoostingTree(logrank),"{'aggreg_func': 'wei', 'categ': ['htreat', 'me...",GBSG\leaf_models_GBSG_full.xlsx,0.624905,0.170122,0.701094
2,BoostingTree(peto),"{'aggreg_func': 'mean', 'categ': ['htreat', 'm...",GBSG\leaf_models_GBSG_full.xlsx,0.623114,0.170951,0.703117
3,BoostingTree(tarone-ware),"{'aggreg_func': 'wei', 'categ': ['htreat', 'me...",GBSG\new_grid_params\new_GBSG_full.xlsx,0.621837,0.170392,0.726231
4,BoostingTree(wilcoxon),"{'aggreg_func': 'mean', 'categ': ['htreat', 'm...",GBSG\WEI KM_in_crit\wei_survive_GBSG_full.xlsx,0.62008,0.171159,0.68087
