### Arrange the results
- Compare different normalization methods;
- Compare different features;
- Compare different image filters.


In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from mySettings import get_arrange_results_settings_dict

import sys
sys.path.append("../")
from utils.myUtils import traversalDir_FirstDir

In [None]:
def arrange_results_to_excel(experiment_results_bathpath):
    """
    Arrange and save all the experiment results into an excel file.

    The results folder is walked four levels deep::

        <image_type>/<experiment_description>/<harmonization_method>/<task>/

    and ``AUC_results_all_models.txt`` is read from every task folder. The
    folder names are parsed into descriptive columns that are prepended to
    each table, the tables are stacked, and the combined table is written to
    ``arranged_results.xlsx`` directly under ``experiment_results_bathpath``.

    Parameters
    ----------
    experiment_results_bathpath : str
        Root folder of the experiment results.

    Returns
    -------
    tuple
        ``(Arranged_Results, save_arranged_excel_path)``: the combined
        ``pandas.DataFrame`` and the path of the Excel file it was saved to.

    Raises
    ------
    ValueError
        If no task folder is found under ``experiment_results_bathpath``.
    """
    # One data frame per task folder. They are concatenated once after the
    # walk, because concatenating inside the loop re-copies all accumulated
    # rows on every iteration.
    per_task_results = []

    # NOTE(review): traversalDir_FirstDir is presumably returning the names of
    # the first-level sub-folders (they are joined onto the parent path below);
    # confirm against utils.myUtils.
    for image_type in traversalDir_FirstDir(experiment_results_bathpath):
        # folder 1: image_type, for example, {original, exponential, ...}
        basePath_image_type = os.path.join(experiment_results_bathpath, image_type)

        for experiment_description in traversalDir_FirstDir(basePath_image_type):
            # folder 2: description of the experiments; the text after the
            # last "-" is taken as the normalization method.
            normalization_method = experiment_description.split("-")[-1]
            basePath_experiment_description = os.path.join(basePath_image_type, experiment_description)

            for harmonization_method in traversalDir_FirstDir(basePath_experiment_description):
                # folder 3: "<harmonization method>-<data imbalance strategy>"
                harmonization_parts = harmonization_method.split("-")
                ComBat_method = harmonization_parts[0]
                Data_imblance_strategy = harmonization_parts[1]
                basePath_harmonization = os.path.join(basePath_experiment_description, harmonization_method)

                for task in traversalDir_FirstDir(basePath_harmonization):
                    # folder 4: task, for example, "TCGA_1.101_isGBM_base";
                    # the third and fourth "_"-separated fields are the base
                    # task and its additional description.
                    task_parts = task.split("_")
                    base_task = task_parts[2]
                    task_additional_description = task_parts[3]

                    basePath_task = os.path.join(basePath_harmonization, task)
                    AUC_results_txt_file = os.path.join(basePath_task, "AUC_results_all_models.txt")

                    AUC_results = pd.read_csv(AUC_results_txt_file, header=0, index_col=0)
                    # Prepend the descriptors parsed from the folder names.
                    AUC_results.insert(0, "image_type", image_type)
                    AUC_results.insert(1, "normalization_method", normalization_method)
                    AUC_results.insert(2, "ComBat_method", ComBat_method)
                    AUC_results.insert(3, "Data_imblance_strategy", Data_imblance_strategy)
                    AUC_results.insert(4, "task", task)
                    AUC_results.insert(5, "base_task", base_task)
                    AUC_results.insert(6, "task_additional_description", task_additional_description)

                    per_task_results.append(AUC_results)

    if not per_task_results:
        # Without this guard the column assignment below would fail with an
        # unhelpful "'NoneType' object is not subscriptable".
        raise ValueError(
            "No task result folders found under {!r}.".format(experiment_results_bathpath)
        )

    Arranged_Results = pd.concat(per_task_results, axis=0)

    # add columns to tell feature selection method and classifier; model_name
    # is split on "_" into "<feature_selection>_<classifier>".
    Arranged_Results["feature_selection"] = Arranged_Results["model_name"].apply(lambda x: x.split("_")[0])
    Arranged_Results["classifier"] = Arranged_Results["model_name"].apply(lambda x: x.split("_")[1])

    # save the results
    save_arranged_excel_path = os.path.join(experiment_results_bathpath, "arranged_results.xlsx")
    Arranged_Results = Arranged_Results.reset_index(drop=True)
    Arranged_Results.to_excel(save_arranged_excel_path)

    return Arranged_Results, save_arranged_excel_path


def visualize_rranged_results(arranged_excel_path, groupby_column, plot_setting):
    """
    Plot the results in a bar plot for better visualization.

    The arranged results are split into groups by ``groupby_column``. For
    every group a bar plot of ``mean_AUC`` is drawn, saved as a JPEG next to
    the input file and shown; the group's rows, sorted by ``mean_AUC`` in
    descending order, are written to their own sheet of
    ``arranged_results_<x_column>-<hue_column>.xlsx`` in the same folder.

    Parameters
    ----------
    arranged_excel_path : str
        Path of the Excel file holding the arranged results; it must contain
        a ``mean_AUC`` column.
    groupby_column : str
        Column whose distinct values define the groups (one plot and one
        sheet per value).
    plot_setting : dict
        Must provide ``"x_column"`` (column on the x axis), ``"hue_column"``
        (column used for bar colors) and ``"hue_order"`` (order of the hue
        levels; its length sets the number of legend columns).

    Returns
    -------
    None
    """
    # read data
    Arranged_Results = pd.read_excel(arranged_excel_path, index_col=0)
    x_column = plot_setting["x_column"]
    hue_column = plot_setting["hue_column"]
    hue_order = plot_setting["hue_order"]

    # All outputs are written next to the arranged excel file.
    save_bathpath = os.path.dirname(arranged_excel_path)
    save_excel_path = os.path.join(save_bathpath, "arranged_results_"+x_column+"-"+hue_column+".xlsx")

    # The context manager saves and closes the workbook even if a group fails;
    # ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(save_excel_path) as writer:
        # Group by the bare column label: grouping by a one-element list
        # yields 1-tuples as keys in pandas >= 2.0, which would break the
        # file name and sheet name built from the key below.
        for task, task_results_df in Arranged_Results.groupby(groupby_column, sort=True):
            print("\n\n ***********  task={} ******************".format(task))
            task_label = str(task)

            ## plot
            fig = plt.figure(figsize=(15,5))
            plt.xticks(size=12)
            plt.yticks(size=12)
            plt.ylim((0, 1.1))
            plt.xlabel(x_column,size=16)
            plt.ylabel( 'mean_AUC',size=16)
            ax = sns.barplot(x=x_column, y="mean_AUC", hue=hue_column, hue_order=hue_order, data=task_results_df, palette="Paired") #viridis, Spectral
            plt.xticks(rotation=15)

            # Write each bar's height just above it, in the bar's own color.
            for p in ax.patches:
                color = p.get_facecolor()
                box = p.get_bbox()
                ax.annotate("%.4f" % p.get_height(), xy=((box.x0 + box.x1)/2-0.02, p.get_height()+0.02), color=color,
                            rotation=90, fontsize=8, weight='bold')

            # Shrink the axes vertically so the legend fits above the plot.
            box = ax.get_position()
            ax.set_position([box.x0, box.y0, box.width , box.height* 0.8])
            ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.2),  ncol=len(hue_order))
            plt.subplots_adjust(left=0.07, bottom=0.2, right=0.98, top=0.75, wspace =0, hspace =0.5)
            # Dots in the group name are replaced so the name has a single extension.
            plt.savefig(os.path.join(save_bathpath, task_label.replace(".", "-")+".jpeg"), dpi=300)
            plt.show()
            # Release the figure; otherwise one figure per group stays in memory.
            plt.close(fig)

            # save to the excel; sort into a new frame rather than in place,
            # since the group is a slice of Arranged_Results.
            # NOTE(review): Excel limits sheet names to 31 characters; long
            # group names may warn or fail depending on the writer engine.
            sorted_task_results = task_results_df.sort_values("mean_AUC", ascending=False)
            sorted_task_results.to_excel(writer, sheet_name=task_label)

### Main

In [None]:
# Driver: for every configured arrangement, build the arranged results table
# and then plot it. Each settings entry supplies the results folder, the
# column to group the plots by, and the plot settings.
arrange_results_settings_dict=get_arrange_results_settings_dict()
for arrange_name, arrange_results_settings in arrange_results_settings_dict.items():
    results_basepath=arrange_results_settings["results_basepath"]
    groupby_column=arrange_results_settings["groupby_column"]
    plot_setting=arrange_results_settings["plot_setting"]
    
    ## Arrange the results and save them to an excel file under results_basepath;
    Arranged_Results, save_arranged_excel_path=arrange_results_to_excel(results_basepath)
    print("\n\n =========================== Arranged Results ====================================")
    # NOTE(review): `display` is not imported in this file; presumably the
    # IPython/Jupyter built-in, so this cell needs a notebook session.
    display(Arranged_Results.head())
    
    ## Visualize the arranged results (one plot and one excel sheet per group);
    visualize_rranged_results(save_arranged_excel_path, groupby_column, plot_setting)
    