## Arrange and analyze the results of the numerous experiments.



In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from mySettings import *

import sys
sys.path.append("../")
from utils.myUtils import traversalDir_FirstDir

In [None]:
"""
Arrange and save all the experiment results into an excel file.
"""
def arrange_results(results_bathpath, save_analyzed_results_basepath):
    Arranged_Results=None
    
    basic_description_list=traversalDir_FirstDir(results_bathpath)
    for basic_description in basic_description_list:
        baseResultsPath_description=os.path.join(results_bathpath, basic_description)
        experiment_class_list=traversalDir_FirstDir(baseResultsPath_description)
        
        for experiment_class in experiment_class_list:
            baseResultsPath_experiment=os.path.join(baseResultsPath_description, experiment_class)
            harmonization_method_list=traversalDir_FirstDir(baseResultsPath_experiment)
            
            for harmonization_method in harmonization_method_list:
                baseResultsPath_harmonization=os.path.join(baseResultsPath_experiment, harmonization_method)
                task_list=traversalDir_FirstDir(baseResultsPath_harmonization)
                
                for task in task_list:
                    #print("\n\n ==basic_description={}, experiment_class={}, harmonization_method={}, task={}===\n".format(basic_description, experiment_class, harmonization_method, task))
                    baseResultsPath_task=os.path.join(baseResultsPath_harmonization, task)
                    AUC_results_txt_file=os.path.join(baseResultsPath_task, "AUC_results_all_models.txt")
                    
                    AUC_results=pd.read_csv(AUC_results_txt_file, header=0, index_col=0)
                    AUC_results.insert(0, "image_type", basic_description)
                    AUC_results.insert(1, "experiment_class_and_features", experiment_class)
                    AUC_results.insert(1, "experiment_class", experiment_class.split("_")[0]+"_"+experiment_class.split("_")[1])
                    AUC_results.insert(2, "harmonization_method", harmonization_method)
                    AUC_results.insert(3, "task", task)
                    task_split=task.split("_")
                    AUC_results.insert(4, "base_task", task_split[0]+"_"+task_split[2])
                    #print(AUC_results)
                    
                    if isinstance(Arranged_Results, pd.DataFrame):
                        Arranged_Results=pd.concat([Arranged_Results, AUC_results], axis=0)
                    else:
                        Arranged_Results=AUC_results
                    
    
    save_arranged_results_path=os.path.join(save_analyzed_results_basepath, "arranged_results.xlsx")
    Arranged_Results.reset_index(drop=True, inplace=True)
    Arranged_Results.to_excel(save_arranged_results_path)
    
    return Arranged_Results
    
    


In [None]:
"""
Analyze the impact of image types.
"""
def analyze_impact_of_imagetypes(results_bathpath, save_analyzed_results_basepath):
    ## makedir
    if not os.path.exists(save_analyzed_results_basepath):
        os.makedirs(save_analyzed_results_basepath)
        
    ## Arrange the results
    Arranged_Results=arrange_results(results_bathpath, save_analyzed_results_basepath)

    ## Save the results for each task.
    analyzed_results_path=os.path.join(save_analyzed_results_basepath, "arranged_results_analyze_imagetype.xlsx")
    writer = pd.ExcelWriter(analyzed_results_path)

    ## Analyze the results.
    for task, task_results_df in Arranged_Results.groupby(['task'], sort=True):
        print("\n\n ***********  task={} ******************".format(task))

        task_results_df["description"]=task_results_df["image_type"]+"_"+task_results_df["harmonization_method"]

        ## plot
        plt.figure(figsize=(20,6))
        plt.xticks(size=12)
        plt.yticks(size=12)
        plt.ylim((0, 1.1))
        plt.xlabel('model_name',size=16)
        plt.ylabel( 'mean_AUC',size=16)
        ax=sns.barplot(x="model_name", y="mean_AUC", hue="description", data=task_results_df, palette="viridis")
        plt.xticks(rotation=15)

        for p in ax.patches:
            color=p.get_facecolor()
            box = p.get_bbox()
            ax.annotate("%.4f" % p.get_height(), xy=((box.x0 + box.x1)/2-0.02, p.get_height()+0.02), color=color, 
                        rotation=90, fontsize=8, weight='bold')

        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width , box.height* 0.8])
        ax.legend(loc='center left', bbox_to_anchor=(0.1, 1.2), ncol=4)
        plt.subplots_adjust(left=0.07, bottom=0.2, right=0.98, top=0.75, wspace =0, hspace =0.5)
        plt.savefig(os.path.join(save_analyzed_results_basepath, task.replace(".", "-")+".jpeg"), dpi=600)
        plt.show()


        #save to the excel
        task_results_df.sort_values("mean_AUC", ascending=False, inplace=True)
        task_results_df.to_excel(writer, sheet_name=task)
    writer.save()


"""
Analyze the impact of image types.
"""
def analyze_impact_of_clinicalInfo(results_bathpath, save_analyzed_results_basepath, imagetype_dict, imagetype_statistic_method):
    ## makedir
    save_analyzed_results_basepath=save_analyzed_results_basepath+"_"+imagetype_statistic_method
    if not os.path.exists(save_analyzed_results_basepath):
        os.makedirs(save_analyzed_results_basepath)
        
    ## Arrange the results
    Arranged_Results=arrange_results(results_bathpath, save_analyzed_results_basepath)
    if imagetype_statistic_method=="useBestImageType":
        Arranged_Results["keep_imagetype"]=Arranged_Results[["experiment_class", "image_type"]].apply(lambda x: imagetype_dict[x["experiment_class"]]==x["image_type"], axis=1)
        Arranged_Results=Arranged_Results[Arranged_Results["keep_imagetype"]==True]

    ## Save the results for each task.
    analyzed_results_path=os.path.join(save_analyzed_results_basepath, "arranged_results_analyze_clinicalInfo.xlsx")
    writer = pd.ExcelWriter(analyzed_results_path)

    ## Analyze the results.
    for basic_task, task_results_df in Arranged_Results.groupby(['base_task'], sort=True):
        print("\n\n ***********  basic_task={} ******************".format(basic_task))

        task_results_df["clinicalInfo"]=task_results_df[["task"]].apply(lambda x: x["task"].split("_")[-1], axis=1) 

        ## plot
        plt.figure(figsize=(20,6))
        plt.xticks(size=12)
        plt.yticks(size=12)
        plt.ylim((0, 1.1))
        plt.xlabel('model_name',size=16)
        plt.ylabel( 'mean_AUC',size=16)
        ax=sns.barplot(x="model_name", y="mean_AUC", hue="clinicalInfo", data=task_results_df, palette="viridis")
        plt.xticks(rotation=15)

        for p in ax.patches:
            color=p.get_facecolor()
            box = p.get_bbox()
            ax.annotate("%.4f" % p.get_height(), xy=((box.x0 + box.x1)/2-0.02, p.get_height()+0.02), color=color, 
                        rotation=90, fontsize=8, weight='bold')

        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width , box.height* 0.8])
        ax.legend(loc='center left', bbox_to_anchor=(0.45, 1.2), ncol=4)
        plt.subplots_adjust(left=0.07, bottom=0.2, right=0.98, top=0.75, wspace =0, hspace =0.5)
        plt.savefig(os.path.join(save_analyzed_results_basepath, basic_task+".jpeg"), dpi=600)
        plt.show()


        #save to the excel
        task_results_df.sort_values("mean_AUC", ascending=False, inplace=True)
        task_results_df.to_excel(writer, sheet_name=basic_task)
    writer.save()

### Main

In [None]:
## Analyze the impact of image types.
## The output folder is the results folder name with "_ImpactOfImageTypes" appended.
basic_settings = get_basic_settings()
results_bathpath = basic_settings["results_bathpath"]
save_analyzed_results_basepath = results_bathpath + "_ImpactOfImageTypes"

analyze_impact_of_imagetypes(
    results_bathpath=results_bathpath,
    save_analyzed_results_basepath=save_analyzed_results_basepath,
)

In [None]:
## Analyze the impact of clinical info, once per image-type statistic method.
## The output folder prefix is the results folder name with "_ImpactOfClinicalInfo" appended.
basic_settings = get_basic_settings()
results_bathpath = basic_settings["results_bathpath"]
imagetype_dict = basic_settings["imagetype_dict"]
save_analyzed_results_basepath = results_bathpath + "_ImpactOfClinicalInfo"

for imagetype_statistic_method in ("useBestImageType", "allImageTypes"):
    analyze_impact_of_clinicalInfo(
        results_bathpath,
        save_analyzed_results_basepath,
        imagetype_dict,
        imagetype_statistic_method=imagetype_statistic_method,
    )