### Visualize the variable ratios and cross tables of the data.


In [None]:
import os
import pandas as pd

## for plots
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches
from palettable.colorbrewer.diverging import Spectral_8

import sys
sys.path.append("../")
from utils.myUtils import mkdir
from mySettings import get_variable_ratio_visualization_setting_dict

In [None]:
def change_bar_width(ax, new_width):
    """
    Give every bar on the axes the same width without moving its center.

    Each patch in ``ax.patches`` is resized to ``new_width`` and its left
    edge is moved by half of the width difference, so the bar stays
    centered on its original position.
    """
    for bar in ax.patches:
        # Half of the width change: positive when the bar gets narrower.
        half_shift = (bar.get_width() - new_width) * .5
        bar.set_width(new_width)
        bar.set_x(bar.get_x() + half_shift)
        
    
def visualize_each_category_ratio(data_df, x, y, save_basepath, save_prefix):
    """
    Draw a count plot of column ``x``, optionally split by hue column ``y``.

    Every bar is narrowed, annotated with its count, and the figure is
    saved as a JPEG under ``save_basepath`` before being shown. The file
    name is built from ``save_prefix`` and the column name(s), with dots
    replaced by underscores.
    """
    # Categories sorted by frequency, unless "TCGA-GBM" is present, in which
    # case the fixed order ["TCGA-GBM", "TCGA-LGG"] is used.
    categories = data_df[x].value_counts().index
    order = ["TCGA-GBM", "TCGA-LGG"] if "TCGA-GBM" in categories else categories

    # Only pass ``hue`` when a second column was requested.
    catplot_kwargs = dict(x=x, data=data_df, kind='count', order=order)
    if y is not None:
        catplot_kwargs["hue"] = y
    g = sns.catplot(**catplot_kwargs)

    # Narrow the bars on the single facet.
    ax = g.facet_axis(0, 0)
    change_bar_width(ax, new_width=0.4)

    # Write the count slightly above each bar, nudged left of its center.
    for bar in ax.patches:
        bounds = bar.get_bbox()
        bar_height = bar.get_height()
        ax.text((bounds.x0 + bounds.x1)/2-0.04,
                bar_height+0.2,
                '{0:.0f}'.format(bar_height),
                color='black', rotation='horizontal', size='large')

    # Figure cosmetics: horizontal grid lines and fixed y-limits for the
    # train/test prefixes.
    fig = g.fig
    ax.grid(axis="y")
    ylim_by_prefix = {"train_": (0, 90), "test_": (0, 45)}
    if save_prefix in ylim_by_prefix:
        g.set(ylim=ylim_by_prefix[save_prefix])

    plt.tight_layout()

    # Save the plot; the file name depends on whether a hue column was used.
    file_stem = save_prefix + x.replace(".", "_")
    if y is None:
        plt.savefig(save_basepath + "/" + file_stem + "_Counts.jpeg")
    else:
        fig.savefig(save_basepath + "/" + file_stem + "_vs_" + y.replace(".", "_") + "_Counts.jpeg")

    plt.show()
    
    
def plot_crosstab(cross_table, stacked, save_plot_path):
    """
    Plot a cross table as an annotated bar chart, then save and show it.

    Parameters
    ----------
    cross_table :
        Table to plot through its ``.plot.bar`` accessor (in this file it is
        the result of ``pd.crosstab``).
    stacked : bool
        Passed to ``.plot.bar``; when true the bars are also narrowed to a
        width of 0.4.
    save_plot_path : str
        File path handed to ``plt.savefig``.
    """
    # Reverse a *copy* of the palette. Calling ``.reverse()`` on
    # ``Spectral_8.hex_colors`` directly mutates the shared list, so the
    # color order would flip on every call.
    colors = list(reversed(Spectral_8.hex_colors))

    # plot the cross table.
    ax = cross_table.plot.bar(stacked=stacked, xlabel="", color=colors)
    plt.xticks(rotation=360)
    plt.ylabel("Number of Patient")
    plt.grid(True)
    plt.legend(loc="best")

    # change bar width
    if stacked:
        change_bar_width(ax, new_width=0.4)

    # Annotate every bar segment with its value, centered in the segment.
    for rect in ax.patches:
        height = rect.get_height()

        # Skip empty segments (a NaN height also fails this comparison).
        if not height > 0:
            continue

        # The height of the bar is the data value and is used as the label.
        label_text = int(height)
        label_x = rect.get_x() + rect.get_width() / 2
        label_y = rect.get_y() + height / 2
        ax.text(label_x, label_y, label_text, ha='center', va='center', fontsize=12)

    # save plots
    plt.tight_layout()
    plt.savefig(save_plot_path)
    plt.show()

In [None]:
def visualize_variable_ratio(data_excel_path, crosstab_setting_dict, visualize_category_setting_list, save_basepath):
    """
    Load the data sheet, relabel known columns, and produce all plots.

    The Excel file is read with its first column as the index. For each
    entry of ``crosstab_setting_dict`` a cross table is printed (with
    margins) and plotted (without margins); for each entry of
    ``visualize_category_setting_list`` a count plot is drawn. All figures
    are written under ``save_basepath``.
    """
    data_df = pd.read_excel(data_excel_path, index_col=0)

    # (source column, destination column, raw value -> display label).
    # "Study" is written to a new "Tumor Grade" column; the other columns
    # are relabeled in place. Values missing from a mapping become NaN
    # (pandas ``Series.map`` behavior).
    relabel_rules = (
        ("Study", "Tumor Grade",
         {"Glioblastoma multiforme": "GBM",
          "Brain Lower Grade Glioma": "LGG"}),
        ("IDH.status", "IDH.status",
         {"Mutant": "IDH mutant",
          "WT": "IDH wild-type"}),
        ("X1p.19q.codeletion", "X1p.19q.codeletion",
         {"codel": "1p/19q codeleted",
          "non-codel": "1p/19q intact"}),
        ("MGMT.promoter.status", "MGMT.promoter.status",
         {"Methylated": "MGMT methylated",
          "Unmethylated": "MGMT unmethylated"}),
    )
    for source_col, target_col, label_map in relabel_rules:
        if source_col in data_df.columns:
            data_df[target_col] = data_df[source_col].map(label_map)

    # Print and plot each configured cross table.
    for table_name, table_setting in crosstab_setting_dict.items():
        row_series = [data_df[name] for name in table_setting["index"]]
        column_series = [data_df[name] for name in table_setting["columns"]]

        # The printed table includes the row/column totals.
        table_with_totals = pd.crosstab(row_series, column_series, margins=True)
        print("\n---- Cross tabel -----\n{}".format(table_with_totals))

        # The plotted table leaves the totals out.
        stacked = table_setting["stacked_for_plots"]
        table_for_plot = pd.crosstab(row_series, column_series, margins=False)
        plot_path = save_basepath + "/crosstab_" + table_name + ".jpeg"
        plot_crosstab(table_for_plot, stacked=stacked, save_plot_path=plot_path)

    # Plot the ratio of each configured categorical variable.
    for category_setting in visualize_category_setting_list:
        visualize_each_category_ratio(
            data_df,
            x=category_setting["x"],
            y=category_setting["hue"],
            save_basepath=save_basepath,
            save_prefix="",
        )
    

#### Main

In [None]:
# Run the visualization once per configured setting.
variable_ratio_visualization_setting_dict = get_variable_ratio_visualization_setting_dict()
for setting_name, variable_ratio_visualization_setting in variable_ratio_visualization_setting_dict.items():
    print(f"\n\n==================== {setting_name} ==============")

    # Unpack the inputs for this setting.
    data_excel_path = variable_ratio_visualization_setting["data_excel_path"]
    crosstab_setting_dict = variable_ratio_visualization_setting["crosstab_setting_dict"]
    visualize_category_setting_list = variable_ratio_visualization_setting["visualize_category_setting_list"]

    # Each setting gets its own output folder; create it before plotting.
    save_basepath = os.path.join(
        variable_ratio_visualization_setting["save_basepath"],
        "visualization_data_" + setting_name,
    )
    mkdir(save_basepath)

    # Produce the cross tables and count plots.
    visualize_variable_ratio(
        data_excel_path,
        crosstab_setting_dict,
        visualize_category_setting_list,
        save_basepath,
    )
    
    
    