In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [2]:
def get_tuned_params_dict(lines):
    """Parse the tuned hyperparameters out of a tuner results file.

    The first six lines are skipped. Every following line is read as
    ``name: value`` until a line containing ``Score`` is reached; blank
    lines are ignored.

    Parameters
    ----------
    lines : list of str
        Lines of the results file, e.g. the result of ``f.readlines()``.

    Returns
    -------
    dict
        Hyperparameter name -> parsed value. ``dropout`` and
        ``learning_rate`` are floats, every other value is an int.
        When ``num_layers`` is 2, ``layer_2_units`` is forced to 0; when it
        is 1, both ``layer_1_units`` and ``layer_2_units`` are forced to 0.

    Raises
    ------
    ValueError
        If a hyperparameter line has no ``:`` or its value is not numeric.
    KeyError
        If no ``num_layers`` entry was parsed.
    """
    float_params = ("dropout", "learning_rate")

    tuned_params = {}
    for hp in lines[6:]:
        if 'Score' in hp:
            break
        if not hp.strip():
            continue

        hp_name, separator, hp_choice = hp.partition(':')
        if not separator:
            raise ValueError(f"Expected 'name: value', got {hp!r}")

        # Strip surrounding whitespace instead of slicing off the first and
        # last character: slicing corrupts the value when there is no space
        # after the colon or when the last line has no trailing newline.
        hp_name = hp_name.strip()
        hp_choice = hp_choice.strip()

        if hp_name in float_params:
            tuned_params[hp_name] = float(hp_choice)
        else:
            tuned_params[hp_name] = int(hp_choice)

    # Unit counts listed for layers beyond `num_layers` are overwritten
    # with 0 so that they do not show up as neurons of the model.
    if tuned_params['num_layers'] == 2:
        tuned_params['layer_2_units'] = 0

    elif tuned_params['num_layers'] == 1:
        tuned_params['layer_1_units'] = 0
        tuned_params['layer_2_units'] = 0

    return tuned_params


def get_gnn_neurons_hp(cancer, strategy, gnn):
    """Collect the tuned number of units per layer of a GNN, per threshold.

    For each threshold directory the file
    ``<cancer>/<strategy>/<threshold>/<gnn>_tunner_best_results.txt`` is read
    and parsed with ``get_tuned_params_dict``.

    Parameters
    ----------
    cancer : str
        Cancer directory name (the notebook uses e.g. "KIRC").
    strategy : str
        Strategy directory name (the notebook uses e.g. "correlation").
    gnn : str
        Prefix of the results file (the notebook uses "gat" and "gcn").

    Returns
    -------
    pandas.DataFrame
        One row per threshold (index "0.01" ... "0.99") and the columns
        ``layer_0_units``, ``layer_1_units`` and ``layer_2_units``.

    Raises
    ------
    KeyError
        If a results file does not provide all three layer entries. The
        offending file is printed before raising.
    """
    # (directory name, index label) kept together so they cannot drift apart.
    thresholds = [
        ("001", "0.01"), ("005", "0.05"), ("01", "0.1"),
        ("025", "0.25"), ("05", "0.5"), ("075", "0.75"),
        ("09", "0.9"), ("095", "0.95"), ("099", "0.99"),
    ]
    layer_keys = ("layer_0_units", "layer_1_units", "layer_2_units")
    neurons = {key: [] for key in layer_keys}

    for th, _ in thresholds:

        file = f"C:/Users/colombelli/Desktop/TCC/experiments_extra_40/{cancer}/{strategy}/{th}/{gnn}_tunner_best_results.txt"
        with open(file) as f:
            lines = f.readlines()
        tuned_params = get_tuned_params_dict(lines)

        try:
            units = [tuned_params[key] for key in layer_keys]
        except KeyError as err:
            # Dump the file so the malformed results are easy to spot, then
            # fail with the file name and the original cause attached.
            for line in lines:
                print(line, end="")
            raise KeyError(f"{err} missing from tuned hyperparameters in {file}") from err

        for key, value in zip(layer_keys, units):
            neurons[key].append(value)

    df = pd.DataFrame(neurons, index=[label for _, label in thresholds])
    return df


def get_mlp_neurons_hp(cancer):
    """Return the tuned units per layer of the MLP, repeated per threshold.

    A single results file (``<cancer>/mlp/mlp_tunner_best_results.txt``) is
    read, so the same three values are repeated on every threshold row. This
    gives the frame the same index as the per-threshold GNN frames.

    Parameters
    ----------
    cancer : str
        Cancer directory name (the notebook uses e.g. "KIRC").

    Returns
    -------
    pandas.DataFrame
        Nine identical rows indexed "0.01" ... "0.99" with the columns
        ``layer_0_units``, ``layer_1_units`` and ``layer_2_units``.
    """
    threshold_labels = ["0.01", "0.05", "0.1", "0.25", "0.5", "0.75", "0.9", "0.95", "0.99"]

    file = f"C:/Users/colombelli/Desktop/TCC/experiments_extra_40/{cancer}/mlp/mlp_tunner_best_results.txt"
    with open(file) as f:
        lines = f.readlines()
    tuned_params = get_tuned_params_dict(lines)

    # Each column must be as long as the index: a one-element list combined
    # with a nine-label index makes the DataFrame constructor raise a
    # length-mismatch ValueError, so the value is repeated once per label.
    neurons = {
        key: [tuned_params[key]] * len(threshold_labels)
        for key in ("layer_0_units", "layer_1_units", "layer_2_units")
    }

    df = pd.DataFrame(neurons, index=threshold_labels)
    return df



# Method from <https://stackoverflow.com/questions/22787209/how-to-have-clusters-of-stacked-bars-with-python-pandas>
def plot_clustered_stacked(dfall, labels=None, title="multiple stacked bar plot",  H="/", xlabel=None, ylabel=None, **kwargs):
    """Draw several dataframes as clusters of stacked bars on one axes.

    Every dataframe is drawn as a stacked bar plot on the same axes; the bars
    of each dataframe are then narrowed, shifted sideways and hatched so the
    dataframes appear next to each other for every index entry.

    Parameters
    ----------
    dfall : list of pandas.DataFrame
        Dataframes to plot. The column and row counts are taken from the
        first one, so all of them are expected to share columns and index.
    labels : list of str, optional
        Names of the dataframes, used for the second (hatch) legend. When
        None, no hatch legend is created.
    title : str
        Title of the plot.
    H : str
        Hatch pattern; it is repeated ``k`` times for the k-th dataframe
        (so the first dataframe has no hatch).
    xlabel, ylabel : str, optional
        Axis labels.
    **kwargs
        Forwarded to ``DataFrame.plot``.

    Returns
    -------
    The axes object returned by ``DataFrame.plot``.
    """
    
    sns.set_style("darkgrid")
    
    n_df = len(dfall)
    n_col = len(dfall[0].columns) 
    n_ind = len(dfall[0].index)
    # Uses (or creates) the current pyplot figure.
    axe = plt.subplot(111)

    for df in dfall : # for each data frame
        axe = df.plot(kind="bar",
                      stacked=True,
                      ax=axe,
                      legend=False,
                      grid=False,
                      edgecolor='black', linewidth=1, 
                      **kwargs)  # make bar plots

    h,l = axe.get_legend_handles_labels() # get the handles we want to modify
    # i walks over the handles in steps of n_col, so i / n_col is the position
    # of the dataframe the handles h[i:i+n_col] belong to.
    for i in range(0, n_df * n_col, n_col): # len(h) = n_col * n_df
        for j, pa in enumerate(h[i:i+n_col]):
            for rect in pa.patches: # for each index
                # Shift by (dataframe position) * (bar width) ...
                rect.set_x(rect.get_x() + 1 / float(n_df + 1) * i / float(n_col))
                # ... hatch with H repeated (dataframe position) times ...
                rect.set_hatch(H * int(i / n_col)) #edited part     
                # ... and shrink every bar to 1 / (n_df + 1).
                rect.set_width(1 / float(n_df + 1))

    # One tick per index entry, at k + 1 / (2 * (n_df + 1)) for k = 0..n_ind-1.
    axe.set_xticks((np.arange(0, 2 * n_ind, 2) + 1 / float(n_df + 1)) / 2.)
    # `df` is the loop variable left over from the plotting loop above, i.e.
    # the last dataframe of dfall.
    axe.set_xticklabels(df.index, rotation = 0)
    axe.set_title(title, fontsize=17)

    # Add invisible data to add another legend
    n=[]        
    for i in range(n_df):
        # Zero-height bar: only used as a legend handle for hatch H * i.
        n.append(axe.bar(0, 0, color="gray", hatch=H * i))

    # First legend: one entry per column (handles of the first dataframe).
    l1 = axe.legend(h[:n_col], l[:n_col], loc=[1.01, 0.5])
    if labels is not None:
        # Second legend: one entry per dataframe. NOTE(review): l2 is never
        # read afterwards.
        l2 = plt.legend(n, labels, loc=[1.01, 0.1]) 
    # Re-attach the first legend as a plain artist; presumably needed because
    # creating the second legend replaces the axes' legend - confirm against
    # the matplotlib legend guide.
    axe.add_artist(l1)
    plt.xlabel(xlabel, fontsize=16)
    plt.ylabel(ylabel, fontsize=16)
    return axe



def plot_neurons_for_cancer(cancer, strategy):
    """Save the clustered stacked bar plot of tuned neurons for one cancer.

    The MLP, GAT and GCN layer sizes are loaded for ``cancer`` and plotted
    with ``plot_clustered_stacked``; the figure is written to
    ``hp_plots/<cancer>_<strategy>.pdf`` and then closed.

    Parameters
    ----------
    cancer : str
        Cancer directory name.
    strategy : str
        One of "CGEN", "CMON" or "SNF".

    Raises
    ------
    Exception
        If ``strategy`` is not one of the three known names.
    """
    # Strategy label used in titles / file names -> directory on disk.
    known_strategies = (
        ("CGEN", "correlation"),
        ("CMON", "correlation_multi_omics"),
        ("SNF", "snf"),
    )
    for label, directory in known_strategies:
        if strategy == label:
            strategy_dir = directory
            break
    else:
        raise Exception('Invalid strategy.')

    gat_units = get_gnn_neurons_hp(cancer, strategy_dir, "gat")
    gcn_units = get_gnn_neurons_hp(cancer, strategy_dir, "gcn")
    mlp_units = get_mlp_neurons_hp(cancer)

    frames = [mlp_units, gat_units, gcn_units]
    model_names = ["MLP", "GAT", "GCN"]
    axes = plot_clustered_stacked(
        frames,
        model_names,
        title=f"{strategy} tuned number of neurons ({cancer})",
        xlabel="Threshold (categorized)",
        ylabel="Number of neurons",
    )

    figure = axes.figure
    figure.set_size_inches(14.5, 8.5)
    axes.set_ylim([0, 400])

    output_path = f"C:/Users/colombelli/Desktop/TCC/experiments_extra_40/hp_plots/{cancer}_{strategy}.pdf"
    figure.savefig(output_path, bbox_inches='tight', dpi=100)
    plt.close()

In [28]:
# Tuned layer sizes of the GAT model for KIRC / correlation_multi_omics, one
# row per threshold. The model name is lower-case because it becomes part of
# the file name ("gat_tunner_best_results.txt"), as in every other call in
# this notebook; "GAT" only resolves on case-insensitive filesystems.
df = get_gnn_neurons_hp("KIRC", "correlation_multi_omics", "gat")
df

Unnamed: 0,layer_0_units,layer_1_units,layer_2_units
0.01,32,0,0
0.05,64,128,32
0.1,128,64,0
0.25,32,128,0
0.5,128,32,0
0.75,32,0,0
0.9,128,0,0
0.95,32,64,0
0.99,32,64,32


In [10]:
# Save one neuron-count figure per (cancer, strategy) pair, cancer by cancer.
for cancer in ("KIRC", "COAD", "LUAD"):
    for strategy in ("CGEN", "CMON", "SNF"):
        plot_neurons_for_cancer(cancer, strategy)

# Focal Loss investigation

In [25]:
def get_tuned_params_dict(lines):
    """Parse the tuned hyperparameters out of a tuner results file.

    The first six lines are skipped. Every following line is read as
    ``name: value`` until a line containing ``Score`` is reached; blank
    lines are ignored.

    NOTE(review): this function is defined more than once in this notebook;
    keep the copies in sync or define it a single time.

    Parameters
    ----------
    lines : list of str
        Lines of the results file, e.g. the result of ``f.readlines()``.

    Returns
    -------
    dict
        Hyperparameter name -> parsed value. ``dropout`` and
        ``learning_rate`` are floats, every other value is an int.
        When ``num_layers`` is 2, ``layer_2_units`` is forced to 0; when it
        is 1, both ``layer_1_units`` and ``layer_2_units`` are forced to 0.

    Raises
    ------
    ValueError
        If a hyperparameter line has no ``:`` or its value is not numeric.
    KeyError
        If no ``num_layers`` entry was parsed.
    """
    float_params = ("dropout", "learning_rate")

    tuned_params = {}
    for hp in lines[6:]:
        if 'Score' in hp:
            break
        if not hp.strip():
            continue

        hp_name, separator, hp_choice = hp.partition(':')
        if not separator:
            raise ValueError(f"Expected 'name: value', got {hp!r}")

        # Strip surrounding whitespace instead of slicing off the first and
        # last character: slicing corrupts the value when there is no space
        # after the colon or when the last line has no trailing newline.
        hp_name = hp_name.strip()
        hp_choice = hp_choice.strip()

        if hp_name in float_params:
            tuned_params[hp_name] = float(hp_choice)
        else:
            tuned_params[hp_name] = int(hp_choice)

    # Unit counts listed for layers beyond `num_layers` are overwritten
    # with 0 so that they do not show up as neurons of the model.
    if tuned_params['num_layers'] == 2:
        tuned_params['layer_2_units'] = 0

    elif tuned_params['num_layers'] == 1:
        tuned_params['layer_1_units'] = 0
        tuned_params['layer_2_units'] = 0

    return tuned_params



def get_gnn_gammas_hp(cancer, strategy, gnn):
    """Collect the tuned ``gamma`` of a GNN for every threshold.

    For each threshold directory the file
    ``<cancer>/<strategy>/<threshold>/<gnn>_tunner_best_results.txt`` is read
    and parsed with ``get_tuned_params_dict``.

    Parameters
    ----------
    cancer : str
        Cancer directory name.
    strategy : str
        Strategy directory name.
    gnn : str
        Prefix of the results file (the notebook uses "gat" and "gcn").

    Returns
    -------
    list
        The ``gamma`` values, ordered by threshold directory
        ("001" ... "099").

    Raises
    ------
    KeyError
        If a results file has no ``gamma`` entry. The offending file is
        printed before raising.
    """
    gammas = []
    for th in ["001", "005", "01", "025", "05", "075", "09", "095", "099"]:

        file = f"C:/Users/colombelli/Desktop/TCC/experiments_extra_40/{cancer}/{strategy}/{th}/{gnn}_tunner_best_results.txt"
        with open(file) as f:
            lines = f.readlines()
        tuned_params = get_tuned_params_dict(lines)

        try:
            gammas.append(tuned_params['gamma'])
        except KeyError as err:
            # Only a missing key is expected here; dump the file and fail
            # with its name and the original cause attached.
            for line in lines:
                print(line, end="")
            raise KeyError(f"'gamma' missing from tuned hyperparameters in {file}") from err

    return gammas


def get_mlp_gammas_hp(cancer):
    """Return the MLP's tuned ``gamma`` repeated nine times.

    The value is read from ``<cancer>/mlp/mlp_tunner_best_results.txt``. The
    list has nine entries, the same length as the nine-label threshold index
    used for the gamma dataframe.
    """
    results_path = f"C:/Users/colombelli/Desktop/TCC/experiments_extra_40/{cancer}/mlp/mlp_tunner_best_results.txt"
    with open(results_path) as handle:
        raw_lines = handle.readlines()

    mlp_gamma = get_tuned_params_dict(raw_lines)['gamma']
    return [mlp_gamma for _ in range(9)]


def get_gammas_hp(cancer, strategy):
    """Build the per-threshold table of tuned ``gamma`` values.

    Returns a dataframe indexed by threshold label ("0.01" ... "0.99") with
    one column per model: "MLP", "GAT" and "GCN".
    """
    threshold_labels = ["0.01", "0.05", "0.1", "0.25", "0.5", "0.75", "0.9", "0.95", "0.99"]

    # Columns are filled in the order MLP, GAT, GCN.
    per_model = {"MLP": get_mlp_gammas_hp(cancer)}
    for column, gnn in (("GAT", "gat"), ("GCN", "gcn")):
        per_model[column] = get_gnn_gammas_hp(cancer, strategy, gnn)

    return pd.DataFrame(per_model, index=threshold_labels)


def plot_gammas(cancer, strategy):
    """Save the line plot of tuned ``gamma`` values for one cancer.

    The gammas of the MLP, GAT and GCN models are plotted against the
    threshold labels and written to
    ``hp_plots/gammas/<cancer>_<strategy>.pdf``; the figure is then closed.

    Parameters
    ----------
    cancer : str
        Cancer directory name.
    strategy : str
        One of "CGEN", "CMON" or "SNF".

    Raises
    ------
    Exception
        If ``strategy`` is not one of the three known names.
    """
    # Strategy label used in titles / file names -> directory on disk.
    known_strategies = (
        ("CGEN", "correlation"),
        ("CMON", "correlation_multi_omics"),
        ("SNF", "snf"),
    )
    for label, directory in known_strategies:
        if strategy == label:
            strategy_dir = directory
            break
    else:
        raise Exception('Invalid strategy.')

    sns.set_style("darkgrid")

    gammas_by_model = get_gammas_hp(cancer, strategy_dir)

    ax = sns.lineplot(data=gammas_by_model, markers=True, markersize=10)
    ax.set_yticks([0, 1, 2])
    fig = ax.figure
    fig.set_size_inches(8.5, 3)
    ax.set_title(f"{strategy} tuned Focal Loss' γ ({cancer})", fontsize=10)
    plt.xlabel("Threshold (categorized)", fontsize=8)
    plt.ylabel("γ (gamma)", fontsize=8)

    output_path = f"C:/Users/colombelli/Desktop/TCC/experiments_extra_40/hp_plots/gammas/{cancer}_{strategy}.pdf"
    fig.savefig(output_path, bbox_inches='tight', dpi=100)
    plt.close()

In [51]:
# Save one gamma figure per (cancer, strategy) pair, cancer by cancer.
for cancer in ("KIRC", "COAD", "LUAD"):
    for strategy in ("CGEN", "CMON", "SNF"):
        plot_gammas(cancer, strategy)

# Attention heads investigation

In [1]:
def get_tuned_params_dict(lines):
    """Parse the tuned hyperparameters out of a tuner results file.

    The first six lines are skipped. Every following line is read as
    ``name: value`` until a line containing ``Score`` is reached; blank
    lines are ignored.

    NOTE(review): this function is defined more than once in this notebook;
    keep the copies in sync or define it a single time.

    Parameters
    ----------
    lines : list of str
        Lines of the results file, e.g. the result of ``f.readlines()``.

    Returns
    -------
    dict
        Hyperparameter name -> parsed value. ``dropout`` and
        ``learning_rate`` are floats, every other value is an int.
        When ``num_layers`` is 2, ``layer_2_units`` is forced to 0; when it
        is 1, both ``layer_1_units`` and ``layer_2_units`` are forced to 0.

    Raises
    ------
    ValueError
        If a hyperparameter line has no ``:`` or its value is not numeric.
    KeyError
        If no ``num_layers`` entry was parsed.
    """
    float_params = ("dropout", "learning_rate")

    tuned_params = {}
    for hp in lines[6:]:
        if 'Score' in hp:
            break
        if not hp.strip():
            continue

        hp_name, separator, hp_choice = hp.partition(':')
        if not separator:
            raise ValueError(f"Expected 'name: value', got {hp!r}")

        # Strip surrounding whitespace instead of slicing off the first and
        # last character: slicing corrupts the value when there is no space
        # after the colon or when the last line has no trailing newline.
        hp_name = hp_name.strip()
        hp_choice = hp_choice.strip()

        if hp_name in float_params:
            tuned_params[hp_name] = float(hp_choice)
        else:
            tuned_params[hp_name] = int(hp_choice)

    # Unit counts listed for layers beyond `num_layers` are overwritten
    # with 0 so that they do not show up as neurons of the model.
    if tuned_params['num_layers'] == 2:
        tuned_params['layer_2_units'] = 0

    elif tuned_params['num_layers'] == 1:
        tuned_params['layer_1_units'] = 0
        tuned_params['layer_2_units'] = 0

    return tuned_params



def get_attention_heads_hp(cancer, strategy):
    """Collect the tuned number of attention heads of the GAT per threshold.

    For each threshold directory the file
    ``<cancer>/<strategy>/<threshold>/gat_tunner_best_results.txt`` is read
    and parsed with ``get_tuned_params_dict``.

    NOTE(review): the files are read from ``.../TCC/experiments`` while the
    other loaders in this notebook use ``.../TCC/experiments_extra_40`` -
    confirm this is intended.

    Parameters
    ----------
    cancer : str
        Cancer directory name.
    strategy : str
        Strategy directory name.

    Returns
    -------
    list
        The ``attention_heads`` values, ordered by threshold directory
        ("001" ... "099").

    Raises
    ------
    KeyError
        If a results file has no ``attention_heads`` entry. The offending
        file is printed before raising.
    """
    heads = []
    for th in ["001", "005", "01", "025", "05", "075", "09", "095", "099"]:

        file = f"C:/Users/colombelli/Desktop/TCC/experiments/{cancer}/{strategy}/{th}/gat_tunner_best_results.txt"
        with open(file) as f:
            lines = f.readlines()
        tuned_params = get_tuned_params_dict(lines)

        try:
            heads.append(tuned_params['attention_heads'])
        except KeyError as err:
            # Only a missing key is expected here; dump the file and fail
            # with its name and the original cause attached.
            for line in lines:
                print(line, end="")
            raise KeyError(f"'attention_heads' missing from tuned hyperparameters in {file}") from err

    return heads

In [4]:
# Tuned attention heads for KIRC: one list per graph strategy, each ordered
# by threshold.
h1, h2, h3 = [
    get_attention_heads_hp('KIRC', strategy_dir)
    for strategy_dir in ('correlation', 'correlation_multi_omics', 'snf')
]

for h in (h1, h2, h3):
    print(h)

[2, 8, 8, 8, 8, 2, 8, 2, 8]
[2, 4, 2, 8, 4, 4, 2, 2, 2]
[8, 2, 2, 8, 4, 8, 8, 8, 8]


In [5]:
# Tuned attention heads for COAD: one list per graph strategy, each ordered
# by threshold.
h1, h2, h3 = [
    get_attention_heads_hp('COAD', strategy_dir)
    for strategy_dir in ('correlation', 'correlation_multi_omics', 'snf')
]

for h in (h1, h2, h3):
    print(h)

[4, 2, 8, 2, 8, 8, 8, 2, 4]
[2, 2, 4, 8, 2, 4, 4, 4, 2]
[4, 8, 2, 4, 2, 4, 2, 4, 2]


In [6]:
# Tuned attention heads for LUAD: one list per graph strategy, each ordered
# by threshold.
h1, h2, h3 = [
    get_attention_heads_hp('LUAD', strategy_dir)
    for strategy_dir in ('correlation', 'correlation_multi_omics', 'snf')
]

for h in (h1, h2, h3):
    print(h)

[2, 8, 2, 4, 4, 8, 8, 8, 8]
[8, 8, 8, 2, 4, 8, 4, 8, 4]
[8, 4, 4, 8, 4, 2, 8, 4, 4]
