In [110]:
from cust_functions.explain_helper import *
import pandas as pd 

In [111]:
##
# Two little functions needed to compute the number of shared important proteins/pathways
##

def common_features_nodes(df1 : pd.DataFrame, df2 : pd.DataFrame, protein : bool, out_of_top: int):

    if protein:
        protein_or_pathway = 'Protein'
    else: 
        protein_or_pathway = 'Pathway'

    merged_df = pd.merge(df1.head(out_of_top), df2.head(out_of_top), on=protein_or_pathway, how='inner')

    return merged_df

In [112]:
def overlap_matrix(list_of_scores : list, list_of_names : list, protein: bool, out_of_top: int):
    Pairwise_matrix = pd.DataFrame(index=list_of_names, columns=list_of_names)

    for i, df1 in enumerate(list_of_scores):
        for j, df2 in enumerate(list_of_scores):
            Pairwise_matrix.iloc[i,j] = len(common_features_nodes(df1, df2, protein, out_of_top))

    return(Pairwise_matrix)

## 1. Import data

In [113]:
##
# GNNs: Create aggregate importance scores from the five models generated during Cross Validation 
##

# ResGCN on AKI data
Features_AKI_ResGCN = create_avg_sd_df('./explanations/AKI_ResGCN.pkl', True, True)
Pathways_AKI_ResGCN = create_avg_sd_df('./explanations/AKI_ResGCN.pkl', False, True)
Features_Pert_AKI_ResGCN = create_avg_sd_df('./explanations/Pert_AKI_ResGCN.pkl', True, True)
Pathways_Pert_AKI_ResGCN = create_avg_sd_df('./explanations/Pert_AKI_ResGCN.pkl', False, True)

# ResGCN on Covid data 
Features_Covid_ResGCN = create_avg_sd_df('./explanations/Covid_ResGCN.pkl', True, True)
Pathways_Covid_ResGCN = create_avg_sd_df('./explanations/Covid_ResGCN.pkl', False, True)
Features_Pert_Covid_ResGCN = create_avg_sd_df('./explanations/Pert_Covid_ResGCN.pkl', True, True)
Pathways_Pert_Covid_ResGCN = create_avg_sd_df('./explanations/Pert_Covid_ResGCN.pkl', False, True)

# ResGAT on AKI data
Features_AKI_ResGAT = create_avg_sd_df('./explanations/AKI_ResGAT.pkl', True, True)
Pathways_AKI_ResGAT = create_avg_sd_df('./explanations/AKI_ResGAT.pkl', False, True)

# ResGAT on Covid data
Features_Covid_ResGAT = create_avg_sd_df('./explanations/Covid_ResGAT.pkl', True, True)
Pathways_Covid_ResGAT = create_avg_sd_df('./explanations/Covid_ResGAT.pkl', False, True)

In [None]:
##
# Import protein/pathway importance from the BINN model
##

Features_AKI_BINN = pd.read_csv('./BINN_Explanations/top_30_proteins_binn_aki.csv')
Pathways_AKI_BINN = pd.read_csv('./BINN_Explanations/top_30_pathways_binn_aki.csv')
Features_Covid_BINN = pd.read_csv('./BINN_Explanations/top_30_proteins_binn_covid.csv')
Pathways_Covid_BINN = pd.read_csv('./BINN_Explanations/top_30_pathways_binn_covid.csv')

Features_AKI_BINN.rename(columns={'source': 'Protein'}, inplace=True)
Pathways_AKI_BINN.rename(columns={'source': 'Pathway'}, inplace=True)
Features_Covid_BINN.rename(columns={'source': 'Protein'}, inplace=True)
Pathways_Covid_BINN.rename(columns={'source': 'Pathway'}, inplace=True)

In [None]:
##
# Import protein importance from the Random Forest and AdaBoost model
##


## 2. Compute the common proteins / pathways

In [129]:
##
# AKI data / proteins
##

AKI_features = [Features_AKI_ResGCN, Features_Pert_AKI_ResGCN, Features_AKI_ResGAT, Features_AKI_BINN]
names = ['AKI_ResGCN', 'Pert_AKI_ResGCN', 'AKI_ResGAT', 'AKI_BINN']
overlap_matrix(AKI_features, names, True, 30)

Unnamed: 0,AKI_ResGCN,Pert_AKI_ResGCN,AKI_ResGAT,AKI_BINN
AKI_ResGCN,30,12,14,3
Pert_AKI_ResGCN,12,30,8,5
AKI_ResGAT,14,8,30,5
AKI_BINN,3,5,5,30


In [130]:
##
# AKI data / pathways
##

AKI_pathways = [Pathways_AKI_ResGCN, Pathways_Pert_AKI_ResGCN, Pathways_AKI_ResGAT, Pathways_AKI_BINN]
names = ['AKI_ResGCN', 'Pert_AKI_ResGCN', 'AKI_ResGAT', 'AKI_BINN']
overlap_matrix(AKI_pathways, names, False, 30)

Unnamed: 0,AKI_ResGCN,Pert_AKI_ResGCN,AKI_ResGAT,AKI_BINN
AKI_ResGCN,30,6,24,6
Pert_AKI_ResGCN,6,30,5,2
AKI_ResGAT,24,5,30,6
AKI_BINN,6,2,6,32


In [131]:
##
# Covid data / proteins
##

Covid_features = [Features_Covid_ResGCN, Features_Pert_Covid_ResGCN, Features_Covid_ResGAT, Features_Covid_BINN]
names = ['Covid_ResGCN', 'Pert_Covid_ResGCN', 'Covid_ResGAT', 'Covid_BINN']
overlap_matrix(Covid_features, names, True, 30)

Unnamed: 0,Covid_ResGCN,Pert_Covid_ResGCN,Covid_ResGAT,Covid_BINN
Covid_ResGCN,30,17,16,13
Pert_Covid_ResGCN,17,30,19,12
Covid_ResGAT,16,19,30,12
Covid_BINN,13,12,12,30


In [132]:
##
# Covid data / pathways
##

Covid_pathways = [Pathways_Covid_ResGCN, Pathways_Pert_Covid_ResGCN, Pathways_Covid_ResGAT, Pathways_Covid_BINN]
names = ['Covid_ResGCN', 'Pert_Covid_ResGCN', 'Covid_ResGAT', 'Covid_BINN']
overlap_matrix(Covid_pathways, names, False, 30)

Unnamed: 0,Covid_ResGCN,Pert_Covid_ResGCN,Covid_ResGAT,Covid_BINN
Covid_ResGCN,30,5,26,6
Pert_Covid_ResGCN,5,30,4,2
Covid_ResGAT,26,4,30,6
Covid_BINN,6,2,6,30
