In [1]:
import pandas as pd
from igraph import Graph
import disease_process_proteins
import metrics_functions
import importlib
import numpy as np
from ast import literal_eval
from tqdm.notebook import tqdm

In [2]:
reactome_proteins_indexes_apid_huri_df = pd.read_csv("../../data/processed/reactome_proteins_indexes_apid_huri.csv", sep=',', header=0)
disgenet_proteins_indexes_apid_huri_df = pd.read_csv("../../data/processed/disgenet_prot_index_main_comp.csv", sep=',', header=0)

protein_ids_apid_huri_df = pd.read_csv("../../data/processed/protein_ids_apid_huri.csv", sep=',', header=0)
adj_matrix_apid_huri = np.load("../../data/processed/adjacency_matrix_apid_huri.npy")
ppi_80 = np.load("../../data/processed/ppis/ppis_red80_apid_huri.npy", allow_pickle=True)
ppi_80_protein = np.load("../../data/processed/ppis/ppis_red_protein80_apid_huri.npy", allow_pickle=True)

In [3]:
reactome_proteins_indexes_apid_huri_df['protein_index'] = reactome_proteins_indexes_apid_huri_df['protein_index'].apply(literal_eval)
disgenet_proteins_indexes_apid_huri_df['protein_index'] = disgenet_proteins_indexes_apid_huri_df['protein_index'].apply(literal_eval)
reactome_proteins_indexes_apid_huri_df['proteins_ids'] = reactome_proteins_indexes_apid_huri_df['proteins_ids'].apply(literal_eval)
disgenet_proteins_indexes_apid_huri_df['proteins_ids'] = disgenet_proteins_indexes_apid_huri_df['proteins_ids'].apply(literal_eval)
disgenet_proteins_indexes_apid_huri_df['conservative_module'] = disgenet_proteins_indexes_apid_huri_df['conservative_module'].apply(literal_eval)
disgenet_proteins_indexes_apid_huri_df['added_nodes'] = disgenet_proteins_indexes_apid_huri_df['added_nodes'].apply(literal_eval)
disgenet_proteins_indexes_apid_huri_df['conservative_module_ids'] = disgenet_proteins_indexes_apid_huri_df['conservative_module_ids'].apply(literal_eval)
disgenet_proteins_indexes_apid_huri_df['added_nodes_ids'] = disgenet_proteins_indexes_apid_huri_df['added_nodes_ids'].apply(literal_eval)
disgenet_proteins_indexes_apid_huri_df['main_component'] = disgenet_proteins_indexes_apid_huri_df['main_component'].apply(literal_eval)
disgenet_proteins_indexes_apid_huri_df['main_component_ids'] = disgenet_proteins_indexes_apid_huri_df['main_component_ids'].apply(literal_eval)

In [4]:
graph_apid_huri = Graph.Read_GML("../../data/processed/graph_apid_huri")

In [5]:
disgenet_prot_index_conservative_module = disgenet_proteins_indexes_apid_huri_df[['process', 'conservative_module_ids', 'conservative_module', 'len_cm']]
disgenet_prot_index_conservative_module.columns = ['process', 'proteins_ids', 'protein_index', 'n_proteins']

In [6]:
disgenet_prot_index_main_comp = disgenet_proteins_indexes_apid_huri_df[disgenet_proteins_indexes_apid_huri_df['increase']<0.4][['process', 'main_component_ids', 'main_component', 'len_main_component']]
disgenet_prot_index_main_comp.columns = ['process', 'proteins_ids', 'protein_index', 'n_proteins']

In [7]:
reactome_labels_df = pd.read_csv("../../data/processed/reactome_labels_apid_huri.csv", sep=',', names=reactome_proteins_indexes_apid_huri_df['process'].values)
reactome_labels_df['protein_id'] = graph_apid_huri.vs['name']
reactome_labels_df.set_index('protein_id', inplace=True)

disgenet_labels_df = pd.read_csv("../../data/processed/disgenet_filtered_labels_apid_huri.csv", sep=',', names=disgenet_prot_index_main_comp['process'].values)
disgenet_labels_df['protein_id'] = graph_apid_huri.vs['name']
disgenet_labels_df.set_index('protein_id', inplace=True)

disgenet_labels_conservative_df = pd.read_csv('../../data/processed/disgenet_conservative_labels_apid_huri.csv', names=disgenet_prot_index_conservative_module['process'].values)
disgenet_labels_conservative_df['protein_id'] = graph_apid_huri.vs['name']
disgenet_labels_conservative_df.set_index('protein_id', inplace=True)

In [9]:
process_rwr_whole = pd.read_csv("../../models/GAP-MINE/process/probability/rwr_whole.csv")
process_rwr_whole['new_proteins'] = process_rwr_whole['new_proteins'].apply(literal_eval)
disease_rwr_whole = pd.read_csv("../../models/GAP-MINE/disease/probability/rwr_whole.csv")
disease_rwr_whole['new_proteins'] = disease_rwr_whole['new_proteins'].apply(literal_eval)
disease_conservative_rwr_whole = pd.read_csv("../../models/GAP-MINE/disease/probability/rwr_conservative_whole.csv")
disease_conservative_rwr_whole['new_proteins'] = disease_conservative_rwr_whole['new_proteins'].apply(literal_eval)

In [186]:
disgenet_prot_index_main_comp.reset_index(inplace=True)

In [187]:
protein_list = set(list(disgenet_labels_df.sum(axis=1).replace(0, np.nan).dropna().index)+list(disgenet_labels_conservative_df.sum(axis=1).replace(0, np.nan).dropna().index)+list(reactome_labels_df.sum(axis=1).replace(0, np.nan).dropna().index))

In [188]:
sp = graph_apid_huri.shortest_paths(graph_apid_huri.vs['name'], protein_list)
sp_df = pd.DataFrame(sp, columns=protein_list)
sp_df.index = graph_apid_huri.vs['name']

In [189]:
import math
from tqdm.notebook import tqdm
tqdm.pandas()
def fp_addition(protein_indexes, sp_df, graph, clf_fp):
    clf_fp_proteins = clf_fp['new_proteins']
    min_sp = sp_df[~sp_df.index.isin(protein_indexes)][protein_indexes].min(axis=1)
    degree_values = graph.degree(sp_df[~sp_df.index.isin(protein_indexes)].index)
    log_degree_values = [math.log10(x) for x in degree_values]
    min_sp = pd.DataFrame(min_sp, columns=['sp'])
    min_sp['degree'] = log_degree_values
    min_sp['weight'] = min_sp['degree']/(10**min_sp['sp'])
    min_sp.loc[min_sp.index.isin(clf_fp_proteins), 'weight'] = 0
    min_sp['normalized_weight'] = min_sp['weight']/sum(min_sp['weight'])
    new_proteins = list(np.random.choice(min_sp.index, int(len(protein_indexes)*0.1), p=min_sp['normalized_weight']))
    new_proteins = new_proteins+protein_indexes
    new_proteins_index = [int(graph.vs.find(name=x)['id']) for x in new_proteins]
    return new_proteins, new_proteins_index

In [190]:
reactome_proteins_indexes_apid_huri_df[['fp_proteins', 'fp_proteins_index']] = reactome_proteins_indexes_apid_huri_df.progress_apply(lambda row: fp_addition(row['proteins_ids'], sp_df, graph_apid_huri, process_rwr_whole.loc[row.name,:]), axis=1, result_type='expand')
disgenet_prot_index_conservative_module[['fp_proteins', 'fp_proteins_index']] = disgenet_prot_index_conservative_module.progress_apply(lambda row: fp_addition(row['proteins_ids'], sp_df, graph_apid_huri, disease_conservative_rwr_whole.loc[row.name,:]), axis=1, result_type='expand')
disgenet_prot_index_main_comp[['fp_proteins', 'fp_proteins_index']] = disgenet_prot_index_main_comp.progress_apply(lambda row: fp_addition(row['proteins_ids'], sp_df, graph_apid_huri, disease_rwr_whole.loc[row.name,:]), axis=1, result_type='expand')

  0%|          | 0/429 [00:00<?, ?it/s]

  0%|          | 0/301 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


  0%|          | 0/203 [00:00<?, ?it/s]

In [191]:
reactome_proteins_indexes_apid_huri_df.to_csv('../../data/processed/reactome_protein_indexes_fp.csv', index=False)
disgenet_prot_index_conservative_module.to_csv('../../data/processed/disgenet_protein_indexes_conservative_fp.csv', index=False)
disgenet_prot_index_main_comp.to_csv('../../data/processed/disgenet_protein_indexes_fp.csv', index=False)

In [4]:
reactome_proteins_indexes_apid_huri_df = pd.read_csv('../../data/processed/reactome_protein_indexes_fp.csv')
disgenet_prot_index_conservative_module = pd.read_csv('../../data/processed/disgenet_protein_indexes_conservative_fp.csv')
disgenet_prot_index_main_comp = pd.read_csv('../../data/processed/disgenet_protein_indexes_fp.csv')

In [5]:
reactome_proteins_indexes_apid_huri_df['fp_proteins'] = reactome_proteins_indexes_apid_huri_df['fp_proteins'].apply(literal_eval)
disgenet_prot_index_conservative_module['fp_proteins'] = disgenet_prot_index_conservative_module['fp_proteins'].apply(literal_eval)
disgenet_prot_index_main_comp['fp_proteins'] = disgenet_prot_index_main_comp['fp_proteins'].apply(literal_eval)

reactome_proteins_indexes_apid_huri_df['fp_proteins_index'] = reactome_proteins_indexes_apid_huri_df['fp_proteins_index'].apply(literal_eval)
disgenet_prot_index_conservative_module['fp_proteins_index'] = disgenet_prot_index_conservative_module['fp_proteins_index'].apply(literal_eval)
disgenet_prot_index_main_comp['fp_proteins_index'] = disgenet_prot_index_main_comp['fp_proteins_index'].apply(literal_eval)

In [6]:
reactome_proteins_indexes_apid_huri_df.drop(columns=['proteins_ids', 'protein_index'], inplace=True)
reactome_proteins_indexes_apid_huri_df = reactome_proteins_indexes_apid_huri_df[['process', 'fp_proteins', 'fp_proteins_index']]
reactome_proteins_indexes_apid_huri_df.columns = ['process', 'proteins_ids', 'protein_index']
reactome_proteins_indexes_apid_huri_df['n_proteins'] = reactome_proteins_indexes_apid_huri_df['proteins_ids'].apply(lambda row: len(row))

In [7]:
disgenet_prot_index_conservative_module.drop(columns=['proteins_ids', 'protein_index'], inplace=True)
disgenet_prot_index_conservative_module = disgenet_prot_index_conservative_module[['process', 'fp_proteins', 'fp_proteins_index']]
disgenet_prot_index_conservative_module.columns = ['process', 'proteins_ids', 'protein_index']
disgenet_prot_index_conservative_module['n_proteins'] = disgenet_prot_index_conservative_module['proteins_ids'].apply(lambda row: len(row))

disgenet_prot_index_main_comp.drop(columns=['proteins_ids', 'protein_index'], inplace=True)
disgenet_prot_index_main_comp = disgenet_prot_index_main_comp[['process', 'fp_proteins', 'fp_proteins_index']]
disgenet_prot_index_main_comp.columns = ['process', 'proteins_ids', 'protein_index']
disgenet_prot_index_main_comp['n_proteins'] = disgenet_prot_index_main_comp['proteins_ids'].apply(lambda row: len(row))

# 1. Target Process Metrics

## 1.1. Hypergeometric Test

For a given protein and given process, the number of interactions formed between the protein and the proteins of the process are summed. The hypergeometric test is calculated given this number, the number total number of interactions of the protein,the number of proteins of the process and the total number of proteins. For an easier analysis, the p-values are presented in a -log10 form (higher -log10 means lower p-value).

In [13]:
importlib.reload(metrics_functions)
process_hypergeometric = metrics_functions.hypergeometric_test(graph_apid_huri, reactome_proteins_indexes_apid_huri_df, adj_matrix_apid_huri)
process_hypergeometric_df = pd.DataFrame.from_dict(process_hypergeometric, orient='index')
process_hypergeometric_df.to_csv('../../data/processed/metrics/process_hypergeometric_fp.csv')
process_hypergeometric_df.head()

  0%|          | 0/17204 [00:00<?, ?it/s]

Unnamed: 0,R-HSA-1031716,R-HSA-112379,R-HSA-112385,R-HSA-1168640,R-HSA-1214188,R-HSA-1234159,R-HSA-141409,R-HSA-141422,R-HSA-141431,R-HSA-141439,...,R-HSA-9670149,R-HSA-9698928,R-HSA-9710490,R-HSA-977224,R-HSA-983140,R-HSA-983147,R-HSA-983156,R-HSA-983157,R-HSA-983259,R-HSA-983266
A1BG,0.088609,0.067527,0.067527,2.840721,0.068636,2.659172,0.116385,0.116385,0.116385,2.263649,...,0.07862,0.068636,0.07751,0.066418,5.797865,6.257376,5.797865,5.797865,0.068636,0.068636
A1CF,0.12128,0.092424,0.092424,2.549507,0.093942,0.103051,0.159296,0.159296,0.159296,1.992492,...,0.107607,0.093942,0.106089,0.090907,0.41929,0.354814,0.41929,0.41929,0.093942,0.093942
A2M,0.706935,1.151082,1.151082,0.494507,4.196625,0.600683,0.928532,0.928532,0.928532,0.928532,...,2.261173,1.14361,3.896256,4.27931,3.742343,3.27644,2.804491,2.804491,1.14361,2.455746
A2ML1,0.055952,0.04264,0.04264,0.039139,0.04334,0.047543,0.073491,0.073491,0.073491,0.073491,...,0.049645,0.04334,0.048944,0.04194,0.193439,0.163693,0.193439,0.193439,0.04334,0.04334
A4GALT,0.018646,0.01421,0.01421,0.013043,0.014443,0.015844,0.024491,0.024491,0.024491,0.024491,...,0.016544,0.014443,0.016311,0.013977,0.064465,0.054552,0.064465,0.064465,0.014443,0.014443


In [15]:
importlib.reload(metrics_functions)
disease_hypergeometric = metrics_functions.hypergeometric_test(graph_apid_huri, disgenet_prot_index_main_comp, adj_matrix_apid_huri)
disease_hypergeometric_df = pd.DataFrame.from_dict(disease_hypergeometric, orient='index')
disease_hypergeometric_df.to_csv('../../data/processed/metrics/disease_hypergeometric_fp.csv')
disease_hypergeometric_df.head()

  0%|          | 0/17204 [00:00<?, ?it/s]

Unnamed: 0,C0001418,C0001973,C0002152,C0002395,C0002736,C0003873,C0004153,C0004238,C0004352,C0005684,...,C3714636,C3714758,C4277682,C4277690,C4279912,C4505456,C4552091,C4704862,C4707243,C4721507
A1BG,1.965745,1.319597,0.166483,0.138634,0.09083,0.278287,5.50596,1.674078,1.321389,1.865037,...,0.087499,0.199955,0.413327,0.158681,0.413327,4.734986,0.199955,0.199955,2.593476,0.113049
A1CF,1.712237,0.534825,0.227865,1.84742,0.124319,7.653947,2.144254,3.308358,0.533282,0.253816,...,0.11976,0.273679,0.565722,1.739271,0.565722,0.200416,0.273679,0.273679,0.110645,0.154731
A2M,8.168082,5.901446,8.089131,5.497651,1.039913,15.103255,6.969864,6.152206,3.794016,5.844829,...,3.606504,2.940223,8.031335,1.483925,9.414081,3.777455,1.993643,1.993643,3.794589,6.335558
A2ML1,2.365152,0.246741,0.105126,0.087541,0.057354,1.906908,0.061562,0.148848,0.246029,0.117098,...,0.055251,2.190072,1.593028,0.100199,1.593028,0.092462,2.190072,2.190072,0.051046,0.071385
A4GALT,0.034565,0.082228,0.035034,0.029173,0.019114,0.058561,0.020516,0.049604,0.08199,0.039024,...,0.018413,0.042077,2.518099,0.033392,2.518099,0.030813,0.042077,0.042077,0.017011,0.023789


In [16]:
importlib.reload(metrics_functions)
disease_hypergeometric_conservative = metrics_functions.hypergeometric_test(graph_apid_huri, disgenet_prot_index_conservative_module, adj_matrix_apid_huri)
disease_hypergeometric_conservative_df = pd.DataFrame.from_dict(disease_hypergeometric_conservative, orient='index')
disease_hypergeometric_conservative_df.to_csv('../../data/processed/metrics/disease_hypergeometric_conservative_fp.csv')
disease_hypergeometric_conservative_df.head()

  0%|          | 0/17204 [00:00<?, ?it/s]

Unnamed: 0,C0000786,C0000822,C0001418,C0001787,C0001973,C0002152,C0002395,C0002736,C0003873,C0004096,...,C4317123,C4505436,C4505456,C4552091,C4552766,C4704862,C4707243,C4721453,C4721507,C4722327
A1BG,0.124169,0.124169,2.131503,0.070854,5.879198,0.127506,0.107492,0.067527,0.20554,0.094161,...,0.125281,0.121945,5.079253,0.146428,0.124169,0.146428,0.061983,0.061983,0.083059,0.058657
A1CF,4.447841,4.447841,0.185178,0.096978,1.297371,0.174518,0.147124,0.092424,3.543378,0.128878,...,0.171473,0.166906,0.166906,0.200416,4.447841,0.200416,0.084836,0.084836,0.113683,0.080284
A2M,2.83468,2.83468,7.378553,2.409199,6.26331,4.188839,4.724197,0.538738,8.552422,1.030382,...,1.693783,1.721574,4.325877,1.004452,4.270083,1.168224,0.494507,1.192667,5.582004,0.467974
A2ML1,0.078406,0.078406,2.541582,0.044741,0.187768,0.080514,0.067876,0.04264,0.129788,0.059458,...,0.079109,0.077002,0.077002,0.092462,0.078406,0.092462,0.039139,0.039139,0.052448,0.037039
A4GALT,0.026129,0.026129,0.028471,0.01491,0.062575,0.026832,0.02262,0.01421,0.043253,0.019815,...,0.026363,0.025661,0.025661,0.030813,0.026129,0.030813,0.013043,0.013043,0.017478,0.012344


## 1.2 Shortest Paths

In [17]:
importlib.reload(metrics_functions)
process_shortest_paths = metrics_functions.shortest_paths(graph_apid_huri, reactome_proteins_indexes_apid_huri_df)
process_shortest_paths_df = pd.DataFrame.from_dict(process_shortest_paths)
process_shortest_paths_df = process_shortest_paths_df.rename(index=dict(zip(list(process_shortest_paths_df.index),list(graph_apid_huri.vs['name']))))
process_shortest_paths_df.to_csv('../../data/processed/metrics/process_shortest_paths_fp.csv')
print(process_shortest_paths_df.shape)
process_shortest_paths_df.head()

  0%|          | 0/5359 [00:00<?, ?it/s]

(17204, 5359)


Unnamed: 0,DISC1,GSDMD,FNBP1L,GPNMB,GABBR1,GTF2F2,BIN2,ALMS1,MRPS26,CEP89,...,NR3C1,MAP3K11,EIF3E,DNAJA2,ANAPC10,SPTA1,DYNLRB2,CTSC,TCF7L2,C4A
A1BG,3,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,2,2
A1CF,3,3,3,3,3,2,3,3,2,3,...,3,3,3,3,3,3,3,3,2,3
A2M,2,3,2,3,2,2,3,3,2,3,...,2,2,2,2,2,3,3,3,2,2
A2ML1,3,3,4,3,3,3,4,3,3,3,...,2,3,3,3,3,3,4,3,3,3
A4GALT,3,4,4,3,4,3,4,4,3,3,...,3,4,3,3,3,4,3,4,3,3


In [18]:
importlib.reload(metrics_functions)
disease_shortest_paths = metrics_functions.shortest_paths(graph_apid_huri, disgenet_prot_index_main_comp)
disease_shortest_paths_df = pd.DataFrame.from_dict(disease_shortest_paths)
disease_shortest_paths_df = disease_shortest_paths_df.rename(index=dict(zip(list(disease_shortest_paths_df.index),list(graph_apid_huri.vs['name']))))
disease_shortest_paths_df.to_csv('../../data/processed/metrics/disease_shortest_paths_fp.csv')
print(disease_shortest_paths_df.shape)
disease_shortest_paths_df.head()

  0%|          | 0/7199 [00:00<?, ?it/s]

(17204, 7199)


Unnamed: 0,DISC1,TCF25,DNAJB6,NOX5,CCNI,STIM1,ARHGEF28,ADAP1,MYL7,ATOH8,...,EIF3E,DNAJA2,ANAPC10,SLC5A8,EXOC3,C4A,TCF7L2,CTSC,FCHSD2,CD226
A1BG,3,3,3,3,3,3,4,3,3,4,...,3,3,3,3,3,2,2,3,3,4
A1CF,3,3,2,4,3,3,3,3,3,4,...,3,3,3,3,3,3,2,3,3,3
A2M,2,3,2,3,3,2,3,3,3,3,...,2,2,2,3,3,2,2,3,2,3
A2ML1,3,3,3,4,3,3,4,3,3,4,...,3,3,3,4,2,3,3,3,2,4
A4GALT,3,3,3,4,3,3,3,4,4,4,...,3,3,3,4,3,3,3,4,3,5


In [19]:
importlib.reload(metrics_functions)
disease_shortest_paths_conservative = metrics_functions.shortest_paths(graph_apid_huri, disgenet_prot_index_conservative_module)
disease_shortest_paths_conservative_df = pd.DataFrame.from_dict(disease_shortest_paths_conservative)
disease_shortest_paths_conservative_df = disease_shortest_paths_conservative_df.rename(index=dict(zip(list(disease_shortest_paths_conservative_df.index),list(graph_apid_huri.vs['name']))))
disease_shortest_paths_conservative_df.to_csv('../../data/processed/metrics/disease_shortest_paths_conservative_fp.csv')
print(disease_shortest_paths_conservative_df.shape)
disease_shortest_paths_conservative_df.head()

  0%|          | 0/6918 [00:00<?, ?it/s]

(17204, 6918)


Unnamed: 0,DISC1,DNAJB6,STIM1,ARHGEF28,CBLIF,MYL7,ATOH8,GPNMB,GABBR1,ATOH1,...,EIF3E,DNAJA2,ANAPC10,SLC5A8,EXOC3,C4A,TCF7L2,CTSC,FCHSD2,CD226
A1BG,3,3,3,4,3,3,4,3,3,3,...,3,3,3,3,3,2,2,3,3,4
A1CF,3,2,3,3,4,3,4,3,3,4,...,3,3,3,3,3,3,2,3,3,3
A2M,2,2,2,3,3,3,3,3,2,3,...,2,2,2,3,3,2,2,3,2,3
A2ML1,3,3,3,4,4,3,4,3,3,4,...,3,3,3,4,2,3,3,3,2,4
A4GALT,3,3,3,3,4,4,4,3,4,4,...,3,3,3,4,3,3,3,4,3,5


# 1.3 Closeness

In [20]:
importlib.reload(metrics_functions)
process_closeness = metrics_functions.closeness(process_shortest_paths_df, reactome_proteins_indexes_apid_huri_df)
process_closeness_df = pd.DataFrame.from_dict(process_closeness)
process_closeness_df = process_closeness_df.rename(index=dict(zip(list(process_closeness_df.index),list(graph_apid_huri.vs['name']))))
process_closeness_df.to_csv('../../data/processed/metrics/process_closeness_fp.csv')
print(process_closeness_df.shape)
process_closeness_df.head()

  0%|          | 0/429 [00:00<?, ?it/s]

(17204, 429)


Unnamed: 0,R-HSA-1031716,R-HSA-112379,R-HSA-112385,R-HSA-1168640,R-HSA-1214188,R-HSA-1234159,R-HSA-141409,R-HSA-141422,R-HSA-141431,R-HSA-141439,...,R-HSA-9670149,R-HSA-9698928,R-HSA-9710490,R-HSA-977224,R-HSA-983140,R-HSA-983147,R-HSA-983156,R-HSA-983157,R-HSA-983259,R-HSA-983266
A1BG,0.333333,0.342697,0.346591,0.397163,0.29108,0.386364,0.335463,0.339806,0.336538,0.339806,...,0.311404,0.28972,0.300429,0.289855,0.357607,0.357911,0.357607,0.358073,0.352273,0.350282
A1CF,0.333333,0.348571,0.348571,0.394366,0.29108,0.377778,0.34202,0.343137,0.344262,0.346535,...,0.310044,0.29108,0.297872,0.289855,0.347222,0.345185,0.348101,0.347661,0.352273,0.352273
A2M,0.386473,0.438849,0.442029,0.427481,0.375758,0.43038,0.387454,0.388889,0.390335,0.387454,...,0.396648,0.373494,0.391061,0.379747,0.409836,0.403114,0.409836,0.407407,0.418919,0.416107
A2ML1,0.316206,0.333333,0.333333,0.333333,0.279279,0.330097,0.325077,0.330189,0.329154,0.329154,...,0.295833,0.278027,0.286885,0.280374,0.326216,0.324965,0.326216,0.325059,0.328042,0.329787
A4GALT,0.28777,0.30198,0.300493,0.325581,0.257261,0.325359,0.311573,0.310651,0.307018,0.310651,...,0.272031,0.258333,0.260223,0.25641,0.302863,0.302597,0.301866,0.30253,0.303922,0.303922


In [21]:
importlib.reload(metrics_functions)
disease_closeness = metrics_functions.closeness(disease_shortest_paths_df, disgenet_prot_index_main_comp)
disease_closeness_df = pd.DataFrame.from_dict(disease_closeness)
disease_closeness_df = disease_closeness_df.rename(index=dict(zip(list(disease_closeness_df.index),list(graph_apid_huri.vs['name']))))
disease_closeness_df.to_csv('../../data/processed/metrics/disease_closeness_fp.csv')
print(disease_closeness_df.shape)
disease_closeness_df.head()

  0%|          | 0/203 [00:00<?, ?it/s]

(17204, 203)


Unnamed: 0,C0001418,C0001973,C0002152,C0002395,C0002736,C0003873,C0004153,C0004238,C0004352,C0005684,...,C3714636,C3714758,C4277682,C4277690,C4279912,C4505456,C4552091,C4704862,C4707243,C4721507
A1BG,0.359223,0.324977,0.340136,0.342466,0.345992,0.332447,0.349206,0.338658,0.334612,0.336016,...,0.325103,0.340265,0.335145,0.336471,0.336058,0.363636,0.337711,0.339623,0.34434,0.337748
A1CF,0.357488,0.333333,0.340909,0.357143,0.359649,0.344353,0.345098,0.353923,0.339494,0.345041,...,0.329167,0.34749,0.335753,0.336471,0.335145,0.352,0.345489,0.346821,0.352657,0.335526
A2M,0.431487,0.391499,0.413223,0.43554,0.418367,0.412541,0.431373,0.407692,0.403468,0.412346,...,0.39899,0.410959,0.407489,0.393939,0.406147,0.419048,0.406321,0.408163,0.431953,0.412955
A2ML1,0.331096,0.311665,0.31185,0.323834,0.322835,0.316857,0.313167,0.325153,0.316985,0.314501,...,0.312253,0.320285,0.317597,0.319196,0.3157,0.331658,0.319149,0.320856,0.311966,0.315789
A4GALT,0.312896,0.292154,0.307377,0.307125,0.319066,0.295159,0.303448,0.301994,0.290349,0.298214,...,0.291513,0.301003,0.299838,0.295455,0.302288,0.313539,0.300501,0.301003,0.297959,0.301775


In [22]:
importlib.reload(metrics_functions)
disease_closeness_conservative = metrics_functions.closeness(disease_shortest_paths_conservative_df, disgenet_prot_index_conservative_module)
disease_closeness_conservative_df = pd.DataFrame.from_dict(disease_closeness_conservative)
disease_closeness_conservative_df = disease_closeness_conservative_df.rename(index=dict(zip(list(disease_closeness_conservative_df.index),list(graph_apid_huri.vs['name']))))
disease_closeness_conservative_df.to_csv('../../data/processed/metrics/disease_closeness_conservative_fp.csv')
print(disease_closeness_conservative_df.shape)
disease_closeness_conservative_df.head()

  0%|          | 0/301 [00:00<?, ?it/s]

(17204, 301)


Unnamed: 0,C0000786,C0000822,C0001418,C0001787,C0001973,C0002152,C0002395,C0002736,C0003873,C0004096,...,C4317123,C4505436,C4505456,C4552091,C4552766,C4704862,C4707243,C4721453,C4721507,C4722327
A1BG,0.32,0.320917,0.355685,0.345946,0.317857,0.335277,0.342756,0.342697,0.329181,0.326923,...,0.322857,0.326409,0.365449,0.332494,0.32,0.332494,0.331361,0.337349,0.337838,0.339744
A1CF,0.339394,0.341463,0.356725,0.342246,0.327607,0.333333,0.354015,0.350575,0.3407,0.329457,...,0.321023,0.324484,0.353698,0.340206,0.338369,0.341085,0.345679,0.343558,0.333333,0.348684
A2M,0.391608,0.388889,0.42807,0.418301,0.377119,0.407801,0.431111,0.414966,0.40393,0.388128,...,0.380471,0.384615,0.426357,0.39521,0.390244,0.391691,0.414815,0.408759,0.409836,0.398496
A2ML1,0.303523,0.303523,0.322751,0.318408,0.303409,0.304233,0.320132,0.316062,0.3125,0.303571,...,0.302949,0.303867,0.33033,0.315789,0.301887,0.315789,0.309392,0.316384,0.3125,0.323171
A4GALT,0.277916,0.277916,0.307305,0.297674,0.287097,0.304233,0.304075,0.309645,0.289515,0.294118,...,0.290488,0.291777,0.310734,0.296629,0.278607,0.293333,0.28866,0.304348,0.302419,0.292818


## 1.4 Betweenness

In [23]:
importlib.reload(metrics_functions)
process_betweenness = metrics_functions.betweenness(process_shortest_paths_df, reactome_proteins_indexes_apid_huri_df, graph_apid_huri)
process_betweenness_df = pd.DataFrame.from_dict(process_betweenness)
process_betweenness_df = process_betweenness_df.rename(index=dict(zip(list(process_betweenness_df.index),list(graph_apid_huri.vs['name']))))
process_betweenness_df.fillna(value=0, inplace=True)
process_betweenness_df.to_csv('../../data/processed/metrics/process_betweenness_fp.csv')
print(process_betweenness_df.shape)
process_betweenness_df.head()

  0%|          | 0/429 [00:00<?, ?it/s]

(17204, 429)


Unnamed: 0,R-HSA-1031716,R-HSA-112379,R-HSA-112385,R-HSA-1168640,R-HSA-1214188,R-HSA-1234159,R-HSA-141409,R-HSA-141422,R-HSA-141431,R-HSA-141439,...,R-HSA-9670149,R-HSA-9698928,R-HSA-9710490,R-HSA-977224,R-HSA-983140,R-HSA-983147,R-HSA-983156,R-HSA-983157,R-HSA-983259,R-HSA-983266
A1BG,0.046835,0.0,0.0,0.0,0.0,0.003073,0.0,0.0,0.0,0.000183,...,0.001207,0.0,0.0,0.0,0.003052,0.002886,0.002893,0.002946,0.002644,0.002644
A1CF,0.028481,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A2M,0.050949,0.01694,0.002732,0.000649,0.001058,0.000439,0.07619,0.069414,0.073626,0.070696,...,0.004829,0.001586,0.001656,0.001695,0.001407,0.002294,0.001354,0.004353,0.025912,0.027499
A2ML1,0.010443,0.0,0.0,0.0,0.010576,0.0,0.005861,0.000733,0.000733,0.000549,...,0.011268,0.008461,0.010352,0.00904,0.005335,0.005587,0.006025,0.005096,0.0,0.0
A4GALT,0.005696,0.0,0.0,0.0,0.0,0.0,0.003846,0.0,0.0,0.0,...,0.00161,0.0,0.0,0.0,0.000876,7.4e-05,8e-05,5.3e-05,0.0,0.0


In [24]:
importlib.reload(metrics_functions)
disease_betweenness = metrics_functions.betweenness(disease_shortest_paths_df, disgenet_prot_index_main_comp, graph_apid_huri)
disease_betweenness_df = pd.DataFrame.from_dict(disease_betweenness)
disease_betweenness_df = disease_betweenness_df.rename(index=dict(zip(list(disease_betweenness_df.index),list(graph_apid_huri.vs['name']))))
disease_betweenness_df.fillna(value=0, inplace=True)
disease_betweenness_df.to_csv('../../data/processed/metrics/disease_betweenness_fp.csv')
print(disease_betweenness_df.shape)
disease_betweenness_df.head()

  0%|          | 0/203 [00:00<?, ?it/s]

(17204, 203)


Unnamed: 0,C0001418,C0001973,C0002152,C0002395,C0002736,C0003873,C0004153,C0004238,C0004352,C0005684,...,C3714636,C3714758,C4277682,C4277690,C4279912,C4505456,C4552091,C4704862,C4707243,C4721507
A1BG,0.013514,6.5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.4e-05,0.0,4.4e-05,0.0,0.0,0.0,0.0,0.0
A1CF,0.014433,0.004977,0.000984,0.004258,0.0,0.009253,0.0,0.010507,0.003277,0.010461,...,0.000325,0.007449,0.001685,0.002758,0.001714,0.002892,0.010366,0.007325,0.001142,0.000194
A2M,0.013514,0.000131,0.0,0.0,0.0,0.000257,0.0,0.0,1.6e-05,0.012842,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000683,0.0,0.0,0.0
A2ML1,0.099375,0.080262,0.019955,0.017677,0.04336,0.042506,0.024556,0.047259,0.08377,0.054181,...,0.114249,0.057232,0.043902,0.015857,0.041178,0.05517,0.065611,0.063128,0.130137,0.080761
A4GALT,0.011215,0.020385,0.004385,0.013548,0.007829,0.008386,0.002874,0.018868,0.029493,0.00909,...,0.025641,0.008132,0.008511,0.021767,0.008262,0.002429,0.008504,0.008132,0.008752,0.005242


In [25]:
importlib.reload(metrics_functions)
disease_betweenness_conservative = metrics_functions.betweenness(disease_shortest_paths_conservative_df, disgenet_prot_index_conservative_module, graph_apid_huri)
disease_betweenness_conservative_df = pd.DataFrame.from_dict(disease_betweenness_conservative)
disease_betweenness_conservative_df = disease_betweenness_conservative_df.rename(index=dict(zip(list(disease_betweenness_conservative_df.index),list(graph_apid_huri.vs['name']))))
disease_betweenness_conservative_df.fillna(value=0, inplace=True)
disease_betweenness_conservative_df.to_csv('../../data/processed/metrics/disease_betweenness_conservative_fp.csv')
print(disease_betweenness_conservative_df.shape)
disease_betweenness_conservative_df.head()

  0%|          | 0/301 [00:00<?, ?it/s]

(17204, 301)


Unnamed: 0,C0000786,C0000822,C0001418,C0001787,C0001973,C0002152,C0002395,C0002736,C0003873,C0004096,...,C4317123,C4505436,C4505456,C4552091,C4552766,C4704862,C4707243,C4721453,C4721507,C4722327
A1BG,0.019305,0.001287,0.0,0.002976,0.000704,0.000305,0.0,0.0,0.002996,0.001681,...,0.004267,0.003503,0.001001,0.00081,0.001287,0.000925,0.018831,0.000649,0.001081,0.036284
A1CF,0.018822,0.000965,0.011245,0.004464,0.000704,0.006865,0.001289,0.0,0.0,0.004762,...,0.008692,0.000834,0.001501,0.001851,0.012227,0.000694,0.000649,0.0,0.0,0.00508
A2M,0.035875,0.031371,0.021948,0.014385,0.015207,0.010984,0.004725,0.052459,0.02027,0.017367,...,0.007269,0.011343,0.013678,0.027412,0.019466,0.039209,0.001299,0.055195,0.02018,0.024673
A2ML1,0.002735,0.0,0.0,0.000496,0.000366,0.00061,0.0,0.0,0.0,0.0,...,0.000474,0.000167,0.0,0.0,0.000322,0.0,0.003896,0.0,0.0,0.007983
A4GALT,0.159266,0.157819,0.134399,0.10119,0.145082,0.156674,0.309923,0.114208,0.094595,0.197479,...,0.223293,0.235696,0.077398,0.130234,0.157014,0.132663,0.188961,0.222078,0.136937,0.176343


## 1.5 Fraction Betweenness

In [149]:
importlib.reload(metrics_functions)
process_fraction_betweenness = metrics_functions.fraction_betweenness2(reactome_proteins_indexes_apid_huri_df, graph_apid_huri)
process_fraction_betweenness_df = pd.DataFrame.from_dict(process_fraction_betweenness)
process_fraction_betweenness_df = process_fraction_betweenness_df.rename(index=dict(zip(list(process_fraction_betweenness_df.index),list(graph_apid_huri.vs['name']))))
process_fraction_betweenness_df.fillna(value=0, inplace=True)
process_fraction_betweenness_df.to_csv('../../data/processed/metrics/process_fraction_betweenness_fp.csv')
print(process_fraction_betweenness_df.shape)
process_fraction_betweenness_df.head()

  0%|          | 0/5358 [00:00<?, ?it/s]

(17204, 429)


Unnamed: 0_level_0,R-HSA-1031716,R-HSA-112379,R-HSA-112385,R-HSA-1168640,R-HSA-1214188,R-HSA-1234159,R-HSA-141409,R-HSA-141422,R-HSA-141431,R-HSA-141439,...,R-HSA-9670149,R-HSA-9698928,R-HSA-9710490,R-HSA-977224,R-HSA-983140,R-HSA-983147,R-HSA-983156,R-HSA-983157,R-HSA-983259,R-HSA-983266
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.009615,...,0.0,0.0,0.0,0.0,1.793637,1.730643,1.702576,1.734266,0.0,0.0
A1CF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A2M,0.010612,0.044899,0.175973,0.0,0.04,0.0,0.0,0.0,0.002532,0.0,...,0.134413,0.0,0.140792,0.130034,17.723436,12.568642,14.036075,14.224804,0.275011,1.540129
A2ML1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A4GALT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [116]:
importlib.reload(metrics_functions)
disease_fraction_betweenness = metrics_functions.fraction_betweenness2(disgenet_prot_index_main_comp, graph_apid_huri)
disease_fraction_betweenness_df = pd.DataFrame.from_dict(disease_fraction_betweenness)
disease_fraction_betweenness_df = disease_fraction_betweenness_df.rename(index=dict(zip(list(disease_fraction_betweenness_df.index),list(graph_apid_huri.vs['name']))))
disease_fraction_betweenness_df.fillna(value=0, inplace=True)
disease_fraction_betweenness_df.to_csv('../../data/processed/metrics/disease_fraction_betweenness_fp.csv')
print(disease_fraction_betweenness_df.shape)
disease_fraction_betweenness_df.head()

  0%|          | 0/7198 [00:00<?, ?it/s]

(17204, 203)


Unnamed: 0_level_0,C0001418,C0001973,C0002152,C0002395,C0002736,C0003873,C0004153,C0004238,C0004352,C0005684,...,C3714636,C3714758,C4277682,C4277690,C4279912,C4505456,C4552091,C4704862,C4707243,C4721507
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,0.212934,0.019101,0.0,0.0,0.0,0.0,3.696973,0.0,7.858946,0.02213,...,0.0,0.034809,0.056698,0.0,0.056698,1.81214,0.034809,0.034809,0.018463,0.0
A1CF,0.010431,0.077411,0.0,0.064229,0.0,1.567072,0.0,0.737858,0.004175,0.0,...,0.0,0.0,0.017221,0.0,0.017221,0.0,0.0,0.0,0.009709,0.0
A2M,52.922762,227.817771,72.625051,58.239492,0.029711,258.993327,15.226463,5.828678,153.914565,39.738747,...,28.582884,5.354143,384.684859,0.227587,389.080035,28.463313,1.11461,1.074905,4.075592,69.812477
A2ML1,0.323394,0.0,0.008772,0.0,0.0,0.081765,0.0,0.0,0.085714,0.0,...,0.0,0.0894,0.372839,0.0,0.256981,0.0,0.0894,0.150764,0.0,0.0
A4GALT,0.0,0.002353,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.006289,0.0,0.006289,0.0,0.0,0.0,0.0,0.0


In [9]:
importlib.reload(metrics_functions)
disease_fraction_betweenness_conservative = metrics_functions.fraction_betweenness2(disgenet_prot_index_conservative_module, graph_apid_huri)
disease_fraction_betweenness_conservative_df = pd.DataFrame.from_dict(disease_fraction_betweenness_conservative)
disease_fraction_betweenness_conservative_df = disease_fraction_betweenness_conservative_df.rename(index=dict(zip(list(disease_fraction_betweenness_conservative_df.index),list(graph_apid_huri.vs['name']))))
disease_fraction_betweenness_conservative_df.fillna(value=0, inplace=True)
disease_fraction_betweenness_conservative_df.to_csv('../../data/processed/metrics/disease_fraction_betweenness_conservative_fp.csv')
print(disease_fraction_betweenness_conservative_df.shape)
disease_fraction_betweenness_conservative_df.head()

(17204, 301)


Unnamed: 0_level_0,C0000786,C0000822,C0001418,C0001787,C0001973,C0002152,C0002395,C0002736,C0003873,C0004096,...,C4317123,C4505436,C4505456,C4552091,C4552766,C4704862,C4707243,C4721453,C4721507,C4722327
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,0.012146,0.012146,0.171917,0.0,6.030181,0.0,0.0,0.0,0.0,0.0,...,0.011236,0.011236,1.682005,0.0,0.012146,0.0,0.0,0.0,0.0,0.0
A1CF,0.531681,0.520901,0.0,0.0,0.16902,0.0,0.021224,0.0,0.47032,0.0,...,0.0,0.0,0.0,0.0,0.506349,0.0,0.009709,0.0,0.0,0.0
A2M,57.416798,58.00176,43.771009,2.214591,163.746591,44.419069,41.192555,0.046407,167.025374,29.691933,...,41.184075,39.973768,25.550565,1.311266,60.541854,0.280434,0.631843,4.550468,51.857151,0.142366
A2ML1,0.0,0.0,0.17386,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821
A4GALT,0.0,0.0,0.0,0.0,0.002353,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 1.6 Random Walks with Restart

In [167]:
importlib.reload(metrics_functions)
process_rwr = metrics_functions.random_walk_restart(graph_apid_huri, reactome_proteins_indexes_apid_huri_df)
process_rwr_df = pd.DataFrame.from_dict(process_rwr)
process_rwr_df = process_rwr_df.rename(index=dict(zip(list(process_rwr_df.index),list(graph_apid_huri.vs['name']))))
process_rwr_df.to_csv('../../data/processed/metrics/process_rwr_fp.csv')
print(process_rwr_df.shape)
process_rwr_df.head()

  0%|          | 0/429 [00:00<?, ?it/s]

(17204, 429)


Unnamed: 0,R-HSA-1031716,R-HSA-112379,R-HSA-112385,R-HSA-1168640,R-HSA-1214188,R-HSA-1234159,R-HSA-141409,R-HSA-141422,R-HSA-141431,R-HSA-141439,...,R-HSA-9670149,R-HSA-9698928,R-HSA-9710490,R-HSA-977224,R-HSA-983140,R-HSA-983147,R-HSA-983156,R-HSA-983157,R-HSA-983259,R-HSA-983266
A1BG,2.5e-05,2.1e-05,2.1e-05,0.000139,2.2e-05,0.000119,2.5e-05,2.4e-05,2.4e-05,2.4e-05,...,2.3e-05,2.3e-05,2.4e-05,2.2e-05,5.8e-05,6.5e-05,5.8e-05,5.8e-05,2.9e-05,2.9e-05
A1CF,3e-05,2.8e-05,2.8e-05,7.2e-05,2.4e-05,6.2e-05,2.8e-05,2.9e-05,3e-05,2.9e-05,...,2.5e-05,2.5e-05,2.5e-05,2.4e-05,3.2e-05,3.1e-05,3.1e-05,3.1e-05,3.2e-05,3.3e-05
A2M,0.000209,0.000247,0.00028,0.000185,0.000218,0.000197,0.000188,0.000184,0.000186,0.000185,...,0.000215,0.000179,0.000221,0.000219,0.000257,0.000273,0.000255,0.000261,0.000244,0.000243
A2ML1,2e-05,1.4e-05,1.3e-05,1.4e-05,1.1e-05,1.4e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,...,1.2e-05,1.1e-05,1.2e-05,1.1e-05,1.9e-05,1.6e-05,1.4e-05,1.4e-05,1.3e-05,1.3e-05
A4GALT,5e-06,4e-06,4e-06,5e-06,3e-06,5e-06,1.1e-05,1.1e-05,1.1e-05,1.1e-05,...,3e-06,3e-06,3e-06,3e-06,8e-06,5e-06,4e-06,4e-06,5e-06,5e-06


In [168]:
importlib.reload(metrics_functions)
disease_rwr = metrics_functions.random_walk_restart(graph_apid_huri, disgenet_prot_index_main_comp)
disease_rwr_df = pd.DataFrame.from_dict(disease_rwr)
disease_rwr_df = disease_rwr_df.rename(index=dict(zip(list(disease_rwr_df.index),list(graph_apid_huri.vs['name']))))
disease_rwr_df.to_csv('../../data/processed/metrics/disease_rwr_fp.csv')
print(disease_rwr_df.shape)
disease_rwr_df.head()

  0%|          | 0/203 [00:00<?, ?it/s]

(17204, 203)


Unnamed: 0,C0001418,C0001973,C0002152,C0002395,C0002736,C0003873,C0004153,C0004238,C0004352,C0005684,...,C3714636,C3714758,C4277682,C4277690,C4279912,C4505456,C4552091,C4704862,C4707243,C4721507
A1BG,4.4e-05,2.8e-05,2.6e-05,2.6e-05,2.6e-05,2.8e-05,0.000181,2.6e-05,4.4e-05,3.2e-05,...,2.2e-05,5.2e-05,3.4e-05,3.2e-05,3.5e-05,7.6e-05,2.7e-05,2.7e-05,6.5e-05,2.5e-05
A1CF,3.4e-05,3.2e-05,3e-05,3.7e-05,3.4e-05,4.8e-05,3.5e-05,4.4e-05,3.1e-05,3.1e-05,...,2.9e-05,3.5e-05,3e-05,3.3e-05,3.1e-05,3.4e-05,3.5e-05,3.4e-05,3.4e-05,2.8e-05
A2M,0.000413,0.000798,0.000468,0.001707,0.000231,0.001167,0.000343,0.00022,0.000272,0.000384,...,0.000859,0.00022,0.000438,0.000192,0.000446,0.00033,0.000224,0.000218,0.000311,0.000873
A2ML1,4.6e-05,1.5e-05,1.8e-05,1.9e-05,1.8e-05,2.8e-05,1.4e-05,1.5e-05,1.8e-05,1.7e-05,...,2.9e-05,3.6e-05,3.5e-05,1.3e-05,3.5e-05,1.4e-05,3.2e-05,3.2e-05,1.3e-05,2e-05
A4GALT,5e-06,9e-06,8e-06,7e-06,9e-06,6e-06,6e-06,5e-06,6e-06,5e-06,...,9e-06,5e-06,8e-06,5e-06,8e-06,5e-06,5e-06,6e-06,5e-06,1e-05


In [169]:
importlib.reload(metrics_functions)
disease_rwr_conservative = metrics_functions.random_walk_restart(graph_apid_huri, disgenet_prot_index_conservative_module)
disease_rwr_conservative_df = pd.DataFrame.from_dict(disease_rwr_conservative)
disease_rwr_conservative_df = disease_rwr_conservative_df.rename(index=dict(zip(list(disease_rwr_conservative_df.index),list(graph_apid_huri.vs['name']))))
disease_rwr_conservative_df.to_csv('../../data/processed/metrics/disease_rwr_conservative_fp.csv')
print(disease_rwr_conservative_df.shape)
disease_rwr_conservative_df.head()

  0%|          | 0/301 [00:00<?, ?it/s]

(17204, 301)


Unnamed: 0,C0000786,C0000822,C0001418,C0001787,C0001973,C0002152,C0002395,C0002736,C0003873,C0004096,...,C4317123,C4505436,C4505456,C4552091,C4552766,C4704862,C4707243,C4721453,C4721507,C4722327
A1BG,2.7e-05,2.7e-05,4.9e-05,4.9e-05,2.6e-05,2.6e-05,2.7e-05,2.6e-05,2.9e-05,2.9e-05,...,2.9e-05,3.1e-05,8.7e-05,2.4e-05,2.7e-05,2.3e-05,4.2e-05,2.6e-05,2.5e-05,2.5e-05
A1CF,6.1e-05,6.2e-05,3.1e-05,3.6e-05,3.2e-05,3e-05,5.1e-05,3.2e-05,4.4e-05,2.9e-05,...,2.9e-05,3e-05,3.6e-05,3.5e-05,6.8e-05,3.6e-05,3.5e-05,3.2e-05,2.7e-05,3.5e-05
A2M,0.000491,0.000493,0.000523,0.000301,0.000371,0.000459,0.002113,0.000233,0.000642,0.000496,...,0.000435,0.000454,0.000357,0.000207,0.000492,0.000207,0.000265,0.000383,0.001062,0.000251
A2ML1,4e-05,4e-05,5e-05,1.6e-05,1.6e-05,1.6e-05,1.8e-05,1.7e-05,2.2e-05,2.3e-05,...,1.6e-05,1.6e-05,1.4e-05,2.6e-05,4.1e-05,2.6e-05,1.3e-05,1.6e-05,2e-05,3.4e-05
A4GALT,5e-06,5e-06,5e-06,5e-06,1e-05,6e-06,6e-06,5e-06,5e-06,6e-06,...,7e-06,7e-06,5e-06,5e-06,5e-06,5e-06,5e-06,6e-06,7e-06,5e-06
