In [1]:
import pandas as pd
from igraph import Graph
import disease_process_proteins
import metrics_functions
import importlib
import numpy as np
from ast import literal_eval
from tqdm.notebook import tqdm

In [3]:
# Tables listing, per Reactome process / DisGeNET disease, the proteins involved
# (judging by the file names; both are parsed further in the next cell).
reactome_proteins_indexes_apid_huri_df = pd.read_csv(
    "../../data/processed/reactome_proteins_indexes_apid_huri.csv",
    sep=',',
    header=0,
)
disgenet_proteins_indexes_apid_huri_df = pd.read_csv(
    "../../data/processed/disgenet_prot_index_main_comp.csv",
    sep=',',
    header=0,
)

# Protein identifier table and the adjacency matrix of the network.
protein_ids_apid_huri_df = pd.read_csv(
    "../../data/processed/protein_ids_apid_huri.csv",
    sep=',',
    header=0,
)
adj_matrix_apid_huri = np.load("../../data/processed/adjacency_matrix_apid_huri.npy")

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code. Only load these two files if they were produced locally.
ppi_80 = np.load(
    "../../data/processed/ppis/ppis_red80_apid_huri.npy",
    allow_pickle=True,
)
ppi_80_protein = np.load(
    "../../data/processed/ppis/ppis_red_protein80_apid_huri.npy",
    allow_pickle=True,
)

In [3]:
def _parse_literal(cell):
    """Parse a stringified Python literal; return non-string values unchanged.

    The guard makes this cell safe to re-run: once a column has been parsed its
    cells are no longer strings, and calling literal_eval on them would raise.
    """
    return literal_eval(cell) if isinstance(cell, str) else cell


# Columns that were written to CSV as text and must be turned back into Python objects.
_REACTOME_LITERAL_COLUMNS = ['protein_index', 'proteins_ids']
_DISGENET_LITERAL_COLUMNS = [
    'protein_index',
    'proteins_ids',
    'conservative_module',
    'added_nodes',
    'conservative_module_ids',
    'added_nodes_ids',
    'main_component',
    'main_component_ids',
]

for _column in _REACTOME_LITERAL_COLUMNS:
    reactome_proteins_indexes_apid_huri_df[_column] = (
        reactome_proteins_indexes_apid_huri_df[_column].apply(_parse_literal)
    )

for _column in _DISGENET_LITERAL_COLUMNS:
    disgenet_proteins_indexes_apid_huri_df[_column] = (
        disgenet_proteins_indexes_apid_huri_df[_column].apply(_parse_literal)
    )

In [4]:
# Load the interaction network from its GML file into an igraph Graph.
# Later cells read vertex labels from graph_apid_huri.vs['name'].
graph_apid_huri = Graph.Read_GML("../../data/processed/graph_apid_huri")

In [5]:
# Conservative-module view of the DisGeNET table. The selected columns are
# renamed to the generic 'proteins_ids' / 'protein_index' / 'n_proteins' names
# so the same metric functions can consume it.
disgenet_prot_index_conservative_module = disgenet_proteins_indexes_apid_huri_df[
    ['process', 'conservative_module_ids', 'conservative_module', 'len_cm']
].rename(
    columns={
        'conservative_module_ids': 'proteins_ids',
        'conservative_module': 'protein_index',
        'len_cm': 'n_proteins',
    }
)

In [6]:
# Main-component view of the DisGeNET table, keeping only rows whose 'increase'
# value is below 0.4.
# NOTE(review): the meaning of 'increase' and the 0.4 cut-off is not visible in
# this notebook; consider naming the threshold in a config cell.
disgenet_prot_index_main_comp = disgenet_proteins_indexes_apid_huri_df.loc[
    disgenet_proteins_indexes_apid_huri_df['increase'] < 0.4,
    ['process', 'main_component_ids', 'main_component', 'len_main_component'],
].rename(
    columns={
        'main_component_ids': 'proteins_ids',
        'main_component': 'protein_index',
        'len_main_component': 'n_proteins',
    }
)

# 1. Target Process Metrics

## 1.1. Hypergeometric Test

For a given protein and a given process, the interactions formed between the protein and the proteins of the process are counted. The hypergeometric test is computed from this count, the total number of interactions of the protein, the number of proteins in the process, and the total number of proteins. For easier analysis, the p-values are reported in -log10 form (a higher -log10 value means a lower p-value).

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Hypergeometric test for the Reactome processes.
process_hypergeometric = metrics_functions.hypergeometric_test(
    graph_apid_huri,
    reactome_proteins_indexes_apid_huri_df,
    adj_matrix_apid_huri,
)
# orient='index' turns each key of the returned dict into one row.
process_hypergeometric_df = pd.DataFrame.from_dict(
    process_hypergeometric,
    orient='index',
)
process_hypergeometric_df.to_csv(
    '../../data/processed/metrics/process_hypergeometric_apid_huri.csv'
)
process_hypergeometric_df.head()

In [24]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Hypergeometric test for the DisGeNET diseases (main-component protein sets).
disease_hypergeometric = metrics_functions.hypergeometric_test(
    graph_apid_huri,
    disgenet_prot_index_main_comp,
    adj_matrix_apid_huri,
)
# orient='index' turns each key of the returned dict into one row.
disease_hypergeometric_df = pd.DataFrame.from_dict(
    disease_hypergeometric,
    orient='index',
)
disease_hypergeometric_df.to_csv(
    '../../data/processed/metrics/disease_hypergeometric_apid_huri.csv'
)
disease_hypergeometric_df.head()

  0%|          | 0/17204 [00:00<?, ?it/s]

Unnamed: 0,C0001418,C0001973,C0002152,C0002395,C0002736,C0003873,C0004153,C0004238,C0004352,C0005684,...,C3714636,C3714758,C4277682,C4277690,C4279912,C4505456,C4552091,C4704862,C4707243,C4721507
A1BG,2.04396,1.379243,0.151996,0.126394,0.083059,0.253634,5.689782,1.747847,1.381316,1.943301,...,0.07973,0.182096,0.376095,0.144201,0.376095,4.914419,0.182096,0.182096,2.672936,0.103047
A1CF,1.785112,0.487009,0.208038,1.923426,0.113683,7.996198,2.227873,3.469776,0.485468,0.230917,...,0.109126,0.249235,0.514763,1.815525,0.514763,0.182132,0.249235,0.249235,0.101533,0.141041
A2M,8.715559,6.484236,8.627216,4.215602,1.06756,14.165729,7.389683,5.186704,4.185355,4.737281,...,3.827877,2.134912,8.798478,1.561258,8.798478,4.059586,2.134912,2.134912,4.003607,6.730071
A2ML1,2.448706,0.224681,0.095978,0.079811,0.052448,1.984753,0.055952,0.135432,0.22397,0.106533,...,0.050345,2.272826,1.664898,0.091055,1.664898,0.084027,2.272826,2.272826,0.046842,0.065069
A4GALT,0.031516,0.074876,0.031985,0.026598,0.017478,0.053373,0.018646,0.045133,0.074639,0.035503,...,0.016778,0.038319,2.605643,0.030345,2.605643,0.028002,0.038319,0.038319,0.01561,0.021685


In [7]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Hypergeometric test for the DisGeNET diseases (conservative-module protein sets).
disease_hypergeometric_conservative = metrics_functions.hypergeometric_test(
    graph_apid_huri,
    disgenet_prot_index_conservative_module,
    adj_matrix_apid_huri,
)
# orient='index' turns each key of the returned dict into one row.
disease_hypergeometric_conservative_df = pd.DataFrame.from_dict(
    disease_hypergeometric_conservative,
    orient='index',
)
disease_hypergeometric_conservative_df.to_csv(
    '../../data/processed/metrics/disease_hypergeometric_conservative_apid_huri.csv'
)
disease_hypergeometric_conservative_df.head()

  0%|          | 0/17204 [00:00<?, ?it/s]

Unnamed: 0,C0000786,C0000822,C0001418,C0001787,C0001973,C0002152,C0002395,C0002736,C0003873,C0004096,...,C4317123,C4505436,C4505456,C4552091,C4552766,C4704862,C4707243,C4721453,C4721507,C4722327
A1BG,0.113049,0.113049,2.2144,0.065309,1.57049,0.116385,0.098604,0.061983,0.187675,0.086389,...,0.114161,0.110826,5.260844,0.133069,0.113049,0.133069,0.056441,0.056441,0.076401,0.054224
A1CF,4.621729,4.621729,0.168428,0.089389,0.370151,0.159296,0.134959,0.084836,3.702238,0.118241,...,0.156252,0.151688,0.151688,0.182132,4.621729,0.182132,0.07725,0.07725,0.10457,0.074217
A2M,3.030981,3.030981,5.981776,2.529585,5.448077,2.969242,5.005173,0.494507,7.399312,1.054729,...,1.792428,1.825588,4.626353,1.061647,3.030981,1.061647,0.450288,1.242269,5.871015,0.432605
A2ML1,0.071385,0.071385,2.628989,0.04124,0.170769,0.073491,0.062263,0.039139,0.118507,0.05455,...,0.072087,0.069981,0.069981,0.084027,0.071385,0.084027,0.035639,0.035639,0.048243,0.03424
A4GALT,0.023789,0.023789,0.025895,0.013743,0.05691,0.024491,0.02075,0.013043,0.039493,0.018179,...,0.024023,0.023322,0.023322,0.028002,0.023789,0.028002,0.011877,0.011877,0.016077,0.011411


## 1.2 Shortest Paths

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Shortest-path table for the Reactome protein sets.
process_shortest_paths = metrics_functions.shortest_paths(
    graph_apid_huri,
    reactome_proteins_indexes_apid_huri_df,
)
process_shortest_paths_df = pd.DataFrame.from_dict(process_shortest_paths)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
process_shortest_paths_df = process_shortest_paths_df.rename(
    index=dict(zip(process_shortest_paths_df.index, graph_apid_huri.vs['name']))
)
process_shortest_paths_df.to_csv(
    '../../data/processed/metrics/process_shortest_paths_apid_huri.csv'
)
print(process_shortest_paths_df.shape)
process_shortest_paths_df.head()

In [25]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Shortest-path table for the DisGeNET main-component protein sets.
disease_shortest_paths = metrics_functions.shortest_paths(
    graph_apid_huri,
    disgenet_prot_index_main_comp,
)
disease_shortest_paths_df = pd.DataFrame.from_dict(disease_shortest_paths)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_shortest_paths_df = disease_shortest_paths_df.rename(
    index=dict(zip(disease_shortest_paths_df.index, graph_apid_huri.vs['name']))
)
disease_shortest_paths_df.to_csv(
    '../../data/processed/metrics/disease_shortest_paths_apid_huri.csv'
)
print(disease_shortest_paths_df.shape)
disease_shortest_paths_df.head()

  0%|          | 0/5825 [00:00<?, ?it/s]

(17204, 5825)


Unnamed: 0,PCSK1,RPS14,RCN1,TBP,PTGER3,CABLES1,SLC27A6,RRM1,GAB2,PIK3R1,...,NCCRP1,KDM2A,COQ9,PTGS1,GSTZ1,CPAMD8,KCND3,UBE3C,SEC31A,GATA4
A1BG,4,3,3,2,3,3,3,3,3,3,...,3,3,3,3,3,3,4,3,3,3
A1CF,3,3,3,3,2,3,3,3,3,3,...,3,3,3,3,2,4,4,3,2,3
A2M,3,3,2,2,2,2,3,2,2,1,...,3,2,3,3,2,3,3,3,3,3
A2ML1,4,3,3,3,3,4,3,3,3,3,...,2,3,3,4,3,4,4,3,3,3
A4GALT,3,3,3,3,4,3,3,4,3,3,...,4,4,4,4,4,4,3,3,3,4


In [8]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Shortest-path table for the DisGeNET conservative-module protein sets.
disease_shortest_paths_conservative = metrics_functions.shortest_paths(
    graph_apid_huri,
    disgenet_prot_index_conservative_module,
)
disease_shortest_paths_conservative_df = pd.DataFrame.from_dict(
    disease_shortest_paths_conservative
)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_shortest_paths_conservative_df = disease_shortest_paths_conservative_df.rename(
    index=dict(
        zip(disease_shortest_paths_conservative_df.index, graph_apid_huri.vs['name'])
    )
)
disease_shortest_paths_conservative_df.to_csv(
    '../../data/processed/metrics/disease_shortest_paths_conservative_apid_huri.csv'
)
print(disease_shortest_paths_conservative_df.shape)
disease_shortest_paths_conservative_df.head()

  0%|          | 0/5348 [00:00<?, ?it/s]

(17204, 5348)


Unnamed: 0,SLC6A15,SP4,KDM5A,PAK3,ITGAV,SRRT,LRTOMT,YBX1,CGA,EEF1A1,...,DDX60,PLIN1,BAK1,SLC27A6,ITGB4,CTH,PDLIM4,GLIS3,ASIC2,SMPD1
A1BG,3,3,3,4,3,2,4,3,4,2,...,3,3,3,3,3,3,3,3,3,2
A1CF,3,2,3,3,3,3,4,2,4,2,...,2,3,2,3,2,3,3,2,3,3
A2M,2,2,3,2,2,3,4,2,3,2,...,2,3,3,3,2,3,3,3,3,2
A2ML1,3,3,3,4,3,3,4,3,4,3,...,3,3,3,3,3,3,3,3,3,4
A4GALT,2,4,4,4,3,3,5,3,4,3,...,3,4,3,3,3,4,4,4,4,2


## 1.3 Closeness

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Closeness is computed from the shortest-path table built in section 1.2.
process_closeness = metrics_functions.closeness(
    process_shortest_paths_df,
    reactome_proteins_indexes_apid_huri_df,
)
process_closeness_df = pd.DataFrame.from_dict(process_closeness)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
process_closeness_df = process_closeness_df.rename(
    index=dict(zip(process_closeness_df.index, graph_apid_huri.vs['name']))
)
process_closeness_df.to_csv(
    '../../data/processed/metrics/process_closeness_apid_huri.csv'
)
print(process_closeness_df.shape)
process_closeness_df.head()

In [26]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Closeness is computed from the main-component shortest-path table built in section 1.2.
disease_closeness = metrics_functions.closeness(
    disease_shortest_paths_df,
    disgenet_prot_index_main_comp,
)
disease_closeness_df = pd.DataFrame.from_dict(disease_closeness)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_closeness_df = disease_closeness_df.rename(
    index=dict(zip(disease_closeness_df.index, graph_apid_huri.vs['name']))
)
disease_closeness_df.to_csv(
    '../../data/processed/metrics/disease_closeness_apid_huri.csv'
)
print(disease_closeness_df.shape)
disease_closeness_df.head()

  0%|          | 0/203 [00:00<?, ?it/s]

(17204, 203)


Unnamed: 0,C0001418,C0001973,C0002152,C0002395,C0002736,C0003873,C0004153,C0004238,C0004352,C0005684,...,C3714636,C3714758,C4277682,C4277690,C4279912,C4505456,C4552091,C4704862,C4707243,C4721507
A1BG,0.36193,0.322874,0.339109,0.343373,0.347222,0.331395,0.352423,0.337413,0.334034,0.336283,...,0.32287,0.338843,0.335657,0.331633,0.335657,0.368098,0.338843,0.338843,0.34359,0.338182
A1CF,0.359043,0.332292,0.339109,0.35514,0.360577,0.342857,0.343348,0.35219,0.33758,0.344671,...,0.325792,0.345263,0.333994,0.333333,0.333994,0.351906,0.345263,0.345263,0.352632,0.334532
A2M,0.436893,0.389024,0.412651,0.43346,0.418994,0.410072,0.430108,0.405462,0.402023,0.413043,...,0.39779,0.405941,0.406514,0.38806,0.406514,0.424028,0.405941,0.405941,0.435065,0.415179
A2ML1,0.330073,0.308809,0.311364,0.322946,0.323276,0.317107,0.311284,0.323283,0.315476,0.3147,...,0.311688,0.319688,0.314953,0.317848,0.314953,0.331492,0.319688,0.319688,0.311628,0.316327
A4GALT,0.313225,0.289737,0.307865,0.308108,0.321888,0.294194,0.305344,0.301563,0.289091,0.296875,...,0.290323,0.299817,0.300357,0.292135,0.300357,0.314961,0.299817,0.299817,0.29646,0.305921


In [9]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Closeness is computed from the conservative-module shortest-path table built in section 1.2.
disease_closeness_conservative = metrics_functions.closeness(
    disease_shortest_paths_conservative_df,
    disgenet_prot_index_conservative_module,
)
disease_closeness_conservative_df = pd.DataFrame.from_dict(
    disease_closeness_conservative
)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_closeness_conservative_df = disease_closeness_conservative_df.rename(
    index=dict(
        zip(disease_closeness_conservative_df.index, graph_apid_huri.vs['name'])
    )
)
disease_closeness_conservative_df.to_csv(
    '../../data/processed/metrics/disease_closeness_conservative_apid_huri.csv'
)
print(disease_closeness_conservative_df.shape)
disease_closeness_conservative_df.head()

  0%|          | 0/301 [00:00<?, ?it/s]

(17204, 301)


Unnamed: 0,C0000786,C0000822,C0001418,C0001787,C0001973,C0002152,C0002395,C0002736,C0003873,C0004096,...,C4317123,C4505436,C4505456,C4552091,C4552766,C4704862,C4707243,C4721453,C4721507,C4722327
A1BG,0.31875,0.31875,0.356913,0.347059,0.31436,0.333333,0.342308,0.343558,0.326255,0.32636,...,0.322884,0.324675,0.369004,0.330579,0.31875,0.330579,0.331169,0.337748,0.334951,0.337931
A1CF,0.337748,0.337748,0.355769,0.343023,0.324433,0.332278,0.354582,0.352201,0.339357,0.32636,...,0.319876,0.322581,0.350877,0.340909,0.337748,0.340909,0.346939,0.342282,0.331731,0.347518
A2M,0.387833,0.387833,0.428571,0.412587,0.372699,0.403846,0.432039,0.414815,0.402381,0.386139,...,0.381481,0.383142,0.429185,0.392157,0.387833,0.392157,0.414634,0.414634,0.405882,0.395161
A2ML1,0.300885,0.300885,0.323615,0.315508,0.301489,0.302594,0.318996,0.314607,0.310662,0.301158,...,0.30117,0.302115,0.330033,0.314136,0.300885,0.314136,0.309091,0.314815,0.310811,0.322368
A4GALT,0.276423,0.276423,0.309192,0.293532,0.283547,0.303468,0.305842,0.311111,0.286927,0.293233,...,0.29096,0.289855,0.31348,0.291971,0.276423,0.291971,0.291429,0.305389,0.30131,0.293413


## 1.4 Betweenness

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Betweenness for the Reactome protein sets, based on the section 1.2 shortest-path table.
process_betweenness = metrics_functions.betweenness(
    process_shortest_paths_df,
    reactome_proteins_indexes_apid_huri_df,
    graph_apid_huri,
)
process_betweenness_df = pd.DataFrame.from_dict(process_betweenness)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
process_betweenness_df = process_betweenness_df.rename(
    index=dict(zip(process_betweenness_df.index, graph_apid_huri.vs['name']))
)
# Missing entries are stored as 0.
process_betweenness_df = process_betweenness_df.fillna(value=0)
process_betweenness_df.to_csv(
    '../../data/processed/metrics/process_betweenness_apid_huri.csv'
)
print(process_betweenness_df.shape)
process_betweenness_df.head()

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Betweenness for the DisGeNET main-component sets, based on the section 1.2 shortest-path table.
disease_betweenness = metrics_functions.betweenness(
    disease_shortest_paths_df,
    disgenet_prot_index_main_comp,
    graph_apid_huri,
)
disease_betweenness_df = pd.DataFrame.from_dict(disease_betweenness)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_betweenness_df = disease_betweenness_df.rename(
    index=dict(zip(disease_betweenness_df.index, graph_apid_huri.vs['name']))
)
# Missing entries are stored as 0.
disease_betweenness_df = disease_betweenness_df.fillna(value=0)
disease_betweenness_df.to_csv(
    '../../data/processed/metrics/disease_betweenness_apid_huri.csv'
)
print(disease_betweenness_df.shape)
disease_betweenness_df.head()

In [10]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Betweenness for the DisGeNET conservative-module sets, based on the section 1.2 shortest-path table.
disease_betweenness_conservative = metrics_functions.betweenness(
    disease_shortest_paths_conservative_df,
    disgenet_prot_index_conservative_module,
    graph_apid_huri,
)
disease_betweenness_conservative_df = pd.DataFrame.from_dict(
    disease_betweenness_conservative
)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_betweenness_conservative_df = disease_betweenness_conservative_df.rename(
    index=dict(
        zip(disease_betweenness_conservative_df.index, graph_apid_huri.vs['name'])
    )
)
# Missing entries are stored as 0.
disease_betweenness_conservative_df = disease_betweenness_conservative_df.fillna(value=0)
disease_betweenness_conservative_df.to_csv(
    '../../data/processed/metrics/disease_betweenness_conservative_apid_huri.csv'
)
print(disease_betweenness_conservative_df.shape)
disease_betweenness_conservative_df.head()

  0%|          | 0/301 [00:00<?, ?it/s]

(17204, 301)


Unnamed: 0,C0000786,C0000822,C0001418,C0001787,C0001973,C0002152,C0002395,C0002736,C0003873,C0004096,...,C4317123,C4505436,C4505456,C4552091,C4552766,C4704862,C4707243,C4721453,C4721507,C4722327
A1BG,0.019608,0.019608,0.0,0.0,0.000102,0.000549,0.0,0.0,0.000493,0.0,...,0.0,0.0,0.0,0.005462,0.019608,0.005462,0.00549,0.0,0.0,0.0
A1CF,0.02019,0.02019,0.0,0.0,0.001428,0.004945,0.0,0.0,0.000634,0.002331,...,0.0,0.0,0.0,0.0,0.02019,0.0,0.044706,0.0,0.007673,0.0
A2M,0.041351,0.041351,0.001474,0.00526,0.009489,0.032601,0.00383,0.0,0.018949,0.012654,...,0.001142,0.001818,0.000606,0.006022,0.041351,0.006022,0.049412,0.000784,0.038789,0.0
A2ML1,0.023296,0.023296,0.000655,0.004676,0.000272,0.000366,0.0,0.000649,0.003874,0.000333,...,0.0,0.0,0.00202,0.007283,0.023296,0.007283,0.100392,0.0,0.000853,0.0
A4GALT,0.039992,0.039992,0.025061,0.019287,0.019318,0.027656,0.049796,0.0,0.045083,0.024975,...,0.021892,0.022424,0.022828,0.004342,0.039992,0.004342,0.01098,0.021961,0.060955,0.006803


## 1.5 Fraction Betweenness

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Fraction betweenness for the Reactome protein sets.
process_fraction_betweenness = metrics_functions.fraction_betweenness(
    reactome_proteins_indexes_apid_huri_df,
    graph_apid_huri,
)
process_fraction_betweenness_df = pd.DataFrame.from_dict(process_fraction_betweenness)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
process_fraction_betweenness_df = process_fraction_betweenness_df.rename(
    index=dict(
        zip(process_fraction_betweenness_df.index, graph_apid_huri.vs['name'])
    )
)
# Missing entries are stored as 0.
process_fraction_betweenness_df = process_fraction_betweenness_df.fillna(value=0)
process_fraction_betweenness_df.to_csv(
    '../../data/processed/metrics/process_fraction_betweenness_apid_huri.csv'
)
print(process_fraction_betweenness_df.shape)
process_fraction_betweenness_df.head()

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Fraction betweenness for the DisGeNET main-component protein sets.
disease_fraction_betweenness = metrics_functions.fraction_betweenness(
    disgenet_prot_index_main_comp,
    graph_apid_huri,
)
disease_fraction_betweenness_df = pd.DataFrame.from_dict(disease_fraction_betweenness)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_fraction_betweenness_df = disease_fraction_betweenness_df.rename(
    index=dict(
        zip(disease_fraction_betweenness_df.index, graph_apid_huri.vs['name'])
    )
)
# Missing entries are stored as 0.
disease_fraction_betweenness_df = disease_fraction_betweenness_df.fillna(value=0)
disease_fraction_betweenness_df.to_csv(
    '../../data/processed/metrics/disease_fraction_betweenness_apid_huri.csv'
)
print(disease_fraction_betweenness_df.shape)
disease_fraction_betweenness_df.head()

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Fraction betweenness for the DisGeNET conservative-module protein sets.
disease_fraction_betweenness_conservative = metrics_functions.fraction_betweenness(
    disgenet_prot_index_conservative_module,
    graph_apid_huri,
)
# Build the frame from the conservative-module result computed just above,
# not from the main-component `disease_fraction_betweenness` of the previous cell.
disease_fraction_betweenness_conservative_df = pd.DataFrame.from_dict(
    disease_fraction_betweenness_conservative
)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_fraction_betweenness_conservative_df = disease_fraction_betweenness_conservative_df.rename(
    index=dict(
        zip(
            disease_fraction_betweenness_conservative_df.index,
            graph_apid_huri.vs['name'],
        )
    )
)
# Missing entries are stored as 0.
disease_fraction_betweenness_conservative_df = (
    disease_fraction_betweenness_conservative_df.fillna(value=0)
)
disease_fraction_betweenness_conservative_df.to_csv(
    '../../data/processed/metrics/disease_fraction_betweenness_conservative_apid_huri.csv'
)
print(disease_fraction_betweenness_conservative_df.shape)
disease_fraction_betweenness_conservative_df.head()

  0%|          | 0/5347 [00:00<?, ?it/s]

## 1.6 Random Walks with Restart

In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Random walk with restart for the Reactome protein sets.
process_rwr = metrics_functions.random_walk_restart(
    graph_apid_huri,
    reactome_proteins_indexes_apid_huri_df,
)
process_rwr_df = pd.DataFrame.from_dict(process_rwr)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
process_rwr_df = process_rwr_df.rename(
    index=dict(zip(process_rwr_df.index, graph_apid_huri.vs['name']))
)
process_rwr_df.to_csv('../../data/processed/metrics/process_rwr_apid_huri.csv')
print(process_rwr_df.shape)
process_rwr_df.head()

In [29]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Random walk with restart for the DisGeNET main-component protein sets.
disease_rwr = metrics_functions.random_walk_restart(
    graph_apid_huri,
    disgenet_prot_index_main_comp,
)
disease_rwr_df = pd.DataFrame.from_dict(disease_rwr)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_rwr_df = disease_rwr_df.rename(
    index=dict(zip(disease_rwr_df.index, graph_apid_huri.vs['name']))
)
disease_rwr_df.to_csv('../../data/processed/metrics/disease_rwr_apid_huri.csv')
print(disease_rwr_df.shape)
disease_rwr_df.head()

  0%|          | 0/203 [00:00<?, ?it/s]

(17204, 203)


Unnamed: 0,C0001418,C0001973,C0002152,C0002395,C0002736,C0003873,C0004153,C0004238,C0004352,C0005684,...,C3714636,C3714758,C4277682,C4277690,C4279912,C4505456,C4552091,C4704862,C4707243,C4721507
A1BG,4.6e-05,2.9e-05,2.6e-05,2.6e-05,2.6e-05,2.9e-05,0.000197,2.6e-05,4.6e-05,3e-05,...,2.2e-05,2.7e-05,3.5e-05,3.3e-05,3.5e-05,8.1e-05,2.7e-05,2.7e-05,6.8e-05,2.6e-05
A1CF,3.4e-05,3.2e-05,3e-05,3.7e-05,3.4e-05,4.8e-05,3.6e-05,4.5e-05,3.1e-05,3.1e-05,...,2.8e-05,3.5e-05,3e-05,3.2e-05,3e-05,3.4e-05,3.5e-05,3.5e-05,3.4e-05,2.8e-05
A2M,0.000436,0.000845,0.000496,0.001836,0.000235,0.001258,0.000359,0.000223,0.000278,0.000401,...,0.000922,0.000221,0.000461,0.000188,0.000461,0.000345,0.000221,0.000221,0.000317,0.000939
A2ML1,4.9e-05,1.6e-05,1.9e-05,1.9e-05,1.5e-05,3e-05,1.5e-05,1.4e-05,1.8e-05,1.7e-05,...,3.1e-05,3.4e-05,3.7e-05,1.3e-05,3.7e-05,1.4e-05,3.4e-05,3.4e-05,1.3e-05,2e-05
A4GALT,5e-06,9e-06,8e-06,7e-06,8e-06,6e-06,6e-06,5e-06,6e-06,5e-06,...,9e-06,6e-06,8e-06,5e-06,8e-06,5e-06,6e-06,6e-06,5e-06,1e-05


In [None]:
# Re-import metrics_functions so edits to the module take effect without a kernel restart.
importlib.reload(metrics_functions)

# Random walk with restart for the DisGeNET conservative-module protein sets.
disease_rwr_conservative = metrics_functions.random_walk_restart(
    graph_apid_huri,
    disgenet_prot_index_conservative_module,
)
disease_rwr_conservative_df = pd.DataFrame.from_dict(disease_rwr_conservative)
# Relabel the rows: current row labels are paired, in order, with the graph's vertex names.
disease_rwr_conservative_df = disease_rwr_conservative_df.rename(
    index=dict(zip(disease_rwr_conservative_df.index, graph_apid_huri.vs['name']))
)
disease_rwr_conservative_df.to_csv(
    '../../data/processed/metrics/disease_rwr_conservative_apid_huri.csv'
)
print(disease_rwr_conservative_df.shape)
disease_rwr_conservative_df.head()

## 1.7 Network Reductions

In [None]:
importlib.reload(metrics_functions)
ppi80_hyper, ppi80_closeness, ppi80_betweenness, ppi80_rwr, ppi80_fraction_betweenness = metrics_functions.multiple_metrics(ppi_80, reactome_proteins_indexes_apid_huri_df)
ppi80_hyper.to_csv('../../data/processed/metrics/process_ppi80_hyper_apid_huri.csv')
ppi80_closeness.to_csv('../../data/processed/metrics/process_ppi80_closeness_apid_huri.csv')
ppi80_betweenness.to_csv('../../data/processed/metrics/process_ppi80_betweenness_apid_huri.csv')
ppi80_rwr.to_csv('../../data/processed/metrics/process_ppi80_rwr_apid_huri.csv')
ppi80_fraction_betweenness.to_csv('../../data/processed/process_ppi80_fraction_betweenness_apid_huri.csv')

In [None]:
importlib.reload(metrics_functions)
protein80_hyper, protein80_closeness, protein80_betweenness, protein80_rwr, protein80_fraction_betweenness = metrics_functions.multiple_metrics(ppi_80_protein, reactome_proteins_indexes_apid_huri_df)
protein80_hyper.to_csv('../../data/processed/metrics/process_protein80_hyper_apid_huri.csv')
protein80_closeness.to_csv('../../data/processed/metrics/process_protein80_closeness_apid_huri.csv')
protein80_betweenness.to_csv('../../data/processed/metrics/process_protein80_betweenness_apid_huri.csv')
protein80_rwr.to_csv('../../data/processed/metrics/process_protein80_rwr_apid_huri.csv')
protein80_fraction_betweenness.to_csv('../../data/processed/metrics/process_protein80_fraction_betweenness_apid_huri.csv')

In [None]:
importlib.reload(metrics_functions)
disease_ppi80_hyper, disease_ppi80_closeness, disease_ppi80_betweenness, disease_ppi80_rwr, disease_ppi80_fraction_betweenness = metrics_functions.multiple_metrics(ppi_80, disgenet_prot_index_main_comp)
disease_ppi80_hyper.to_csv('../../data/processed/metrics/disease_ppi80_hyper_apid_huri.csv')
disease_ppi80_closeness.to_csv('../../data/processed/metrics/disease_ppi80_closeness_apid_huri.csv')
disease_ppi80_betweenness.to_csv('../../data/processed/metrics/disease_ppi80_betweenness_apid_huri.csv')
disease_ppi80_rwr.to_csv('../../data/processed/metrics/disease_ppi80_rwr_apid_huri.csv')
disease_ppi80_fraction_betweenness.to_csv('../../data/processed/disease_ppi80_fraction_betweenness_apid_huri.csv')

In [None]:
importlib.reload(metrics_functions)
disease_protein80_hyper, disease_protein80_closeness, disease_protein80_betweenness, disease_protein80_rwr, disease_protein80_fraction_betweenness = metrics_functions.multiple_metrics(ppi_80_protein, disgenet_prot_index_main_comp)
disease_protein80_hyper.to_csv('../../data/processed/metrics/disease_protein80_hyper_apid_huri.csv')
disease_protein80_closeness.to_csv('../../data/processed/metrics/disease_protein80_closeness_apid_huri.csv')
disease_protein80_betweenness.to_csv('../../data/processed/metrics/disease_protein80_betweenness_apid_huri.csv')
disease_protein80_rwr.to_csv('../../data/processed/metrics/disease_protein80_rwr_apid_huri.csv')
disease_protein80_fraction_betweenness.to_csv('../../data/processed/disease_protein80_fraction_betweenness_apid_huri.csv')

In [None]:
importlib.reload(metrics_functions)
disease_ppi80_hyper_conservative, disease_ppi80_closeness_conservative, disease_ppi80_betweenness_conservative, disease_ppi80_rwr_conservative, disease_ppi80_fraction_betweenness_conservative = metrics_functions.multiple_metrics(ppi_80, disgenet_prot_index_conservative_module)
disease_ppi80_hyper_conservative.to_csv('../../data/processed/metrics/disease_ppi80_hyper_conservative_apid_huri.csv')
disease_ppi80_closeness_conservative.to_csv('../../data/processed/metrics/disease_ppi80_closeness_conservative_apid_huri.csv')
disease_ppi80_betweenness_conservative.to_csv('../../data/processed/metrics/disease_ppi80_betweenness_conservative_apid_huri.csv')
disease_ppi80_rwr_conservative.to_csv('../../data/processed/metrics/disease_ppi80_rwr_conservative_apid_huri.csv')
disease_ppi80_fraction_betweenness_conservative.to_csv('../../data/processed/disease_ppi80_fraction_betweenness_conservative_apid_huri.csv')

In [None]:
importlib.reload(metrics_functions)
disease_protein80_hyper_conservative, disease_protein80_closeness_conservative, disease_protein80_betweenness_conservative, disease_protein80_rwr_conservative, disease_protein80_fraction_betweenness_conservative = metrics_functions.multiple_metrics(ppi_80_protein, disgenet_prot_index_conservative_module)
disease_protein80_hyper_conservative.to_csv('../../data/processed/metrics/disease_protein80_hyper_conservative_apid_huri.csv')
disease_protein80_closeness_conservative.to_csv('../../data/processed/metrics/disease_protein80_closeness_conservative_apid_huri.csv')
disease_protein80_betweenness_conservative.to_csv('../../data/processed/metrics/disease_protein80_betweenness_conservative_apid_huri.csv')
disease_protein80_rwr_conservative.to_csv('../../data/processed/metrics/disease_protein80_rwr_conservative_apid_huri.csv')
disease_protein80_fraction_betweenness_conservative.to_csv('../../data/processed/disease_protein80_fraction_betweenness_conservative_apid_huri.csv')