In [1]:
import pandas as pd
from igraph import Graph
import disease_process_proteins
import metrics_functions
import importlib
import numpy as np
from ast import literal_eval
from tqdm.notebook import tqdm
from metrics_functions import genePANDA

In [2]:
graph = Graph.Read_GML("../../data/processed/graph_apid_huri")

In [3]:
# Index tables; their 'process' column later supplies the column names
# for the label matrices read from the header-less label CSVs.
reactome_proteins_indexes_df = pd.read_csv(
    "../../data/processed/reactome_proteins_indexes_apid_huri.csv",
    sep=',',
    header=0,
)
disgenet_proteins_indexes_df = pd.read_csv(
    "../../data/processed/disgenet_prot_index_main_comp.csv",
    sep=',',
    header=0,
)

# Keep only the DisGeNET rows whose 'increase' value is strictly below 0.4.
# NOTE(review): the meaning of 'increase' and the rationale for the 0.4
# cut-off are not visible in this notebook — confirm and document.
disgenet_prot_index_main_comp = disgenet_proteins_indexes_df.loc[
    lambda indexes: indexes['increase'] < 0.4
]

In [4]:
def _read_label_matrix(csv_path, process_names):
    """Load a label CSV and index its rows by protein id.

    The columns are named after ``process_names`` (the file is read without a
    header row because ``names`` is supplied). The 'protein_id' index is taken
    from the graph's vertex names, so the file is assumed to hold exactly one
    row per vertex, in vertex order — TODO confirm against the code that
    writes these files.
    """
    return (
        pd.read_csv(csv_path, sep=',', names=process_names)
        .assign(protein_id=graph.vs['name'])
        .set_index('protein_id')
    )


reactome_labels_df = _read_label_matrix(
    "../../data/processed/reactome_labels_apid_huri.csv",
    reactome_proteins_indexes_df['process'].values,
)

# Filtered DisGeNET labels: columns follow the 'increase'-filtered index table.
disgenet_labels_df = _read_label_matrix(
    "../../data/processed/disgenet_filtered_labels_apid_huri.csv",
    disgenet_prot_index_main_comp['process'].values,
)

# Conservative DisGeNET labels: columns follow the unfiltered index table.
disgenet_labels_conservative_df = _read_label_matrix(
    '../../data/processed/disgenet_conservative_labels_apid_huri.csv',
    disgenet_proteins_indexes_df['process'].values,
)

In [5]:
# All-pairs shortest-path length matrix of the graph, as a NumPy array
# (row/column order follows the graph's vertex order).
# Graph.shortest_paths() was deprecated in python-igraph 0.10 in favour of
# Graph.distances(), which takes the same default arguments; prefer the new
# name and fall back to the old one on releases that predate it.
if hasattr(graph, 'distances'):
    sp = np.array(graph.distances())
else:
    sp = np.array(graph.shortest_paths())

In [8]:
# Run genePANDA (imported from metrics_functions; implementation not shown
# here) on the Reactome label matrix, reusing the precomputed distance
# matrix `sp`, then persist the result as CSV.
process_genePANDA = genePANDA(
    graph,
    reactome_labels_df,
    sp,
)
process_genePANDA.to_csv(
    '../../data/processed/metrics/process_genepanda.csv'
)

  0%|          | 0/429 [00:00<?, ?it/s]

In [12]:
# Run genePANDA (imported from metrics_functions; implementation not shown
# here) on the filtered DisGeNET label matrix, reusing the precomputed
# distance matrix `sp`, then persist the result as CSV.
disease_genePANDA = genePANDA(
    graph,
    disgenet_labels_df,
    sp,
)
disease_genePANDA.to_csv(
    '../../data/processed/metrics/disease_genepanda.csv'
)

  0%|          | 0/203 [00:00<?, ?it/s]

In [11]:
# Run genePANDA (imported from metrics_functions; implementation not shown
# here) on the conservative DisGeNET label matrix, reusing the precomputed
# distance matrix `sp`, then persist the result as CSV.
disease_conservative_genePANDA = genePANDA(
    graph,
    disgenet_labels_conservative_df,
    sp,
)
disease_conservative_genePANDA.to_csv(
    '../../data/processed/metrics/disease_genepanda_conservative.csv'
)

  0%|          | 0/301 [00:00<?, ?it/s]

# False Positives

In [6]:
# Load the three false-positive index tables (Reactome, conservative
# DisGeNET, and the remaining DisGeNET variant) in one pass; files are read
# in the order listed and bound to the matching names.
(
    reactome_proteins_indexes_fp,
    disgenet_proteins_indexes_conservative_fp,
    disgenet_proteins_indexes_sca_fp,
) = [
    pd.read_csv(fp_csv)
    for fp_csv in (
        '../../data/processed/reactome_protein_indexes_fp.csv',
        '../../data/processed/disgenet_protein_indexes_conservative_fp.csv',
        '../../data/processed/disgenet_protein_indexes_fp.csv',
    )
]

In [7]:
def _parse_literal_once(cell):
    """Turn a stringified Python literal into its value, at most once.

    Values that are already containers (list/tuple/set/dict) are returned
    unchanged, so re-executing this cell does not fail with ValueError from
    ``literal_eval`` being handed an already-parsed object. Anything else
    (including NaN) still goes through ``literal_eval`` and raises exactly as
    it did before.
    """
    if isinstance(cell, (list, tuple, set, dict)):
        return cell
    return literal_eval(cell)


# The CSV round-trip stores these columns as text; convert them back to
# Python objects. Order matches the original cell: 'fp_proteins' for all
# three frames first, then 'fp_proteins_index'.
for _fp_column in ('fp_proteins', 'fp_proteins_index'):
    for _fp_frame in (
        reactome_proteins_indexes_fp,
        disgenet_proteins_indexes_conservative_fp,
        disgenet_proteins_indexes_sca_fp,
    ):
        _fp_frame[_fp_column] = _fp_frame[_fp_column].apply(_parse_literal_once)

In [8]:
# Promote the false-positive columns to the standard column names
# ('proteins_ids', 'protein_index') and add the per-process protein count.
#
# The previous version first dropped the old 'proteins_ids'/'protein_index'
# columns in place; that was redundant (the explicit column selection below
# discards them anyway) and mutated the frame before rebinding it. It also
# raised KeyError when the cell was executed a second time. The guard makes
# the cell a no-op once the columns have been promoted.
if 'fp_proteins' in reactome_proteins_indexes_fp.columns:
    reactome_proteins_indexes_fp = (
        reactome_proteins_indexes_fp[['process', 'fp_proteins', 'fp_proteins_index']]
        .rename(columns={
            'fp_proteins': 'proteins_ids',
            'fp_proteins_index': 'protein_index',
        })
        # n_proteins = number of entries in each row's protein list
        .assign(n_proteins=lambda fp: fp['proteins_ids'].apply(len))
    )

In [9]:
def _promote_fp_columns(indexes_fp):
    """Return a frame whose false-positive columns use the standard names.

    Keeps 'process', renames 'fp_proteins' -> 'proteins_ids' and
    'fp_proteins_index' -> 'protein_index', and adds 'n_proteins' (length of
    each row's 'proteins_ids' value). The input frame is not modified.

    If 'fp_proteins' is absent the frame is assumed to have been converted
    already and is returned as-is, so the cell can be re-executed safely
    (the previous in-place drop + rename raised KeyError on a second run).
    """
    if 'fp_proteins' not in indexes_fp.columns:
        return indexes_fp
    return (
        indexes_fp[['process', 'fp_proteins', 'fp_proteins_index']]
        .rename(columns={
            'fp_proteins': 'proteins_ids',
            'fp_proteins_index': 'protein_index',
        })
        .assign(n_proteins=lambda fp: fp['proteins_ids'].apply(len))
    )


disgenet_proteins_indexes_conservative_fp = _promote_fp_columns(
    disgenet_proteins_indexes_conservative_fp
)
disgenet_proteins_indexes_sca_fp = _promote_fp_columns(
    disgenet_proteins_indexes_sca_fp
)

In [10]:
# False-positive label matrices. Column names come from the 'process' column
# of the matching false-positive index table; the files are read without a
# header row because `names` is supplied.
# NOTE(review): unlike the non-FP label frames, these are not given a
# 'protein_id' index from graph.vs['name'] — confirm genePANDA does not rely
# on that index.
reactome_labels_fp_df = pd.read_csv(
    "../../data/processed/reactome_labels_fp.csv",
    sep=',',
    names=reactome_proteins_indexes_fp['process'].values,
)
disgenet_labels_fp_df = pd.read_csv(
    "../../data/processed/disgenet_labels_fp.csv",
    sep=',',
    names=disgenet_proteins_indexes_sca_fp['process'].values,
)
disgenet_conservative_labels_fp_df = pd.read_csv(
    "../../data/processed/disgenet_conservative_labels_fp.csv",
    sep=',',
    names=disgenet_proteins_indexes_conservative_fp['process'].values,
)

In [14]:
# Run genePANDA (imported from metrics_functions; implementation not shown
# here) on the Reactome false-positive labels, reusing the precomputed
# distance matrix `sp`, then persist the result as CSV.
process_genePANDA_fp = genePANDA(
    graph,
    reactome_labels_fp_df,
    sp,
)
process_genePANDA_fp.to_csv(
    '../../data/processed/metrics/process_genepanda_fp.csv'
)

  0%|          | 0/429 [00:00<?, ?it/s]

In [15]:
# Run genePANDA (imported from metrics_functions; implementation not shown
# here) on the DisGeNET false-positive labels, reusing the precomputed
# distance matrix `sp`, then persist the result as CSV.
disease_genePANDA_fp = genePANDA(
    graph,
    disgenet_labels_fp_df,
    sp,
)
disease_genePANDA_fp.to_csv(
    '../../data/processed/metrics/disease_genepanda_fp.csv'
)

  0%|          | 0/203 [00:00<?, ?it/s]

In [16]:
# Run genePANDA (imported from metrics_functions; implementation not shown
# here) on the conservative DisGeNET false-positive labels, reusing the
# precomputed distance matrix `sp`, then persist the result as CSV.
disease_conservative_genePANDA_fp = genePANDA(
    graph,
    disgenet_conservative_labels_fp_df,
    sp,
)
disease_conservative_genePANDA_fp.to_csv(
    '../../data/processed/metrics/disease_genepanda_conservative_fp.csv'
)

  0%|          | 0/301 [00:00<?, ?it/s]