# Creating a Graph with igraph

## 0. Imports

In [1]:
import pandas as pd
from igraph import Graph
import disease_process_proteins
import importlib
import numpy as np
import statistics

## 1. Graph Creation

### 1.1. APID & HuRI Graph

In [3]:
# Load the APID & HuRI protein-protein interaction table from the processed data folder.
apidhuri_interactions = pd.read_csv(
    "../../data/processed/ppis/apid_huri_ppis.csv",
    sep=',',
    header=0,
)
# Preview the first rows as the cell output.
apidhuri_interactions.head()


Unnamed: 0,HGNC_A,HGNC_B,apid,dorothea,huri,omnipath,is_directed
0,SCRIB,ARHGEF7,True,False,False,False,False
1,SCRIB,NET1,True,False,False,False,False
2,KCNA5,SCRIB,True,False,False,False,False
3,VWCE,SCRIB,True,False,False,False,False
4,DNM1L,SCRIB,True,False,False,False,False


In [4]:
# Build an undirected graph from the interaction table.
big_graph_apid_huri = Graph.DataFrame(
    apidhuri_interactions,
    directed=False,
)
# NOTE(review): igraph's simplify() is understood to remove self-loops and
# multi-edges in place and return the same graph, so both names presumably
# refer to one object -- confirm against the installed igraph version.
g_simple_apid_huri = big_graph_apid_huri.simplify()

In [7]:
# Restrict the graph to a single connected component, then export it as GML.
if not g_simple_apid_huri.is_connected():
    cluster = g_simple_apid_huri.clusters()
    # giant() yields the largest connected component. Indexing cluster[0]
    # only selected whichever component igraph listed first, which is not
    # guaranteed to be the largest one.
    g_simple_apid_huri = cluster.giant()
g_simple_apid_huri.write_gml("../../data/processed/graph_apid_huri")

In [5]:
# One row per graph vertex, holding the vertex 'name' attribute as 'protein_id'.
protein_ids_apid_huri_df = pd.DataFrame({'protein_id': g_simple_apid_huri.vs['name']})
protein_ids_apid_huri_df.to_csv(
    '../../data/processed/protein_ids_apid_huri.csv',
    index=False,
)
# Preview the first rows as the cell output.
protein_ids_apid_huri_df.head()

Unnamed: 0,protein_id
0,A1BG
1,A1CF
2,A2M
3,A2ML1
4,A4GALT


In [9]:
# Convert the graph's adjacency matrix to a NumPy array and save it to disk.
adj_matrix_apid_huri = np.array(g_simple_apid_huri.get_adjacency().data)
np.save(
    '../../data/processed/adjacency_matrix_apid_huri.npy',
    adj_matrix_apid_huri,
    allow_pickle=True,
    fix_imports=True,
)

## 2. Data Load

In [None]:
# Load the interim DisGeNET table and print its (rows, columns) shape.
disgenet = pd.read_csv(
    "../../data/interim/disgenet.csv",
    sep=',',
    header=0,
)
print(disgenet.shape)

In [None]:
# Keep only the rows whose gene symbol is one of the graph's protein IDs.
disgenet = disgenet.loc[
    disgenet['geneSymbol'].isin(protein_ids_apid_huri_df['protein_id'])
]
print(disgenet.shape)
disgenet.head()

In [None]:
# Load the interim Reactome reactions table and print its (rows, columns) shape.
reactome = pd.read_csv(
    "../../data/interim/ReactomeReactions.csv",
    sep=',',
    header=0,
)
print(reactome.shape)

In [None]:
# Keep only the rows whose 'HGNC ID' value is one of the graph's protein IDs.
reactome = reactome.loc[
    reactome['HGNC ID'].isin(protein_ids_apid_huri_df['protein_id'])
]
print(reactome.shape)
reactome.head()

### 2.1. Network Process/Disease Proteins

In [None]:
# Restrict DisGeNET to proteins present in the graph. This is the same
# membership filter applied in the data-load section, so on already-filtered
# data it removes nothing.
disgenet = disgenet.loc[
    disgenet['geneSymbol'].isin(protein_ids_apid_huri_df['protein_id'])
]
print(disgenet.shape)
disgenet.head()

In [None]:
# Restrict Reactome to proteins present in the graph. This is the same
# membership filter applied in the data-load section, so on already-filtered
# data it removes nothing.
reactome = reactome.loc[
    reactome['HGNC ID'].isin(protein_ids_apid_huri_df['protein_id'])
]
print(reactome.shape)
reactome.head()

In [None]:
# Re-import the helper module so that edits made to it on disk are picked up.
importlib.reload(disease_process_proteins)
# NOTE(review): get_protein_index is defined outside this notebook; its result
# is later used as a DataFrame with a 'protein_index' column -- confirm.
disgenet_proteins_indexes_apid_huri_df = disease_process_proteins.get_protein_index(
    disgenet,
    'disgenet',
    g_simple_apid_huri,
)

In [None]:
# Count the entries in each 'protein_index' value, keep only rows with
# between 50 and 300 of them (both bounds inclusive), and save the result.
disgenet_proteins_indexes_apid_huri_df['n_proteins'] = (
    disgenet_proteins_indexes_apid_huri_df['protein_index'].apply(len)
)
disgenet_proteins_indexes_apid_huri_df = disgenet_proteins_indexes_apid_huri_df[
    disgenet_proteins_indexes_apid_huri_df['n_proteins'].between(50, 300)
]
disgenet_proteins_indexes_apid_huri_df.to_csv(
    '../../data/processed/disgenet_proteins_indexes_apid_huri.csv',
    index=False,
)
disgenet_proteins_indexes_apid_huri_df.head()

In [None]:
# Re-import the helper module so that edits made to it on disk are picked up.
importlib.reload(disease_process_proteins)
# NOTE(review): get_protein_index is defined outside this notebook; its result
# is later used as a DataFrame with a 'protein_index' column -- confirm.
reactome_proteins_indexes_apid_huri_df = disease_process_proteins.get_protein_index(
    reactome,
    'reactome',
    g_simple_apid_huri,
)

In [None]:
# Count the entries in each 'protein_index' value, keep only rows with
# between 50 and 300 of them (both bounds inclusive), and save the result.
reactome_proteins_indexes_apid_huri_df['n_proteins'] = (
    reactome_proteins_indexes_apid_huri_df['protein_index'].apply(len)
)
reactome_proteins_indexes_apid_huri_df = reactome_proteins_indexes_apid_huri_df[
    reactome_proteins_indexes_apid_huri_df['n_proteins'].between(50, 300)
]
reactome_proteins_indexes_apid_huri_df.to_csv(
    '../../data/processed/reactome_proteins_indexes_apid_huri.csv',
    index=False,
)
reactome_proteins_indexes_apid_huri_df.head()

## 3. Graph Random Increase/Reduction

In [2]:
# Read the APID & HuRI interaction table again, as input for the random reductions.
ppi_interactions_apid_huri = pd.read_csv(
    "../../data/processed/ppis/apid_huri_ppis.csv",
    sep=',',
    header=0,
)

In [None]:
# Re-import the helper module so that edits made to it on disk are picked up.
importlib.reload(disease_process_proteins)
# NOTE(review): random_reduction is defined outside this notebook; 0.8 and 10
# look like a kept fraction and a number of repetitions -- confirm.
ppi_80_apid_huri = disease_process_proteins.random_reduction(
    ppi_interactions_apid_huri,
    0.8,
    10,
)
# Persist the reduction result next to the source interaction table.
np.save(
    '../../data/processed/ppis/ppis_red80_apid_huri.npy',
    ppi_80_apid_huri,
    allow_pickle=True,
    fix_imports=True,
)

In [30]:
# Re-import the helper module so that edits made to it on disk are picked up.
importlib.reload(disease_process_proteins)
# NOTE(review): random_reduction_protein is defined outside this notebook;
# 0.8 and 10 look like a kept fraction and a number of repetitions -- confirm.
ppi_80_red = disease_process_proteins.random_reduction_protein(
    ppi_interactions_apid_huri,
    0.8,
    10,
)

In [31]:
# Persist the protein-level reduction result to disk.
np.save(
    '../../data/processed/ppis/ppis_red_protein80_apid_huri.npy',
    ppi_80_red,
    allow_pickle=True,
    fix_imports=True,
)