In [2]:
import pandas as pd
import numpy as np
import networkx as nx
import nxviz as nv
from nxviz import MatrixPlot, ArcPlot, CircosPlot
import matplotlib.pyplot as plt

# Import data (nodes and edges) and filter edges

In [3]:
# Disease node table: one row per disease, keyed by `diseaseId`.
nodes_csv_path = "processed_data/nodes_diseases_AutismGrouped.csv"
nodes = pd.read_csv(nodes_csv_path)

# Preview the first two rows only
nodes.head(n=2)

Unnamed: 0,diseaseId,diseaseName,diseaseType,diseaseClass,diseaseSemanticType,autism_subtype_broad,n_genes,is_autism
0,C0019209,Hepatomegaly,phenotype,C06;C23,Finding,Not Autism,43,False
1,C0036341,Schizophrenia,disease,F03,Mental or Behavioral Dysfunction,Not Autism,1022,False


In [4]:
# Disease-pair edge table: one row per (diseaseId_x, diseaseId_y) pair.
edges_csv_path = "processed_data/edges_shared_genes_AutismGrouped.csv"
edges = pd.read_csv(edges_csv_path)

# Preview the first two rows only
edges.head(n=2)

Unnamed: 0,diseaseId_x,diseaseId_y,n_shared_genes,n_genes_x,n_genes_y,n_total_genes,p,p_log
0,A00,A01,3,292,9,9413,0.002154,2.666773
1,A00,A02,1,292,3,9413,0.090187,1.044855


## Delete edges with p > 0.001

In [5]:
# Keep only rows whose p_log is at least 3; rows with a missing p_log
# compare as False and are dropped as well.
passes_p_log_cutoff = edges['p_log'].ge(3)
edges_p3 = edges.loc[passes_p_log_cutoff]

edges_p3.head(5)

Unnamed: 0,diseaseId_x,diseaseId_y,n_shared_genes,n_genes_x,n_genes_y,n_total_genes,p,p_log
3,A00,A04,3,292,4,9413,0.000115,3.937982
9,A00,A10,5,292,23,9413,0.000583,3.234176
23,A00,C0001430,8,292,36,9413,1.1e-05,4.968263
27,A00,C0001723,4,292,6,9413,1.3e-05,4.888589
33,A00,C0001956,9,292,47,9413,1.1e-05,4.961774


# Load data into NetworkX graph (p ≤ 0.001)

In [6]:
# Build the undirected disease graph from the two tables.
# Every column listed below is copied verbatim onto the node / edge
# as an attribute of the same name.
node_attr_cols = [
    'diseaseName',
    'diseaseType',
    'diseaseClass',
    'diseaseSemanticType',
    'autism_subtype_broad',
    'n_genes',
    'is_autism',
]
edge_attr_cols = [
    'n_shared_genes',
    'n_genes_x',
    'n_genes_y',
    'n_total_genes',
    'p',
    'p_log',
]

G = nx.Graph()

# One node per row of `nodes`, identified by its diseaseId
G.add_nodes_from(
    (rec['diseaseId'], {col: rec[col] for col in node_attr_cols})
    for rec in nodes.to_dict('records')
)

# One edge per row of the filtered edge table `edges_p3`
G.add_edges_from(
    (rec['diseaseId_x'], rec['diseaseId_y'], {col: rec[col] for col in edge_attr_cols})
    for rec in edges_p3.to_dict('records')
)

# Degree & Betweenness Centrality

## Degree & Degree centrality

In [7]:
# Degree and degree centrality over the whole graph, written onto each
# node's attribute dict as 'degree' and 'degree_centrality'.
deg_cent = nx.degree_centrality(G)

for node_id, node_degree in G.degree():
    node_attrs = G.nodes[node_id]
    node_attrs['degree'] = node_degree
    node_attrs['degree_centrality'] = deg_cent[node_id]

## Betweenness centrality

In [8]:
# Betweenness centrality over the whole graph, stored on every node
# under the 'btw_centrality' attribute.
btw_cent = nx.betweenness_centrality(G)

nx.set_node_attributes(G, btw_cent, 'btw_centrality')

## Save network containing all diseases

In [9]:
# Persist the full disease graph (with the node attributes set so far) as GraphML
all_diseases_graphml_path = "graphml/p3_network_all_diseases.graphml"
nx.write_graphml_lxml(G, all_diseases_graphml_path)

## Create a network containing only autism nodes and their neighbors

In [10]:
# Nodes whose 'is_autism' attribute compares equal to True
autism_nodes = [
    node_id
    for node_id, attrs in G.nodes(data=True)
    if attrs['is_autism'] == True
]

# Selection = the autism nodes themselves plus every node adjacent to one of them
autism_subgraph_nodes = set(autism_nodes).union(
    *(G.neighbors(autism_node) for autism_node in autism_nodes)
)

autism_subgraph_nodes

{'A00',
 'A01',
 'A02',
 'A03',
 'A04',
 'A05',
 'A06',
 'A07',
 'A08',
 'A09',
 'A10',
 'A11',
 'A12',
 'A13',
 'A14',
 'A15',
 'A16',
 'A17',
 'C0000772',
 'C0001430',
 'C0001723',
 'C0001956',
 'C0001969',
 'C0001973',
 'C0002152',
 'C0002395',
 'C0002888',
 'C0003081',
 'C0003090',
 'C0003469',
 'C0003615',
 'C0003811',
 'C0003865',
 'C0003869',
 'C0003873',
 'C0003875',
 'C0004096',
 'C0004114',
 'C0004153',
 'C0004930',
 'C0004936',
 'C0005586',
 'C0005587',
 'C0005684',
 'C0005695',
 'C0005944',
 'C0006111',
 'C0006272',
 'C0006434',
 'C0006826',
 'C0007097',
 'C0007102',
 'C0007103',
 'C0007112',
 'C0007115',
 'C0007131',
 'C0007134',
 'C0007137',
 'C0007194',
 'C0007279',
 'C0007370',
 'C0007786',
 'C0007847',
 'C0007959',
 'C0008073',
 'C0008149',
 'C0008297',
 'C0008909',
 'C0009171',
 'C0009241',
 'C0009319',
 'C0009375',
 'C0009402',
 'C0009404',
 'C0009405',
 'C0010273',
 'C0010346',
 'C0010474',
 'C0010709',
 'C0010828',
 'C0011206',
 'C0011265',
 'C0011570',
 'C0011573'

In [39]:
# Induced subgraph on the selected nodes; copied so that G_autism is an
# independent graph rather than a view onto G.
autism_view = G.subgraph(autism_subgraph_nodes)
G_autism = autism_view.copy()

In [40]:
# Degree and degree centrality recomputed inside the autism subgraph.
# The result is kept under its own name (matching `btw_cent_autism`) so that
# the whole-graph `deg_cent` mapping computed earlier in the notebook is not
# silently overwritten when cells are run or re-run out of order.
deg_cent_autism = nx.degree_centrality(G_autism)

for n, d in G_autism.nodes(data=True):
    # '*_autismonly' attributes sit alongside the whole-graph values
    # already stored on each node.
    d['degree_autismonly'] = G_autism.degree(n)
    d['degree_centrality_autismonly'] = deg_cent_autism[n]

In [36]:
# Betweenness centrality computed on the autism subgraph only, stored on
# each of its nodes as 'btw_centrality_autismonly'.
btw_cent_autism = nx.betweenness_centrality(G_autism)

for node_id, score in btw_cent_autism.items():
    G_autism.nodes[node_id]['btw_centrality_autismonly'] = score

In [29]:
# p_log attribute of every edge in the autism subgraph, in edge-iteration order
p_logs = [edge_attrs['p_log'] for _, _, edge_attrs in G_autism.edges(data=True)]


In [41]:
# Add an integer 'p_log_trunc' to every edge: p_log truncated to an int,
# with anything at or above 10 collapsed to 10.
for u, v, edge_attrs in G_autism.edges(data=True):
    p_log_value = edge_attrs['p_log']
    edge_attrs['p_log_trunc'] = 10 if p_log_value >= 10 else int(p_log_value)

### Save the network containing only autism nodes and their neighbors

In [42]:
# Persist the autism-plus-neighbors subgraph as GraphML
autism_graphml_path = "graphml/p3_network_autism_neighbors.graphml"
nx.write_graphml_lxml(G_autism, autism_graphml_path)