In [15]:
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import nxviz as nv
import pandas as pd
import seaborn as sns
from nxviz import MatrixPlot, ArcPlot, CircosPlot

In [16]:
# Gene-disease association table; the file is tab-separated despite its
# .csv extension.
data = pd.read_csv(
    "./processed_data/curated_gene_disease_associations_autism_grouped.csv",
    sep="\t",
)

# Raw gene association table (tab-separated).
# NOTE(review): genes_df is not referenced by any cell visible here -- confirm
# it is still needed.
genes_df = pd.read_csv(
    "../raw_data/gene_associations.tsv",
    sep="\t",
)

# Create nodes

In [17]:
# Create nodes for genes

# Distinct gene identifiers present in the association table.
gene_ids = data['geneId'].unique()

# One (node_id, attribute_dict) pair per gene, the form accepted by
# Graph.add_nodes_from. `.tolist()` turns the NumPy scalars produced by
# `unique()` into native Python values, so node keys are plain Python objects
# rather than numpy.int64 instances (cleaner reprs, safer serialization).
nodes_genes = [
    (gene_id, {'bipartite': 'gene'})
    for gene_id in gene_ids.tolist()
]

nodes_genes

[(1, {'bipartite': 'gene'}),
 (2, {'bipartite': 'gene'}),
 (9, {'bipartite': 'gene'}),
 (10, {'bipartite': 'gene'}),
 (12, {'bipartite': 'gene'}),
 (13, {'bipartite': 'gene'}),
 (15, {'bipartite': 'gene'}),
 (16, {'bipartite': 'gene'}),
 (18, {'bipartite': 'gene'}),
 (19, {'bipartite': 'gene'}),
 (21, {'bipartite': 'gene'}),
 (22, {'bipartite': 'gene'}),
 (23, {'bipartite': 'gene'}),
 (24, {'bipartite': 'gene'}),
 (25, {'bipartite': 'gene'}),
 (26, {'bipartite': 'gene'}),
 (28, {'bipartite': 'gene'}),
 (29, {'bipartite': 'gene'}),
 (31, {'bipartite': 'gene'}),
 (32, {'bipartite': 'gene'}),
 (33, {'bipartite': 'gene'}),
 (34, {'bipartite': 'gene'}),
 (35, {'bipartite': 'gene'}),
 (36, {'bipartite': 'gene'}),
 (37, {'bipartite': 'gene'}),
 (38, {'bipartite': 'gene'}),
 (39, {'bipartite': 'gene'}),
 (40, {'bipartite': 'gene'}),
 (41, {'bipartite': 'gene'}),
 (43, {'bipartite': 'gene'}),
 (47, {'bipartite': 'gene'}),
 (48, {'bipartite': 'gene'}),
 (50, {'bipartite': 'gene'}),
 (51, {'bipar

In [18]:
# Create nodes for diseases

# Distinct disease identifiers found in the association table.
disease_ids = data['diseaseId'].unique()

# Pair every disease id with its node attributes: (node_id, attribute_dict).
nodes_diseases = [
    (disease_id, {'bipartite': 'disease'})
    for disease_id in disease_ids
]

nodes_diseases

[('C0019209', {'bipartite': 'disease'}),
 ('C0036341', {'bipartite': 'disease'}),
 ('C0002395', {'bipartite': 'disease'}),
 ('C0007102', {'bipartite': 'disease'}),
 ('C0009375', {'bipartite': 'disease'}),
 ('C0011265', {'bipartite': 'disease'}),
 ('C0011570', {'bipartite': 'disease'}),
 ('C0011581', {'bipartite': 'disease'}),
 ('C0019202', {'bipartite': 'disease'}),
 ('C0022660', {'bipartite': 'disease'}),
 ('C0023890', {'bipartite': 'disease'}),
 ('C0023893', {'bipartite': 'disease'}),
 ('C0024115', {'bipartite': 'disease'}),
 ('C0024121', {'bipartite': 'disease'}),
 ('C0027726', {'bipartite': 'disease'}),
 ('C0206669', {'bipartite': 'disease'}),
 ('C0239946', {'bipartite': 'disease'}),
 ('C0242379', {'bipartite': 'disease'}),
 ('C0276496', {'bipartite': 'disease'}),
 ('C0494463', {'bipartite': 'disease'}),
 ('C0546126', {'bipartite': 'disease'}),
 ('C0750900', {'bipartite': 'disease'}),
 ('C0750901', {'bipartite': 'disease'}),
 ('C1527352', {'bipartite': 'disease'}),
 ('C1565662', {'

# Create edges

In [19]:
# Each distinct (gene, disease) pair becomes exactly one edge.
edges_df = data[['geneId', 'diseaseId']].drop_duplicates()

# itertuples(index=False, name=None) yields plain (geneId, diseaseId) tuples
# directly, avoiding the per-row Series object that iterrows() constructs.
edges = list(edges_df.itertuples(index=False, name=None))

edges

[(1, 'C0019209'),
 (1, 'C0036341'),
 (2, 'C0002395'),
 (2, 'C0007102'),
 (2, 'C0009375'),
 (2, 'C0011265'),
 (2, 'C0011570'),
 (2, 'C0011581'),
 (2, 'C0019202'),
 (2, 'C0022660'),
 (2, 'C0023890'),
 (2, 'C0023893'),
 (2, 'C0024115'),
 (2, 'C0024121'),
 (2, 'C0027726'),
 (2, 'C0206669'),
 (2, 'C0239946'),
 (2, 'C0242379'),
 (2, 'C0276496'),
 (2, 'C0494463'),
 (2, 'C0546126'),
 (2, 'C0750900'),
 (2, 'C0750901'),
 (2, 'C1527352'),
 (2, 'C1565662'),
 (2, 'C2239176'),
 (2, 'C2609414'),
 (2, 'C3279661'),
 (9, 'C0001973'),
 (9, 'C0004403'),
 (9, 'C0005684'),
 (9, 'C0005695'),
 (9, 'C0006826'),
 (9, 'C0008625'),
 (9, 'C0013221'),
 (9, 'C0027651'),
 (9, 'C0028796'),
 (9, 'C0033578'),
 (9, 'C0041755'),
 (9, 'C0086457'),
 (9, 'C0086692'),
 (9, 'C0376358'),
 (10, 'C0005684'),
 (10, 'C0005695'),
 (10, 'C0006142'),
 (10, 'C0010054'),
 (10, 'C0011999'),
 (10, 'C0013182'),
 (10, 'C0013221'),
 (10, 'C0019193'),
 (10, 'C0023452'),
 (10, 'C0023453'),
 (10, 'C0023530'),
 (10, 'C0023903'),
 (10, 'C0027794'

# Build graph

In [20]:
# Assemble the undirected graph: disease nodes are inserted first, then gene
# nodes, and finally the gene-disease edges.
G = nx.Graph()

G.add_nodes_from(nodes_diseases + nodes_genes)
G.add_edges_from(edges)

DegreeView({'C0019209': 43, 'C0036341': 1022, 'C0002395': 84, 'C0007102': 150, 'C0009375': 143, 'C0011265': 84, 'C0011570': 258, 'C0011581': 292, 'C0019202': 24, 'C0022660': 86, 'C0023890': 97, 'C0023893': 770, 'C0024115': 50, 'C0024121': 263, 'C0027726': 21, 'C0206669': 9, 'C0239946': 97, 'C0242379': 264, 'C0276496': 85, 'C0494463': 84, 'C0546126': 84, 'C0750900': 84, 'C0750901': 84, 'C1527352': 24, 'C1565662': 86, 'C2239176': 245, 'C2609414': 86, 'C3279661': 1, 'C0001973': 287, 'C0004403': 16, 'C0005684': 124, 'C0005695': 123, 'C0006826': 160, 'C0008625': 16, 'C0013221': 80, 'C0027651': 65, 'C0028796': 4, 'C0033578': 601, 'C0041755': 80, 'C0086457': 4, 'C0086692': 65, 'C0376358': 601, 'C0006142': 1025, 'C0010054': 65, 'C0011999': 28, 'C0013182': 35, 'C0019193': 315, 'C0023452': 49, 'C0023453': 49, 'C0023530': 8, 'C0023903': 136, 'C0027794': 28, 'C0027806': 28, 'C0080218': 28, 'C0152234': 28, 'C0152426': 29, 'C0266453': 28, 'C0344479': 28, 'C0345904': 141, 'C0345967': 109, 'C0678222':

In [26]:
# Betweenness centrality for every node, as a {node: score} mapping.
# `normalized=True` is the library default, spelled out here for the reader.
btw_centrality = nx.betweenness_centrality(G, normalized=True)
btw_centrality

{'C0019209': 0.0014591892437047244,
 'C0036341': 0.0738883020255656,
 'C0002395': 0.0009688793366875857,
 'C0007102': 0.0035556471704856984,
 'C0009375': 0.003034651812669877,
 'C0011265': 0.0009688793366875857,
 'C0011570': 0.005522481242302719,
 'C0011581': 0.007837005933166667,
 'C0019202': 0.00021281714086901938,
 'C0022660': 0.0011946287050362417,
 'C0023890': 0.0015197256772792515,
 'C0023893': 0.0535687582765229,
 'C0024115': 0.0013184773128287904,
 'C0024121': 0.007199692003410254,
 'C0027726': 0.0002388308081657714,
 'C0206669': 1.4933495414804596e-05,
 'C0239946': 0.0015197256772792515,
 'C0242379': 0.007230805018164414,
 'C0276496': 0.000983439645728156,
 'C0494463': 0.0009688793366875857,
 'C0546126': 0.0009688793366875857,
 'C0750900': 0.0009688793366875857,
 'C0750901': 0.0009688793366875857,
 'C1527352': 0.00021281714086901938,
 'C1565662': 0.0011946287050362417,
 'C2239176': 0.008786079526498743,
 'C2609414': 0.0011946287050362417,
 'C3279661': 0.0,
 'C0001973': 0.01068

In [28]:
# Store each node's betweenness score on the node as an attribute of G.
nx.set_node_attributes(G, btw_centrality, name='betweenness_centrality')

In [36]:
# Degree of every node in G.
degrees = nx.degree(G)

# Copy each node's degree onto the node as the 'degree' attribute.
nx.set_node_attributes(G, dict(degrees), name='degree')

In [None]:
# Degree centrality for every node, as a {node: score} mapping.
degree_cent = nx.degree_centrality(G)

# Attach each score to its node as the 'degree_centrality' attribute.
nx.set_node_attributes(G, degree_cent, name='degree_centrality')

In [29]:
# Export the annotated graph as GraphML. The output directory is created
# first so the write does not fail with FileNotFoundError on a fresh checkout
# (after the expensive centrality computation has already run).
graphml_path = Path("graphml/bipartite_network_all_disease_gene.graphml")
graphml_path.parent.mkdir(parents=True, exist_ok=True)
nx.write_graphml_lxml(G, str(graphml_path))