In [14]:
from pathlib import Path

import pandas as pd
import numpy as np
import networkx as nx
import nxviz as nv
from nxviz import MatrixPlot, ArcPlot, CircosPlot
import matplotlib.pyplot as plt
import seaborn as sns

In [15]:
# Load the grouped association table; the file is tab-separated despite its .csv extension.
data = pd.read_csv("./processed_data/curated_gene_disease_associations_autism_grouped.csv", sep='\t')

# Keep only the rows whose diseaseId carries the 'A' prefix (A00, A02, ...).
# startswith states that prefix intent exactly, whereas str.contains('A') would also
# match an id that merely has an 'A' somewhere inside it.
# na=False makes a missing id count as "no match"; without it the mask would contain
# NaN and the boolean indexing below would raise.
autisms = data[data['diseaseId'].str.startswith('A', na=False)]

In [16]:
# Reduce the table to the three disease-describing columns and keep one row per
# distinct (diseaseId, diseaseName, diseaseClass) combination.
autisms = (
    autisms
    .loc[:, ['diseaseId', 'diseaseName', 'diseaseClass']]
    .drop_duplicates()
)
autisms

Unnamed: 0,diseaseId,diseaseName,diseaseClass
100,A00,Autism Spectrum Disorder,F03
1952,A09,Hamartoma tumor Syndrome,C04;C16
3422,A02,Fragile X Syndrome,C10;C16;C23
4018,A16,Digeorge Syndrome,C05;C14;C15;C16;C19;C23
6110,A08,Neurofibromatosis,C04;C09;C10;C16;C17;C23
6111,A14,Noonan Syndrome,C04;C05;C10;C14;C16;C17;C23
6683,A06,Timothy Syndrome,C05;C14;C16;C23;F03
6846,A10,Down Syndrome,C04;C09;C10;C11;C15;C16;C23;F01;F03
14086,A07,Smith-Lemli-Opitz Syndrome,C16;C18
14294,A15,William Syndrome,C10;C14;C16


In [17]:
def _split_disease_classes(value):
    """Return the class codes held in one ``diseaseClass`` cell as a list.

    A ';'-separated string is split into its codes. A value that is already a
    list/tuple is returned as a list, so re-running this cell is harmless.
    Anything else (e.g. a missing value) yields an empty list, meaning the
    disease has no class codes.
    """
    if isinstance(value, str):
        return value.split(';')
    if isinstance(value, (list, tuple)):
        return list(value)
    return []


# Build a new frame instead of assigning into the existing one: the cell stays
# idempotent and does not write into a frame that was derived by filtering `data`.
autisms = autisms.assign(diseaseClass=autisms['diseaseClass'].map(_split_disease_classes))

In [18]:
# Display the frame to check that diseaseClass now holds lists of class codes.
autisms

Unnamed: 0,diseaseId,diseaseName,diseaseClass
100,A00,Autism Spectrum Disorder,[F03]
1952,A09,Hamartoma tumor Syndrome,"[C04, C16]"
3422,A02,Fragile X Syndrome,"[C10, C16, C23]"
4018,A16,Digeorge Syndrome,"[C05, C14, C15, C16, C19, C23]"
6110,A08,Neurofibromatosis,"[C04, C09, C10, C16, C17, C23]"
6111,A14,Noonan Syndrome,"[C04, C05, C10, C14, C16, C17, C23]"
6683,A06,Timothy Syndrome,"[C05, C14, C16, C23, F03]"
6846,A10,Down Syndrome,"[C04, C09, C10, C11, C15, C16, C23, F01, F03]"
14086,A07,Smith-Lemli-Opitz Syndrome,"[C16, C18]"
14294,A15,William Syndrome,"[C10, C14, C16]"


In [19]:
# Gather every class code that appears in any disease's class list,
# de-duplicated via a set union and returned as an ascending list.
disease_classes = sorted(set().union(*autisms['diseaseClass']))
disease_classes

['C04',
 'C05',
 'C09',
 'C10',
 'C11',
 'C12',
 'C13',
 'C14',
 'C15',
 'C16',
 'C17',
 'C18',
 'C19',
 'C23',
 'F01',
 'F03']

In [20]:
# Class code -> human-readable class name.
# Despite its name, this is a list of (code, name) tuples: the mapping is written as a
# dict for readability and flattened with .items(), which keeps the insertion order.
disease_classes_dicts = list({
    'C04': 'neoplasms',
    'C05': 'musculoskeletal diseases',
    'C09': 'otorhinolaryngologic diseases',
    'C10': 'nervous system diseases',
    'C11': 'eye diseases',
    'C12': 'urologic and male genital diseases',
    'C13': 'female genital diseases and pregnancy complications',
    'C14': 'cardiovascular diseases',
    'C15': 'hemic and lymphatic diseases',
    'C16': 'congenital, hereditary, and neonatal diseases and abnormalities',
    'C17': 'skin and connective tissue diseases',
    'C18': 'nutritional and metabolic diseases',
    'C19': 'endocrine system diseases',
    'C23': 'pathological conditions, signs and symptoms',
    'F01': 'behavior and behavior mechanisms',
    'F03': 'mental disorders',
}.items())

In [21]:
# Empty undirected graph; the following cells add disease-class nodes, disease nodes,
# and the edges linking each disease to its classes.
G = nx.Graph()

# Add class nodes

In [22]:
# Register one node per disease class, keyed by the class code.
# Each (node, attribute-dict) pair tags the node as belonging to the
# 'disease_class' side and stores its readable name.
G.add_nodes_from(
    (code, {'bipartite': 'disease_class', 'name': label})
    for code, label in disease_classes_dicts
)
G.nodes(data=True)

NodeDataView({'C04': {'bipartite': 'disease_class', 'name': 'neoplasms'}, 'C05': {'bipartite': 'disease_class', 'name': 'musculoskeletal diseases'}, 'C09': {'bipartite': 'disease_class', 'name': 'otorhinolaryngologic diseases'}, 'C10': {'bipartite': 'disease_class', 'name': 'nervous system diseases'}, 'C11': {'bipartite': 'disease_class', 'name': 'eye diseases'}, 'C12': {'bipartite': 'disease_class', 'name': 'urologic and male genital diseases'}, 'C13': {'bipartite': 'disease_class', 'name': 'female genital diseases and pregnancy complications'}, 'C14': {'bipartite': 'disease_class', 'name': 'cardiovascular diseases'}, 'C15': {'bipartite': 'disease_class', 'name': 'hemic and lymphatic diseases'}, 'C16': {'bipartite': 'disease_class', 'name': 'congenital, hereditary, and neonatal diseases and abnormalities'}, 'C17': {'bipartite': 'disease_class', 'name': 'skin and connective tissue diseases'}, 'C18': {'bipartite': 'disease_class', 'name': 'nutritional and metabolic diseases'}, 'C19': {'

# Add disease nodes

In [23]:
# Register one node per disease, keyed by diseaseId, tagged as the 'autism' side
# of the graph and labelled with the disease name.
for disease_id, disease_name in zip(autisms['diseaseId'], autisms['diseaseName']):
    G.add_node(disease_id, bipartite='autism', name=disease_name)

# Show the last entry of the node view as a quick sanity check.
list(G.nodes(data=True))[-1]

('A13', {'bipartite': 'autism', 'name': 'Charge Syndrome'})

# Add edges

In [24]:
# Connect every disease to each class code in its diseaseClass list.
# NOTE: adding an edge implicitly creates any endpoint that is not yet a node, so a
# class code with no entry in disease_classes_dicts would become an attribute-less node.
G.add_edges_from(
    (disease_id, class_id)
    for disease_id, class_ids in zip(autisms['diseaseId'], autisms['diseaseClass'])
    for class_id in class_ids
)
G.edges()

EdgeView([('C04', 'A09'), ('C04', 'A08'), ('C04', 'A14'), ('C04', 'A10'), ('C04', 'A04'), ('C05', 'A16'), ('C05', 'A14'), ('C05', 'A06'), ('C05', 'A17'), ('C05', 'A11'), ('C09', 'A08'), ('C09', 'A10'), ('C10', 'A02'), ('C10', 'A08'), ('C10', 'A14'), ('C10', 'A10'), ('C10', 'A15'), ('C10', 'A01'), ('C10', 'A05'), ('C10', 'A04'), ('C10', 'A03'), ('C10', 'A12'), ('C10', 'A17'), ('C10', 'A11'), ('C11', 'A10'), ('C11', 'A11'), ('C12', 'A04'), ('C13', 'A04'), ('C14', 'A16'), ('C14', 'A14'), ('C14', 'A06'), ('C14', 'A15'), ('C15', 'A16'), ('C15', 'A10'), ('C16', 'A09'), ('C16', 'A02'), ('C16', 'A16'), ('C16', 'A08'), ('C16', 'A14'), ('C16', 'A06'), ('C16', 'A10'), ('C16', 'A07'), ('C16', 'A15'), ('C16', 'A01'), ('C16', 'A05'), ('C16', 'A04'), ('C16', 'A03'), ('C16', 'A12'), ('C16', 'A17'), ('C16', 'A11'), ('C16', 'A13'), ('C17', 'A08'), ('C17', 'A14'), ('C18', 'A07'), ('C18', 'A11'), ('C19', 'A16'), ('C23', 'A02'), ('C23', 'A16'), ('C23', 'A08'), ('C23', 'A14'), ('C23', 'A06'), ('C23', 'A10')

# Export

In [25]:
# Export the disease/class graph as GraphML.
# The output directory is created first so the export also succeeds when graphml/
# does not exist yet (the write itself would otherwise fail with FileNotFoundError).
graphml_path = Path("graphml/bipartite_disease_class.graphml")
graphml_path.parent.mkdir(parents=True, exist_ok=True)
nx.write_graphml_lxml(G, str(graphml_path))