# Import

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import networkx as nx
import pygraphviz
from networkx.drawing.nx_agraph import graphviz_layout

# Gathering edges

In [None]:
# Read the daughter-father table and shorten its column names in one pass.
# NOTE(review): 'Diease_TreeNumber' (sic) presumably mirrors the header in the
# CSV file -- confirm against the data before "correcting" the spelling.
edges = pd.read_csv(
    '../../data/final/disease_parent_treenumbers.csv'
).rename(
    columns={
        'Diease_TreeNumber': 'Disease',
        'Parent_TreeNumber': 'Parent',
    }
)

# Show the first rows as the cell output
edges.head()

# Gathering nodes

In [None]:
# Read the per-disease heading/count table and shorten its column names.
# NOTE(review): 'Diease_TreeNumber' (sic) presumably mirrors the header in the
# CSV file -- confirm against the data before "correcting" the spelling.
nodes = pd.read_csv(
    '../../data/final/disease_tree_heading_count.csv'
).rename(
    columns={
        'Diease_TreeNumber': 'Disease',
        'Disease_Mesh_Heading': 'Label',
        'Disease_Count': 'Counts',
    }
)

# Replace each raw count c by 1 + ln(1 + c)
nodes['Counts'] = 1 + np.log(1 + nodes['Counts'])

# Show the first rows as the cell output
nodes.head()

# Construct graph

In [None]:
# Construct the whole hierarchical tree as a directed graph
# (each edge points from parent to daughter).
g = nx.from_pandas_edgelist(
    edges,
    source='Parent',
    target='Disease',
    create_using=nx.DiGraph,
)

# Add node attributes - Labels (mapping: Disease -> Label)
labels = nodes.set_index('Disease')['Label'].to_dict()
nx.set_node_attributes(g, labels, 'Label')

# Add node attributes - Counts (mapping: Disease -> rescaled Counts)
counts = nodes.set_index('Disease')['Counts'].to_dict()
nx.set_node_attributes(g, counts, 'Counts')

# Save to a GEXF file so the graph can be opened in Gephi
nx.write_gexf(g, 'Hierarchy.gexf')

# Echo info.
# In networkx, size() is the number of EDGES and order() is the number of
# NODES; the labels below are matched to those definitions.
print('  Size (Edges): ', g.size())
print(' Order (Nodes): ', g.order())
print(' Graph Density: ', nx.density(g))

In [None]:
# Compute node coordinates with Graphviz's 'sfdp' layout program
pos = graphviz_layout(g, prog='sfdp', args='')

# Draw every node as a small, semi-transparent blue dot without text labels
plt.figure(figsize=(10, 10))
nx.draw(
    g,
    pos,
    with_labels=False,
    node_color="blue",
    node_size=5,
    alpha=0.5,
)
plt.axis('equal')
plt.show()

# End