# Network visualization

This notebook constructs a network visualization connecting bacterial species to metabolic compounds.

In [None]:
# Preliminaries
%matplotlib inline

import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from IPython.display import display, HTML

def widen_notebook():
    """Inject a CSS rule that sets the notebook's `.container` width to 100%."""
    full_width_css = "<style>.container { width:100% !important; }</style>"
    display(HTML(full_width_css))

widen_notebook()

In [None]:
# List the input data directory (IPython shell escape).
!ls ../data

In [None]:
# Paths of the three input tables, relative to the notebook's working directory.
# Each is parsed as comma-separated text by CSVtodicts in a later cell.
edges_txt = "../data/edges.txt"
metabolites_txt = "../data/metabolite_metadata.txt"
microbes_txt = "../data/microbe_metadata.txt"

In [None]:
# Read data files into lists of dictionaries
def split_commas(line):
    """Strip surrounding whitespace from `line` and split it on commas.

    No quoting or escaping is understood: every comma separates two fields.
    """
    return line.strip().split(",")

def CSVtodicts(filename):
    """Read a comma-separated text file into a list of dictionaries.

    The first line supplies the column names. Every following non-blank line
    becomes one dictionary mapping column name to the raw string value.
    Because the pairs are built with zip, a row that is shorter or longer than
    the header is truncated to the shorter of the two.

    Blank (whitespace-only) lines are skipped; otherwise a trailing empty line
    would yield a record holding only an empty first field.

    Returns an empty list when the file has no data rows.
    """
    result = []
    # The context manager closes the file even if parsing raises.
    with open(filename) as f:
        headers = split_commas(f.readline())
        for line in f:
            if not line.strip():
                continue
            values = split_commas(line)
            result.append(dict(zip(headers, values)))
    return result

# Parse each input file into a list of row dictionaries (values are still strings here).
edges = CSVtodicts(edges_txt)
metabolites = CSVtodicts(metabolites_txt)
microbes = CSVtodicts(microbes_txt)

# Cell output: the number of rows read from each file.
len(edges), len(metabolites), len(microbes)

In [None]:
# Convert numeric column
def float_column(name, dicts):
    """Replace the `name` entry of every dictionary in `dicts` with its float value.

    The dictionaries are modified in place and nothing is returned. A missing
    key raises KeyError and an unparsable value raises ValueError; rows handled
    before the failure stay converted.
    """
    for row in dicts:
        raw = row[name]
        row[name] = float(raw)

# Name of the edge-table column holding the edge weight; converted to float in place.
ew = "edge_weight"
float_column(ew, edges)
# Cell output: one sample row after the conversion.
edges[10]

In [None]:
# Column used for both metadata tables and, later, for node coloring.
# NOTE(review): the name looks like a model coefficient for the non-IBD
# diagnosis term — confirm against the data files.
d_col = 'diagnosis[T.nonIBD]'
float_column(d_col, metabolites)
# Cell output: one sample metabolite row after the conversion.
metabolites[100]

In [None]:
# Convert the same column to float for the microbe metadata rows.
float_column(d_col, microbes)
# Cell output: one sample microbe row after the conversion.
microbes[111]

In [None]:
# Data distribution histograms and color mappings
from jp_gene_viz import color_scale

def column_histogram(dictionaries, title, column=d_col):
    """Plot a 10-bin histogram of one column and return a matching color mapper.

    `dictionaries` is a sequence of row dictionaries and `column` the key whose
    values are plotted (they must already be numeric). The figure is titled
    `title` and shown immediately.

    Returns a color_scale.ColorInterpolator spanning the minimum and maximum
    observed values, running from clr(255,0,0) at the low end to clr(0,123,255)
    at the high end. An empty `dictionaries` makes min() raise ValueError.
    """
    values = [row[column] for row in dictionaries]
    plt.hist(values, 10, facecolor='blue', alpha=0.5)
    plt.title(title)
    plt.show()
    return color_scale.ColorInterpolator(
        color_scale.clr(255,0,0),
        color_scale.clr(0,123,255),
        min(values),
        max(values),
    )

# Plot the microbe value distribution and keep the returned color mapper.
microbe_colorizer = column_histogram(microbes, "Microbe weights")
# Cell output: the color the mapper assigns to the sample value 0.22.
microbe_colorizer.interpolate_color(0.22)

In [None]:
# Plot the metabolite value distribution and keep the returned color mapper.
metabolite_colorizer = column_histogram(metabolites, "Metabolite weights")
# Cell output: the color the mapper assigns to the sample value 0.22.
metabolite_colorizer.interpolate_color(0.22)

In [None]:
# Histogram of the edge weights only; the returned color mapper is discarded.
_ = column_histogram(edges, "Edge weights", ew)

In [None]:
# Abbreviate names.
abbreviations = {}  # full name -> abbreviation already issued for it
rabbrev = {}        # abbreviation -> full name that owns it

def abbreviate(name, limit=12):
    """Return a short, unique label for `name`, remembering it for later calls.

    A name seen before gets the label issued the first time, whatever `limit`
    is passed now. Names no longer than `limit` are used as they are. Longer
    names are reduced in steps: keep the text after the last "|", then the text
    after the last "__", then turn "_"-separated words into capitalized pieces
    of at most four characters each and join them. The outcome is cut to
    `limit` characters.

    If that label already belongs to another name, ":1", ":2", ... is appended
    until it is unused, so the final label may be longer than `limit`.
    """
    if name in abbreviations:
        return abbreviations[name]
    short = name
    if len(name) > limit:
        # With no "|" present this leaves the whole name untouched.
        short = name.rsplit("|", 1)[-1]
        if "__" in short:
            # Splitting `name` gives the same tail as splitting `short`:
            # `short` is the suffix after a "|", which can never be part of "__".
            short = name.split("__")[-1]
        if "_" in short:
            short = "".join(
                piece[:1].upper() + piece[1:4].lower()
                for piece in short.split("_")
            )
    base = short[:limit]
    candidate = base
    serial = 0
    while candidate in rabbrev:
        serial += 1
        candidate = "%s:%d" % (base, serial)
    abbreviations[name] = candidate
    rabbrev[candidate] = name
    return candidate

abbreviate('k__Viruses|p__Viruses_noname|c__Viruses_noname|o__Viruses_noname')

In [None]:
# name abbreviation mappings.
def name_dict(dicts):
    """Index row dictionaries by abbreviated name.

    The name of each row is read from the column whose header is the empty
    string, passed through abbreviate(), and used as the key; the value is the
    row dictionary itself (not a copy).
    """
    return {abbreviate(row[""]): row for row in dicts}

# Abbreviated microbe name -> microbe metadata row.
name2microbe = name_dict(microbes)
# Cell output: one sample (name, row) pair. The items are materialized as a
# list first because dict.items() is a view on Python 3 and cannot be indexed;
# on Python 2 the extra list() is a harmless copy.
list(name2microbe.items())[15]

In [None]:
# Abbreviated metabolite name -> metabolite metadata row.
name2metabolite = name_dict(metabolites)
# Cell output: one sample (name, row) pair. The items are materialized as a
# list first because dict.items() is a view on Python 3 and cannot be indexed;
# on Python 2 the extra list() is a harmless copy.
list(name2metabolite.items())[32]

In [None]:
# Construct the network graph from the edges.
from jp_gene_viz import dGraph
# Start from an empty dGraph.WGraph and add one edge per row of the edge table.
G = dGraph.WGraph()
for e in edges:
    # Arguments: abbreviated microbe name, abbreviated metabolite name, the row's
    # edge weight (already converted to float), and the whole row dictionary.
    G.add_edge(abbreviate(e["microbe"]), abbreviate(e["metabolite"]), e["edge_weight"], e)

In [None]:
# Construct the network widget from the graph
from jp_gene_viz import dNetwork
# NOTE(review): presumably loads the front-end JavaScript the widget relies on —
# confirm in jp_gene_viz.
dNetwork.load_javascript_support()
N = dNetwork.NetworkDisplay()
# Hand the graph built from the edge table to the widget.
N.load_data(G)

In [None]:
# Configure and display the network

# Widget settings applied before drawing.
N.labels_button.value = True
N.size_slider.value = 1000

# Colorize the nodes based on weights (hacky: writes the widget's
# color_overrides mapping directly).
dg = N.display_graph
for node_name in dg.node_weights:
    svg_name = dg.node_name(node_name)
    # Nodes found in the metabolite table use the metabolite color scale;
    # every other node is looked up as a microbe.
    if node_name in name2metabolite:
        table, node_colorizer = name2metabolite, metabolite_colorizer
    else:
        table, node_colorizer = name2microbe, microbe_colorizer
    d = table[node_name]
    value = d[d_col]
    N.color_overrides[svg_name] = node_colorizer.interpolate_color(value)

N.draw()
N.show()