This notebook follows step0_NAFLD.ipynb and plots the personalized networks.

In [1]:
# This notebook follows step0_NAFLD.ipynb and plots the personalized networks.
import sys
sys.path.append('../network')
import network
import warnings
import pandas as pd
import os
import copy
import svg_draw as sd
from reportlab.graphics import renderPDF, renderPM
from svglib.svglib import svg2rlg
warnings.filterwarnings('ignore')

In [2]:
# Taxonomy table, indexed by the values of its 'Abb.' column.
taxonomy = pd.read_csv('../data/NAFLD/taxonomy.tsv', header=0, sep='\t').set_index('Abb.')

# Print the number of taxa in every family found in the table.
for c in set(taxonomy['Family']):
    print(c, int((taxonomy['Family'] == c).sum()))

# Families that get their own colour; any family not listed here is pooled
# under 'other' further down.
colors = {'Bacteroidaceae': '#85D696', 
          'Lachnospiraceae': '#FFC68A', 
          'Ruminococcaceae': '#999FDB', 
          'Family XI': '#CC99DB', 
          'other': '#AAAAAA'}

# colors_reverse: colour -> family group name
# colors_dict:    colour -> number of taxa drawn in that colour
# phy_dict:       family group name -> list of taxon ids in that group
colors_reverse, colors_dict, phy_dict = {}, {}, {}
for phy, color in colors.items():
    phy_dict[phy] = []
    colors_reverse[color] = phy
    colors_dict[color] = 0
total_num = len(taxonomy.index)

# Assign every taxon to its family group and count it under the group's colour.
for sp in taxonomy.index:
    phy = taxonomy.loc[sp, 'Family']
    if phy not in colors:
        phy = 'other'
    phy_dict[phy].append(sp)
    colors_dict[colors[phy]] += 1

# Node radius passed to the layout helper.
rnode = 5
# NOTE(review): assign_pos is defined in the local `network` module; its three
# return values are used below as canvas width, origin and a polar-coordinate
# table - confirm against that module.
width, origin, polar_df = network.assign_pos(total_num, rnode, margin=0)

Desulfovibrionaceae 1
Bifidobacteriaceae 1
Porphyromonadaceae 2
Campylobacteraceae 1
Streptococcaceae 1
Bacteroidaceae 21
Burkholderiaceae 1
Enterobacteriaceae 1
Veillonellaceae 1
Rikenellaceae 3
Tannerellaceae 2
Lachnospiraceae 30
Peptostreptococcaceae 2
Leuconostocaceae 2
Ruminococcaceae 18
Family XIII 1
Family XI 7
Prevotellaceae 1
Marinifilaceae 1


In [3]:
def complete_df(layout, phy_dict, colors_reverse):
    """Attach a taxon id and a family label to every row of ``layout``.

    For each row, the family is looked up from the row's ``color`` via
    ``colors_reverse`` and one taxon id is taken from the end of that family's
    list in ``phy_dict``.

    Both arguments are modified: ``layout`` gains/overwrites the ``id`` and
    ``fam`` columns in place, and the lists inside ``phy_dict`` are consumed
    (pass a copy if the original must survive).

    Parameters
    ----------
    layout : pandas.DataFrame
        Must contain the columns ``theta``, ``r`` and ``color``.
    phy_dict : dict
        Family name -> list of taxon ids; needs at least as many ids per
        family as there are rows of that family's colour.
    colors_reverse : dict
        Colour -> family name.

    Returns
    -------
    pandas.DataFrame
        ``layout`` restricted to the columns ``id, theta, r, color, fam``.
    """
    for row_label in layout.index:
        family = colors_reverse[layout.loc[row_label, 'color']]
        # pop() removes the last remaining id of this family.
        layout.loc[row_label, 'id'] = phy_dict[family].pop()
        layout.loc[row_label, 'fam'] = family
    return layout[['id', 'theta', 'r', 'color', 'fam']]

In [4]:
# NOTE(review): sector_layout lives in the local `network` module; the result
# is used below as a table with a 'color' column - confirm against that module.
layout = network.sector_layout(colors_dict, polar_df)

# complete_df pops taxa out of the lists it is given, so hand it a deep copy
# and keep phy_dict itself intact.
layout_table = complete_df(layout, copy.deepcopy(phy_dict), colors_reverse)
layout_table.to_csv("NAFLD_layout.tsv", sep='\t', index=None)

# Report the canvas geometry.
origin_x, origin_y = origin[0], origin[1]
print("width = {}\norigin = {},{}\nr = {}".format(width, origin_x, origin_y, rnode))


width = 210
origin = 105.0,105.0
r = 5


In [5]:
def draw_network(fr, keystone_df, layout_df, max_edge_width = 2, max_r = 8, min_width = 1, cutoff=0.9):
    """Build the SVG fragment (edges followed by nodes) for one network.

    Parameters
    ----------
    fr : pandas.DataFrame
        Square table indexed by taxon on both axes; ``fr.loc[a, b]`` is the
        pairwise value compared against ``cutoff``.
    keystone_df : pandas.DataFrame
        Needs the columns ``layer``, ``is_keystone``, ``PR_score`` and
        ``leaves``. Rows with ``layer == 0`` are looked up by taxon; the first
        row with ``layer == 1`` and ``is_keystone == True`` supplies the
        comma-separated ``leaves`` of the keystone cluster.
    layout_df : pandas.DataFrame
        Indexed by taxon, with the columns ``theta``, ``r`` and ``color``.
    max_edge_width : number
        Stroke width given to an edge whose value is 1.
    max_r : number
        Radius of keystone nodes and of the node with the highest PR score.
    min_width : number
        Currently unused; kept so existing callers keep working.
    cutoff : float
        Only pairs with a value strictly greater than this are drawn; the same
        value anchors the edge-width scale.

    Returns
    -------
    str
        Concatenated SVG elements: all edge paths, then all node circles.

    Raises
    ------
    IndexError
        If ``keystone_df`` has no layer-1 keystone row.
    KeyError
        If a taxon shared by ``fr`` and ``layout_df`` has no layer-0 row.
    """
    origin = (110, 110)

    # Taxa present in both tables, in fr's own row order. Iterating over a set
    # of strings would make the element order differ between kernel sessions.
    layout_taxa = set(layout_df.index)
    common_taxon = list(dict.fromkeys(t for t in fr.index if t in layout_taxa))
    fr = fr.loc[common_taxon, common_taxon]

    layer0_df = keystone_df[keystone_df['layer'] == 0]
    cluster_rows = keystone_df[(keystone_df['layer'] == 1) & (keystone_df['is_keystone'] == True)]
    # Leaves may carry an 's__' prefix; keep only the part after it.
    keystone_cluster = {leaf.split('s__')[-1] for leaf in list(cluster_rows['leaves'])[0].split(',')}

    # Node radii are scaled relative to the largest layer-0 PR score.
    pr_range = max(layer0_df['PR_score'])

    keystone = ''
    circles = []
    coords = []
    for idx in common_taxon:
        x, y = network.convert(layout_df.loc[idx, 'theta'], layout_df.loc[idx, 'r'], origin)
        coords.append((x, y))
        # Members of the keystone cluster get a thicker outline.
        swidth = 2 if idx in keystone_cluster else 1
        # Explicit comparison on purpose: a missing value (NaN) must not count
        # as a keystone, which plain truthiness would do.
        if layer0_df.loc[idx, 'is_keystone'] == True:
            keystone = idx
            radius, fill = max_r, "red"
        else:
            radius = max_r * layer0_df.loc[idx, 'PR_score'] / pr_range
            fill = layout_df.loc[idx, 'color']
        circles.append(sd.draw_circle(x, y, swidth=swidth, stroke='black', r=radius,
                                      fill=fill, id=idx, style=True))

    # Build the coordinate table once instead of growing it cell by cell.
    xy_df = pd.DataFrame(coords, index=common_taxon, columns=['x', 'y'])

    paths = []
    taxon = list(fr.index)
    for i, taxa1 in enumerate(taxon):
        x1 = xy_df.loc[taxa1, 'x']
        y1 = xy_df.loc[taxa1, 'y']
        for taxa2 in taxon[i + 1:]:
            fr_value = fr.loc[taxa1, taxa2]
            # Use the caller's cutoff (was a hard-coded 0.9), so the filter
            # and the width scale below always agree.
            if fr_value > cutoff:
                x2 = xy_df.loc[taxa2, 'x']
                y2 = xy_df.loc[taxa2, 'y']
                edge_id = "{}_{}".format(taxa1, taxa2)
                x3, y3 = network.compute_convex((x1, y1), (x2, y2))
                edge_width = max_edge_width*(fr_value-cutoff)/(1-cutoff)
                # Edges touching the keystone taxon are drawn in red.
                extra = {'stroke': 'red'} if keystone in (taxa1, taxa2) else {}
                paths.append(sd.draw_curve(x1, y1, x3, y3, x2, y2, swidth=edge_width,
                                           id=edge_id, style=True, **extra))

    # No title element is emitted (the original built one and then discarded it).
    return ''.join(paths) + ''.join(circles)

In [6]:
# Static SVG legend: one circle and label for the keystone marker and for each
# family colour. The pieces are joined by a newline followed by one space.
legend = (
    '<circle cx="250" cy="20" r="12" stroke="black" stroke-width="1" fill="red" /> \n '
    '<text x="280" y="25" class="legend">keystone</text>\n '
    '<circle cx="250" cy="50" r="12" stroke="black" stroke-width="1" fill="#85D696" />\n '
    '<text x="280" y="55" class="legend">Bacteroidaceae</text>\n '
    '<circle cx="250" cy="80" r="12" stroke="black" stroke-width="1" fill="#FFC68A" />\n '
    '<text x="280" y="85" class="legend">Lachnospiraceae</text>\n '
    '<circle cx="250" cy="110" r="12" stroke="black" stroke-width="1" fill="#999FDB" />\n '
    '<text x="280" y="115" class="legend">Ruminococcaceae</text>\n '
    '<circle cx="250" cy="140" r="12" stroke="black" stroke-width="1" fill="#CC99DB" />\n '
    '<text x="280" y="145" class="legend">Family XI</text>\n '
    '<circle cx="250" cy="170" r="12" stroke="black" stroke-width="1" fill="#AAAAAA" />\n '
    '<text x="280" y="175" class="legend">Other</text>'
)

def draw(content, legend, width=400, height=240):
    """Wrap the network fragment and its legend in an SVG canvas.

    Parameters
    ----------
    content : str
        SVG elements of the network.
    legend : str
        SVG elements of the legend, appended after ``content``.
    width, height : number
        Canvas size handed to ``sd.canvas``; the defaults reproduce the
        previously hard-coded 400 x 240 canvas.

    Returns
    -------
    Whatever ``sd.canvas`` returns for the combined markup.
    """
    return sd.canvas(width, height, content+legend)

In [7]:
def reindex(fr_df):
    """Build a rename map that strips the ``s__`` prefix from index labels.

    Only labels of ``fr_df.index`` that start with ``'s__'`` are included;
    each maps to the text following the last ``'s__'`` in the label. The
    frame itself is not modified.

    Returns
    -------
    dict
        Original label -> shortened label.
    """
    return {
        label: label.split('s__')[-1]
        for label in fr_df.index
        if label.startswith('s__')
    }

In [9]:

# Node layout written by an earlier cell; the first column becomes the index.
layout_df = pd.read_csv('NAFLD_layout.tsv', sep='\t', header=0, index_col=0)
outer_dir = '../result/NAFLD/'

# Render one network (SVG + PDF) for every sub-directory of outer_dir.
# - The loop variable is not called `dir`, which would shadow the builtin
#   dir() for the rest of the kernel session.
# - os.listdir returns entries in arbitrary order; sorting makes runs repeatable.
for sample_dir in sorted(os.listdir(outer_dir)):
    sample_path = os.path.join(outer_dir, sample_dir)
    if os.path.isfile(sample_path):
        continue
    keystone_df = pd.read_csv(os.path.join(sample_path, 'keystone_node.tsv'), sep='\t', header=0, index_col=0)
    fr_df = pd.read_csv(os.path.join(sample_path, 'layer_0', 'fr.tsv'), sep='\t', header=0, index_col=0)

    # Strip the 's__' prefix so labels match the index of layout_df.
    rename = reindex(fr_df)
    fr_df = fr_df.rename(index=rename, columns=rename)
    keystone_df = keystone_df.rename(index=rename)

    s_sector = draw(draw_network(fr_df, keystone_df, layout_df), legend)
    svg_path = os.path.join(sample_path, 'network.svg')
    with open(svg_path, 'w') as fp:
        fp.write(s_sector)

    # Convert the SVG just written into a PDF next to it.
    drawing = svg2rlg(svg_path)
    pdf_path = os.path.join(sample_path, 'network.pdf')
    renderPDF.drawToFile(drawing, pdf_path)
