In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import os
import numpy as np
from glob import glob
from itolapi import Itol
from IPython.display import Image

In [None]:
# Build the organism-id -> tree-node lookup used throughout the notebook.
# Only the columns at positions 0 and 4 of the model list are read; they are
# expected to be "assembly_accession" and "file_path".
models = (
    pd.read_csv('../data/model_list.tsv', sep='\t', usecols=[0, 4])
    .assign(
        # File name without its last seven characters
        # (presumably a ".xml.gz" suffix -- TODO confirm).
        org_id=lambda table: table['file_path'].map(
            lambda path: os.path.basename(path)[:-7]),
        # Drop the character at index 3 and the last two characters of the
        # accession (presumably "GCF_xxx.1" -> "GCFxxx" -- TODO confirm).
        node=lambda table: table["assembly_accession"].map(
            lambda accession: accession[0:3] + accession[4:-2]),
    )
    [["org_id", "node"]]
    .copy()
)

## Create binary dataset 

In [None]:
# Header lines of the iTOL DATASET_BINARY annotation file; the data rows are
# appended after the final "DATA" line when the file is written.
# Every FIELD_* line carries exactly one value per field (bq, rq). The
# previous FIELD_COLORS line ended with a trailing comma, which under
# "SEPARATOR COMMA" adds an empty third value.
parameters = [
    "DATASET_BINARY\n",
    "SEPARATOR COMMA\n",
    "DATASET_LABEL,bq_rq\n",
    "FIELD_SHAPES,2,2\n",
    "FIELD_LABELS,bq,rq\n",
    "FIELD_COLORS,#ed7e17,#1ba055\n",
    "MARGIN,25\n",
    "HEIGHT_FACTOR,30\n",
    "SYMBOL_SPACING,25\n",
    "DATA\n"
]

In [None]:
# Stack every headerless two-column table (community, org_id) found in the
# bin_rnd_01 folder into one frame.
bq_orgs = pd.concat(
    [
        pd.read_csv(path, sep='\t', header=None, names=['community', 'org_id'])
        for path in glob('../communities/bin_rnd_01/*.tsv')
    ]
)
# The second "_"-separated token of the community name is parsed as an
# integer and stored as the community size.
bq_orgs["size"] = bq_orgs["community"].map(lambda name: int(name.split("_")[1]))

In [None]:
# Same layout as the bin_rnd_01 tables, read from the bin_rnd_001 folder:
# one (community, org_id) row per community member, no header line.
rq_orgs = pd.concat(
    [
        pd.read_csv(path, sep='\t', header=None, names=['community', 'org_id'])
        for path in glob('../communities/bin_rnd_001/*.tsv')
    ]
)
# Community size = integer value of the second "_"-separated name token.
rq_orgs["size"] = rq_orgs["community"].map(lambda name: int(name.split("_")[1]))

In [None]:
def _frequent_members(orgs, min_size=3, min_communities=10):
    """Count community memberships per organism and keep the frequent ones.

    Rows of ``orgs`` with ``size`` below ``min_size`` are ignored. The result
    has one row per remaining ``org_id`` with its number of memberships in
    the ``community`` column, restricted to counts >= ``min_communities``.
    """
    large_enough = orgs[orgs["size"] >= min_size]
    counts = large_enough.groupby("org_id", as_index=False).agg({"community": len})
    return counts[counts["community"] >= min_communities]


bq = _frequent_members(bq_orgs)
rq = _frequent_members(rq_orgs)

# Flag every model with 1 (frequent member) or -1 (not).
# `isin` does one hashed membership test for the whole column instead of
# scanning the id array once per row.
models["bq"] = models["org_id"].isin(bq["org_id"]).map({True: 1, False: -1})
models["rq"] = models["org_id"].isin(rq["org_id"]).map({True: 1, False: -1})

In [None]:
# Keep the organisms flagged with 1 in at least one of the two columns; the
# column order (node, bq, rq) is the row layout written to the binary dataset.
selected = models.loc[(models["bq"] == 1) | (models["rq"] == 1), ["node", "bq", "rq"]]

In [None]:
# Annotation file = header lines followed by the headerless, comma-separated
# rows of `selected`.
with open("../iTOL/bq_rq_binary.txt", "w") as binary_out:
    binary_out.write("".join(parameters))
    selected.to_csv(binary_out, index=False, header=False)

## Create interactions dataset

In [None]:
def smt2iTOL(df, color="#000000", nodes=None):
    """Convert SMETANA interaction scores into iTOL connection rows.

    Scores are summed per (receiver, donor) pair and both organism ids are
    translated to tree node ids, giving rows in the column order written
    below the ``DATA`` line of the connection dataset file.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``receiver``, ``donor`` and ``smetana``.
    color : str
        Value written to the ``color`` column of every row.
    nodes : pandas.DataFrame, optional
        Lookup table with the columns ``org_id`` and ``node``. Defaults to
        the notebook-level ``models`` frame; any extra columns are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns ``node_x`` (node of the donor), ``node_y`` (node of the
        receiver), ``smetana``, ``color``, ``style`` and ``label``. Pairs
        whose donor or receiver has no entry in ``nodes`` are dropped,
        because both lookups are inner joins.
    """
    if nodes is None:
        nodes = models
    lookup = nodes[["org_id", "node"]]

    # The string alias "sum" selects the built-in groupby reduction; passing
    # the np.sum callable is deprecated in recent pandas releases.
    totals = df.groupby(["receiver", "donor"], as_index=False).agg({"smetana": "sum"})

    # Name the node columns explicitly rather than relying on the _x/_y
    # suffixes pandas adds when two merges collide on "node".
    donor_nodes = lookup.rename(columns={"org_id": "donor", "node": "node_x"})
    receiver_nodes = lookup.rename(columns={"org_id": "receiver", "node": "node_y"})
    totals = totals.merge(donor_nodes, on="donor").merge(receiver_nodes, on="receiver")

    totals["color"] = color
    totals["style"] = 'normal'
    totals["label"] = 'label'
    return totals[["node_x", "node_y", "smetana", "color", "style", "label"]].copy()

In [None]:
# Concatenate all tab-separated result tables of each subsample folder.
smt_bq = pd.concat(
    [pd.read_csv(path, sep='\t') for path in glob("../simulation/bq_subsample/*.tsv")]
)
smt_rq = pd.concat(
    [pd.read_csv(path, sep='\t') for path in glob("../simulation/rq_subsample/*.tsv")]
)

In [None]:
# Connection colours reuse the two hex codes listed on the FIELD_COLORS line
# of the binary dataset header (first for bq, second for rq).
bq_itol = smt2iTOL(df=smt_bq, color='#ed7e17')
rq_itol = smt2iTOL(df=smt_rq, color='#1ba055')

In [None]:
# Stack both sets and keep only pairs whose summed score is strictly above 1.
connections = pd.concat([bq_itol, rq_itol]).loc[lambda rows: rows["smetana"] > 1]

In [None]:
# Header lines of the iTOL DATASET_CONNECTION file, one directive per line;
# the connection rows are appended after "DATA" when the file is written.
parameters = [
    directive + "\n"
    for directive in (
        "DATASET_CONNECTION",
        "SEPARATOR COMMA",
        "DATASET_LABEL,interactions",
        "CENTER_CURVES,1",
        "ALIGN_TO_LABELS,1",
        "DRAW_ARROWS,1",
        "ARROW_SIZE,30",
        "MAXIMUM_LINE_WIDTH,10",
        "CURVE_ANGLE,0",
        "DATA",
    )
]

In [None]:
# Header first, then the connection rows as headerless CSV.
with open("../iTOL/interactions.txt", "w") as interactions_out:
    interactions_out.write("".join(parameters))
    connections.to_csv(interactions_out, header=False, index=False)

## Phylum colors

In [None]:
# Phyla that keep their own colour; every other phylum is shown as "Other".
main_phyla = ["Proteobacteria", "Firmicutes", "Actinobacteria", "Bacteroidetes"]

colors = {
    "Proteobacteria": '#bc80bd',  # purple
    "Firmicutes": '#fc5b5b',  # red
    "Actinobacteria": '#80b1d3',  # blue
    "Bacteroidetes": '#ffed6f',  # yellow
    "Other": '#d9d9d9',  # grey
}

# "not_annotated" entries are read as missing values, and any row containing
# a missing value is dropped.
phyla = pd.read_csv("../iTOL/phyla_emp.tsv", sep="\t", na_values="not_annotated").dropna()

# Collapse the minor phyla into one group, then look up each row's colour.
phyla["phylum"] = phyla["phylum"].where(phyla["phylum"].isin(main_phyla), "Other")
phyla["color"] = [colors[name] for name in phyla["phylum"]]

# Header lines of the iTOL TREE_COLORS file (each entry keeps its newline).
parameters = "TREE_COLORS\nSEPARATOR COMMA\nDATA\n".splitlines(keepends=True)


# Every row gets the same type value, 'range'.
phyla["type"] = 'range'

# Rows are written headerless in the order gcf, type, color, phylum,
# directly below the header lines.
with open("../iTOL/phylum_labels.txt", "w") as labels_out:
    labels_out.write("".join(parameters))
    phyla[["gcf", "type", "color", "phylum"]].to_csv(labels_out, header=False, index=False)

## Using API

In [None]:
# Upload the tree with its annotation files and export figure 4a as PNG.
# NOTE(review): emp.tree.txt, node_labels.txt and phylum_names.txt are not
# written by any cell visible in this notebook -- presumably prepared
# separately; confirm they exist before running.
itol_up = Itol()
for upload_path in (
    '../iTOL/emp.tree.txt',
    '../iTOL/node_labels.txt',
    '../iTOL/phylum_labels.txt',
    '../iTOL/phylum_names.txt',
    '../iTOL/bq_rq_binary.txt',
):
    itol_up.add_file(upload_path)
itol_up.upload()
print(itol_up.comm.upload_output)

# Export settings, applied in the listed order.
itol_ex = itol_up.get_itol_export()
for param_name, param_value in (
    ('arc', 320),
    ('rotation', 110),
    ('ignore_branch_length', 0),
    ('inverted', 0),
    ('format', 'png'),
    ('datasets_visible', "0,1"),
):
    itol_ex.set_export_param_value(param_name, param_value)
itol_ex.export('../figures/fig_4a.png')

In [None]:
# Upload the tree with the interaction dataset and export figure 4b as PNG.
# Unlike the fig_4a export, this one sets ignore_branch_length and inverted
# to 1 and adds a horizontal_scale_factor.
# NOTE(review): emp.tree.txt, node_labels.txt and phylum_labels2.txt are not
# written by any cell visible in this notebook -- presumably prepared
# separately; confirm they exist before running.
itol_up = Itol()
for upload_path in (
    '../iTOL/emp.tree.txt',
    '../iTOL/node_labels.txt',
    '../iTOL/phylum_labels2.txt',
    '../iTOL/interactions.txt',
):
    itol_up.add_file(upload_path)
itol_up.upload()
print(itol_up.comm.upload_output)

# Export settings, applied in the listed order.
itol_ex = itol_up.get_itol_export()
for param_name, param_value in (
    ('arc', 320),
    ('rotation', 110),
    ('ignore_branch_length', 1),
    ('inverted', 1),
    ('format', 'png'),
    ('datasets_visible', "0,1"),
    ('horizontal_scale_factor', 0.3),
):
    itol_ex.set_export_param_value(param_name, param_value)
itol_ex.export('../figures/fig_4b.png')