Implement the ring plots from the [MinGenome paper](https://pubs.acs.org/doi/10.1021/acssynbio.7b00296).

[Bokeh "Burtin" example](https://bokeh.pydata.org/en/latest/docs/gallery/burtin.html) may provide some helpful hints.


Question: Should the protein value be encoded as area or as height?  How would each be read?

Next steps:
* Set 0 to top  (DONE)
* Essential/non-essential color encoding (with A/B for local contrast)
* Use dataset from tutorial; wild-type control (DONE)
* Predicted vs observed protein allocation 
* Generate the gene block plot (left of the "results" section) (DONE)

Jeremy Z to send:
* Predictions for each step (received)
* Observations for each step (received)
* Essential genes for each step (received)
* Genes we **should** be knocking out at each step 


In [None]:
import pandas as pd
import numpy as np

from collections import OrderedDict
from math import log, sqrt

import bokeh
from bokeh.plotting import figure, curdoc
from bokeh.io import show, output_notebook, export_png

from bokeh.models import ColumnDataSource, CategoricalColorMapper, Whisker, LabelSet, Div
from bokeh.models import HoverTool, BoxSelectTool, PanTool, WheelZoomTool, ResetTool, SaveTool, ColorBar
from bokeh.models import widgets
from bokeh.models import formatters 
from bokeh.models.ranges import FactorRange
from bokeh.models.formatters import PrintfTickFormatter, FuncTickFormatter

from bokeh.transform import factor_cmap, linear_cmap, dodge

from bokeh import events
from bokeh.layouts import column, row, widgetbox, layout, gridplot
from bokeh import palettes

from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid
from bokeh.models.glyphs import HBar
from bokeh.transform import factor_cmap
import bokeh.palettes as palettes
from bokeh.palettes import PiYG, Spectral6, Category10, Category20_20

import colorcet

output_notebook()
%matplotlib inline

In [None]:
# Display the installed bokeh version.
bokeh.__version__
# TODO: once bokeh is at 1.0.4, try the bokeh/latex extension: https://bokeh.pydata.org/en/latest/docs/user_guide/extensions_gallery/latex.html

In [None]:
from math import ceil 

def square_shape(N, sh=1, sw=1):
    """Return ``(rows, cols)`` for arranging ``N`` cells in a near-square grid.

    ``sh`` and ``sw`` weight the column estimate (presumably the height and
    width of one cell, per the linked answer).  At least one column is always
    returned, so ``N == 0`` yields ``(0, 1)`` instead of dividing by zero.
    """
    #https://stackoverflow.com/questions/339939/stacking-rectangles-to-into-the-most-square-like-arrangement-possible
    # The rounded estimate is 0 for N == 0 or for very wide cells; clamp it so
    # the row computation below never divides by zero.
    cols = max(1, round(sqrt(N * sh / sw)))
    rows = ceil(N / cols)
    return rows, cols

In [None]:
# Knockout table, indexed by locus.
# NOTE(review): the other ".tab" files in this notebook are read with sep="\t";
# this one is parsed with the default comma separator — confirm the delimiter.
knockouts = pd.read_csv("./Blattner_ecoli_kos.tab")
# Rename the first column by assigning a fresh list of labels.  Writing into
# `columns.values` mutates the backing array of an immutable Index, which
# pandas does not support.
knockouts.columns = ["locus", *knockouts.columns[1:]]
knockouts = knockouts.set_index("locus")
knockouts.head()

In [None]:
# Load the observed-measurement table, index it by the "Blattner" column and
# discard the file's own "index" column.
observed = (
    pd.read_csv(
        "E.coli_avg_iBAQ_mass_pct_induced_v_uninduced.tab",
        sep="\t",
    )
    .set_index("Blattner")
    .drop(columns="index")
)
observed.head()

In [None]:
# Load the prediction table and index it by its "level_0" column.
predicted = (
    pd.read_csv("./protein_mass_percent_predicted_from_previous_step.csv")
    .set_index("level_0")
)
predicted.tail()

In [None]:
# Gene coordinate table: keep the four columns used below, drop loci whose id
# starts with "unique", index by locus and order by start coordinate.
genes = pd.read_csv("./E_coli_metadata.txt", sep="\t")
genes = genes[["gene", "locus", "start", "stop"]]
genes = genes.loc[~genes["locus"].str.startswith("unique")]
genes = genes.reset_index(drop=True).set_index("locus").sort_values("start")

# A locus may appear more than once; keep its first (lowest-start) row.
genes = genes.loc[~genes.index.duplicated(keep="first")]

# `center` is the midpoint of each gene; `seq` is its 0-based rank by start.
genes = genes.assign(
    center=(genes["stop"] - genes["start"]) / 2 + genes["start"],
    seq=np.arange(len(genes)),
)

max_loc = genes["stop"].max()

# Map each coordinate to an angle: position / max_loc is the fraction of a
# full turn, shifted so fraction 0 sits at +90 degrees, then negated so the
# angle decreases as the position increases.  The negation reverses ordering,
# which is why the wedge's *stop* angle comes from the gene's *start* (and
# vice versa).
genes = genes.assign(
    stop_rad=-np.radians(genes["start"] / max_loc * 360 - 90),
    start_rad=-np.radians(genes["stop"] / max_loc * 360 - 90),
    center_rad=-np.radians(genes["center"] / max_loc * 360 - 90),
)
genes.head()

In [None]:
from functools import reduce

def export(p, filename, multiplier=4):
    """Rescale components & fonts, then save the figure to ``filename`` as a PNG.

    The figure is modified in place: the toolbar is hidden, and the title,
    y-axis tick-label and colorbar (models named "colorbar") font sizes and
    standoffs are multiplied by ``multiplier``.  The PNG is rendered at
    ``multiplier`` times the figure's own height and width.

    Current values are read from the figure and scaled, so exporting the same
    figure twice compounds the scaling.  Values that cannot be read fall back
    to fixed defaults (12pt title, 10pt labels, standoff 10, 400x400 figure).
    """
    import re  # function-scope import: only this helper needs it

    def step(acc, key):
        "One path step: attribute access first, item access as a fallback."
        try:
            return getattr(acc, key)
        except (AttributeError, TypeError):
            return acc[key]

    def depath(obj, path, default):
        "Follow `path` from `obj`; return `default` if a step is missing or the value is None."
        # Best effort by design: any lookup failure means "use the default".
        try:
            found = reduce(step, path, obj)
        except (AttributeError, KeyError, IndexError, TypeError, ValueError):
            return default
        return default if found is None else found

    def mpt(obj, path, default):
        "Font size found at `path`, scaled by `multiplier`; `default` (in pt) if unreadable."
        size = depath(obj, path, None)
        if isinstance(size, dict):
            # NOTE(review): some bokeh versions appear to report font sizes as
            # {"value": "10pt"} — confirm against the installed version.
            size = size.get("value")
        match = None
        if isinstance(size, str):
            match = re.fullmatch(r"\s*(\d+(?:\.\d+)?)\s*([A-Za-z%]+)\s*", size)
        if match is None:
            return "{}pt".format(default*multiplier)
        number, unit = match.groups()
        # Keep the original unit (pt, px, em, ...) rather than forcing "pt".
        return "{:g}{}".format(float(number)*multiplier, unit)

    p.toolbar.logo = None
    p.toolbar_location = None
    p.title.text_font_size = mpt(p, ["title", "text_font_size"], 12)
    for axis in p.yaxis:
        axis.major_label_text_font_size = mpt(axis, ["major_label_text_font_size"], 10)

    # select() returns a (possibly empty) list of matches, never None, so
    # iterate instead of testing for None.
    for cb in p.select({"name": "colorbar"}):
        cb.title_text_font_size = mpt(cb, ["title_text_font_size"], 10)
        cb.major_label_text_font_size = mpt(cb, ["major_label_text_font_size"], 10)
        cb.label_standoff = depath(cb, ["label_standoff"], 10)*multiplier
        cb.title_standoff = depath(cb, ["title_standoff"], 10)*multiplier

    # Figures in this notebook are sized with either plot_height/plot_width or
    # height/width; use whichever is populated.
    height = depath(p, ["plot_height"], None) or depath(p, ["height"], 400)
    width = depath(p, ["plot_width"], None) or depath(p, ["width"], 400)

    export_png(p, filename=filename,
               height=height*multiplier,
               width=width*multiplier)

# Gene Block

In [None]:
#Isolate data
# Attach the knockout columns to every gene (left join keeps genes with no
# knockout entry) and drop the angle columns the block diagram does not use.
gene_drop = genes.join(knockouts.drop(['gene', 'E_coli_W3110'], axis="columns") , how="left")\
                 .drop(["stop_rad", "start_rad", "center_rad", "center"], axis="columns")
# `seq` is 0-based, so the number of genes is seq.max() + 1; sizing the grid
# from seq.max() alone can leave it one cell short.
rows, cols = square_shape(int(gene_drop.seq.max()) + 1)
# Row-major cell position of each gene: the column wraps every `cols` genes, so
# the row index must divide by `cols` as well (it previously divided by `rows`).
# Rows are negated so they count downwards. start/stop are rescaled by 1/1000.
gene_drop = gene_drop.assign(col = gene_drop.seq%cols, row=-(gene_drop.seq//cols),
                             start = gene_drop.start/1000,
                             stop = gene_drop.stop/1000)

#gene_drop.head()

In [None]:
# Prep image
#rows, cols = square_shape(end)
# Flat buffer with one cell per grid position; NaN marks "no step recorded".
img = np.full(rows * cols, np.nan)

# Paint each gene's [start, stop) span (truncated to integers) with its Step
# value; later genes overwrite earlier ones where spans overlap.
# NOTE(review): the grid is sized from the gene count but indexed by
# start/stop (already divided by 1000).  Slices beyond rows*cols are silently
# clipped, and a span whose ends truncate to the same integer writes nothing —
# confirm this is intended.
for span_start, span_stop, step_value in zip(gene_drop["start"], gene_drop["stop"], gene_drop["Step"]):
    img[int(span_start):int(span_stop)] = step_value

# Back to 2-D, flipped vertically for display.
img = np.flipud(img.reshape((rows, cols)))

In [None]:
#Visualization
def block_diagram():
    "Build the gene-deletion block figure: the `img` grid colored by step, with a colorbar."
    #TODO: Should I use this rounded-up number or just max?
    # Upper end of the color scale: the next multiple of 10 above the largest step.
    step_ceiling = (gene_drop.Step.max() // 10 + 1) * 10
    mapper = linear_cmap(
        'Step', palettes.Viridis256,
        low=0, high=step_ceiling,
        nan_color='#efefef',
    )["transform"]

    fig = figure(
        width=500, height=400,
        x_range=(0, cols), y_range=(rows, 0),
        title="Gene Deletions: Step & Location",
        tooltips=[("", "@image")],
    )
    fig.xaxis.visible = False
    fig.yaxis.formatter = PrintfTickFormatter(format="%d kbp")
    fig.image(image=[img], x=0, y=rows, dw=cols, dh=rows, color_mapper=mapper)

    # The name "colorbar" is what export() looks up when rescaling.
    fig.add_layout(
        ColorBar(color_mapper=mapper, location=(0, 0), title="step", name="colorbar"),
        "right",
    )
    return fig

p = block_diagram()
show(p)
export(p, "gene_block.png")

# Protein Rings

In [None]:
def ring_plot(data, cmap, *, title="", rng=(-10, 10), out_steps=3):
    """Draw one annular wedge per row of ``data`` around a fixed base ring.

    Parameters
    ----------
    data : DataFrame with ``start_rad`` and ``stop_rad`` angle columns, plus
        whatever column ``cmap`` refers to.  Optional ``inner`` / ``outer``
        columns give each wedge's radial offsets from the base ring; when
        absent they default to 0 and ``size/100``.
    cmap : color specification passed to ``annular_wedge(color=...)``.
    title : figure title.
    rng : ``(low, high)`` for the guide-circle labels.  The outermost guide
        is labelled ``high``; inner guides labelled below ``low`` are dropped,
        except the last such one.
    out_steps : number of guide circles from the base ring outwards.

    Returns the bokeh figure.  If no wedge extends beyond the base ring (or
    ``out_steps`` < 2) the guide spacing is undefined, so only the outward
    guides are drawn instead of raising.
    """
    size = 400
    inner_radius = size/4

    if "outer" not in data.columns: data = data.assign(outer=size/100)
    if "inner" not in data.columns: data = data.assign(inner=0)

    # Offsets are relative to the base ring; convert them to absolute radii.
    data = data.assign(inner_radius=data.inner+inner_radius)\
               .assign(outer_radius=data.outer+inner_radius)

    source = ColumnDataSource(data)

    span = data.outer_radius.abs().max()

    p = figure(plot_width=size, plot_height=size,
                x_range=(span, -span), y_range=(span,-span),
                title=title)

    p.axis.visible = False
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    p.annular_wedge(source=source,
                    x=0,y=0,
                    inner_radius="inner_radius", outer_radius="outer_radius",
                    start_angle="start_rad", end_angle="stop_rad",
                    color=cmap)

    # "Nice" radial labels
    out_radii = np.linspace(inner_radius, data.outer_radius.max(), out_steps)
    out_labels = np.linspace(0, rng[1], out_radii.shape[0])

    # Inner guides reuse the spacing of the outer ones.  That spacing is zero,
    # negative or NaN when nothing extends past the base ring, and undefined
    # with fewer than two outer guides; dividing by it used to raise.
    radial_step = out_radii[1] - out_radii[0] if out_radii.shape[0] > 1 else 0
    if radial_step > 0:
        in_count = int(inner_radius/radial_step)
        in_radii = np.linspace(0, inner_radius, in_count)
        in_labels = np.linspace(-3*out_labels[1], 0, in_count)
    else:
        in_radii = np.empty(0)
        in_labels = np.empty(0)

    radii = np.concatenate([in_radii, out_radii])
    labels = np.concatenate([in_labels, out_labels])

    # Start drawing at the last guide whose label is still below rng[0].
    cutoff = np.where(labels < rng[0])[0]
    cutoff=cutoff[-1] if cutoff.shape[0] > 0 else 0

    p.circle(0, 0, radius=radii[cutoff:], fill_color=None, line_color="lightgray")
    p.text(0, radii[cutoff:], [f"{r:9.2f}" for r in labels[cutoff:]],
           text_font_size="8pt", text_align="center", text_baseline="middle")

    return p

In [None]:
# Demo data: alternating "0"/"1" labels along the gene order (for A/B
# coloring) and an outer offset that grows linearly from 1 to 10.
data = genes.assign(
    ab=(genes["seq"] % 2).apply(str),
    outer=np.linspace(1, 10, len(genes)),
)

cmap = factor_cmap('ab', palette=["#F05974", "#260D75"], factors=["0", "1"])

show(ring_plot(data, cmap, title="A/B Seq Map"))

In [None]:
# Render the same A/B ring to a PNG.  No filename is passed, so the output
# path is whatever export_png chooses by default.
bokeh.io.export_png(ring_plot(data, cmap, title="A/B Seq Map"))

In [None]:
# List the columns of the observed table (each is plotted as one condition below).
observed.columns

In [None]:
from bokeh.layouts import gridplot
def grid(plots, size=150):
    "Lay `plots` out in a near-square grid, each plot sized `size` by `size`."
    # Only the column count is needed; gridplot derives the rows itself.
    n_cols = square_shape(len(plots))[1]
    return gridplot(plots, ncols=n_cols, plot_height=size, plot_width=size)


In [None]:
def single_condition(condition, *, base=None, scale=10000):
    """Ring plot of one condition column, titled with the column's name.

    `condition` is a named Series indexed like `genes`.  If `base` is given
    the values are first transformed as ``base**condition``; they are then
    normalized so they sum to `scale`.  Missing values after the left join
    with `genes` are filled with 0.
    """
    values = base**condition if base is not None else condition
    values = values/values.sum()*scale

    joined = genes.join(values, how="left").fillna(0)
    # Alternating "0"/"1" label per gene gives neighbouring wedges contrast;
    # the joined value column becomes the wedge's outer offset.
    data = joined.assign(ab=(genes.seq%2).apply(str))\
                 .rename(columns={condition.name: "outer"})

    palette = factor_cmap('ab', ["#F05974", "#260D75"], ["0", "1"])

    ring = ring_plot(data, palette)
    ring.title.text = condition.name
    return ring

show(single_condition(observed["W3110_control"], base=2))


In [None]:
def delta_plot(focus, reference, genes, *, scale=500):
    """Ring plot of ``(focus - reference) * scale`` per gene, colored by sign.

    `focus` and `reference` are named Series; their names form the title.
    Missing values after the left join with `genes` are filled with 0, and
    zero differences are colored like negative ones.
    """
    def sign_label(value):
        if value > 0:
            return "+"
        return "-"

    difference = (focus - reference).rename("delta")*scale
    data = genes.join(difference, how="left").fillna(0)
    data = data.assign(pn=data["delta"].apply(sign_label))\
               .rename(columns={"delta": "outer"})

    sign_colors = factor_cmap('pn', ["#E02E4E", "#4D31A5"], ["+", "-"])

    return ring_plot(data, sign_colors, title = f"{focus.name} vs {reference.name}")

In [None]:
# Observed difference between the "Step_06_control" and "W3110_control" columns.
show(delta_plot(observed["Step_06_control"], observed["W3110_control"], genes))

In [None]:
# One ring per observed column (no `base` transform), arranged in a grid.
g = grid([single_condition(observed[condition]) for condition in observed.columns])
show(g)

In [None]:
# Ring for the sixth-from-last column of the prediction table, transformed as 2**value.
show(single_condition(predicted[predicted.columns[-6]], base=2))


In [None]:
# Prediction columns to compare.  The first six characters of each name
# ("StepNN") select the matching measured column "iBAQ_StepNN".
step_predictions = [
    "Step04_predicted_from_WT",
    "Step05_predicted_from_Step04",
    "Step05_predicted_from_WT",
    "Step09_predicted_from_WT",
    "Step10_predicted_from_Step09",
    "Step10_predicted_from_WT",
]
plots = []
for step_predicted in step_predictions:
    step_measured = 'iBAQ_{}'.format(step_predicted[:6])

    # log2 of both columns, computed once and reused for the masks and the plot.
    log_predicted = predicted[step_predicted].apply(np.log2)
    log_measured = predicted[step_measured].apply(np.log2)

    # Rows where the comparison is NaN count as "not over-predicted".
    positive_mask = (log_predicted - log_measured) > 0
    negative_mask = ~positive_mask

    # Sum of the predicted values on each side of the split.
    over_predicted = predicted.loc[positive_mask, step_predicted].sum()
    under_predicted = predicted.loc[negative_mask, step_predicted].sum()
    net_predicted = over_predicted - under_predicted
    print('Overpredicted: {:.0%}\nUnderpredicted: {:.0%}\nNet predicted: {:.0%}\n'.format(over_predicted,
                                                                             under_predicted,
                                                                             net_predicted))

    p = delta_plot(log_predicted.fillna(0), log_measured.fillna(0), genes, scale=5)
    plots.append(p)
    show(p)
    export(p, '{}_vs_{}.png'.format(step_predicted, step_measured))
grid_plot = grid(plots)
#show(grid_plot)
#export(grid_plot, 'grid_plot.png')

In [None]:
from IPython.display import Latex, HTML
# Render the sixth-from-last prediction column name as LaTeX.
Latex(predicted.columns[-6])

In [None]:
# Render the last six prediction column names as LaTeX, iterating the slice
# directly instead of indexing with range() arithmetic.
for column_name in predicted.columns[-6:]:
    display(Latex(column_name))

In [None]:
# Print every prediction column name, one per line.
print('\n'.join(predicted.columns))

In [None]:
# Keep only rows whose id starts with "b", strip the "$" markup from the two
# affected headers, and index the table by the id column.
novel_predictions = (
    pd.read_csv("genome_view.tab", sep="\t")
    .loc[lambda table: table["$gene_or_promoter"].str.startswith("b")]
    .rename(columns={"$gene_or_promoter": "gene_or_promoter",
                     "mass reclaimed $(fg/cell)$": "mass reclaimed (fg/cell)"})
    .set_index("gene_or_promoter")
)

novel_predictions.head()

In [None]:
# Ring plot of the "mass reclaimed (fg/cell)" column; shown inline and saved to reclaimed.png.
p= single_condition(novel_predictions["mass reclaimed (fg/cell)"])
show(p)
export(p, "reclaimed.png")