Implement the ring plots from the [MinGenome paper](https://pubs.acs.org/doi/10.1021/acssynbio.7b00296).

[Bokeh "Burtin" example](https://bokeh.pydata.org/en/latest/docs/gallery/burtin.html) may provide some helpful hints.


Question: Encode the protein value on the area or height?  How is that read?

Next steps:
* Set 0 to top
* Essential/non-essential color encoding (with A/B for local contrast)
* Use dataset from tutorial; wild-type control
* Predicted vs observed protein allocation
* Likes the 
* Generate the gene block plot (left of the "results" section)

Jeremy Z to send:
* Predictions for each step
* Observations for each step
* Essential genes for each step
* Genes we **should** be knocking out at each step


In [3]:
import pandas as pd
import numpy as np

from collections import OrderedDict
from math import log, sqrt

from bokeh.plotting import figure, curdoc
from bokeh.io import show, output_notebook

from bokeh.models import ColumnDataSource, CategoricalColorMapper, Whisker, LabelSet, Div
from bokeh.models import HoverTool, BoxSelectTool, PanTool, WheelZoomTool, ResetTool, SaveTool, Dimensions
from bokeh.models import widgets
from bokeh.models import formatters 
from bokeh.models.ranges import FactorRange
from bokeh.models.formatters import PrintfTickFormatter, FuncTickFormatter

from bokeh.transform import factor_cmap, linear_cmap, dodge

from bokeh import events
from bokeh.layouts import column, row, widgetbox, layout, gridplot
from bokeh import palettes

from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid
from bokeh.models.glyphs import HBar
from bokeh.transform import factor_cmap
from bokeh.palettes import PiYG, Spectral6, Category10, Category20_20

import colorcet
import os
output_notebook()
%matplotlib inline

In [9]:
# Display the installed Bokeh version; the recorded cell output is '0.12.16'.
import bokeh
bokeh.__version__

'0.12.16'

In [5]:
# Load the gene metadata table and derive, for every locus, the angles (in
# radians) that its start, stop and midpoint occupy on a circular map.
#
# The project root is resolved with expanduser("~") rather than
# os.environ['HOME'], which raises KeyError where HOME is not set.
synbioAI = os.path.join(os.path.expanduser("~"), 'Projects/E.coli/synbio-ai')
genes = pd.read_csv(os.path.join(synbioAI, "KHK_collection/E_coli_metadata.txt"),
                    sep="\t")[["gene", "locus", "start", "stop"]]

# Discard rows whose locus identifier begins with "u".
genes = genes[~genes["locus"].str.startswith("u")].reset_index(drop=True)

# Remove exact duplicate rows, index by locus, and order along the genome.
genes = genes.drop_duplicates().set_index("locus").sort_values("start")

# If a locus still appears more than once, keep its first (lowest-start) row.
genes = genes[~genes.index.duplicated(keep="first")]

# center: midpoint coordinate of the gene; seq: 0-based rank in genome order.
genes = genes.assign(center=(genes["stop"] - genes["start"]) / 2 + genes["start"],
                     seq=np.arange(genes.shape[0]))

# The largest stop coordinate stands in for the full circle (360 degrees).
max_loc = genes["stop"].max()

# Fraction of the circle -> degrees -> radians, computed directly so no
# temporary *_pct columns have to be created and dropped again.
genes = genes.assign(start_rad=np.radians(genes["start"] / max_loc * 360),
                     stop_rad=np.radians(genes["stop"] / max_loc * 360),
                     center_rad=np.radians(genes["center"] / max_loc * 360))
genes.head()

Unnamed: 0_level_0,gene,start,stop,center,seq,start_rad,stop_rad,center_rad
locus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
b0001,thrL,189,255,222.0,0,0.000256,0.000345,0.0003
b0002,thrA,336,2799,1567.5,1,0.000454,0.003785,0.00212
b0003,thrB,2800,3733,3266.5,2,0.003786,0.005048,0.004417
b0004,thrC,3733,5020,4376.5,3,0.005048,0.006789,0.005918
b0005,yaaX,5233,5530,5381.5,4,0.007077,0.007478,0.007277


In [10]:
# Thin ring in which consecutive genes alternate between two colors.
size = 400
outer_radius = size / 2
inner_radius = size / 2.01
span = outer_radius * 1.01

# "ab" holds the parity of each gene's sequence rank as the string "0" or "1".
data = genes.assign(ab=(genes["seq"] % 2).apply(str))
source = ColumnDataSource(data)
cmap = factor_cmap('ab', ["#F05974", "#260D75"], ["0", "1"])

# Both ranges are given high-to-low, i.e. each axis runs from +span to -span.
p = figure(
    title="A/B Seq Map",
    plot_width=size,
    plot_height=size,
    x_range=(span, -span),
    y_range=(span, -span),
)

# One wedge per gene, spanning the gene's start/stop angles.
p.annular_wedge(
    x=0,
    y=0,
    inner_radius=inner_radius,
    outer_radius=outer_radius,
    start_angle="start_rad",
    end_angle="stop_rad",
    color=cmap,
    source=source,
)

show(p)

In [11]:
# Ring in which each gene is colored by its sequence rank using a cyclic palette.
size = 400
outer_radius = size / 2
inner_radius = size / 2.1
span = outer_radius * 1.01

data = genes
source = ColumnDataSource(data)

# Alternative cyclic palette that was tried: colorcet.b_cyclic_mrybm_35_75_c68
cmap = linear_cmap(
    'seq',
    colorcet.b_cyclic_mygbm_30_95_c78,
    low=0,
    high=data["seq"].max(),
)

# Both ranges are given high-to-low, i.e. each axis runs from +span to -span.
p = figure(
    title="Rainbow Seq Map",
    plot_width=size,
    plot_height=size,
    x_range=(span, -span),
    y_range=(span, -span),
)

# One semi-transparent wedge per gene, spanning the gene's start/stop angles.
p.annular_wedge(
    x=0,
    y=0,
    inner_radius=inner_radius,
    outer_radius=outer_radius,
    start_angle="start_rad",
    end_angle="stop_rad",
    color=cmap,
    fill_alpha=.5,
    line_alpha=.75,
    source=source,
)

show(p)

In [13]:
def fix_ac_name(n):
    """Zero-pad the number in the leading token of an underscore-separated name.

    The first two characters of the leading token are discarded, the remainder
    is parsed as an integer, and the token is rebuilt as "AC" followed by that
    integer padded to at least two digits. For example
    "AC1_control_1_01" becomes "AC01_control_1_01".

    Anything that cannot be processed this way (a non-string, or a leading
    token whose remainder is not an integer, such as "gene") is returned
    unchanged.
    """
    try:
        parts = n.split("_")
        v = int(parts[0][2:])
    except (AttributeError, TypeError, ValueError):
        # Only the expected "not a name of this form" failures are absorbed;
        # a bare except would also swallow KeyboardInterrupt and SystemExit.
        return n
    return "_".join([f"AC{v:02}"] + parts[1:])

# Annotation table: headerless, tab-separated, columns locus / gene / blattner,
# indexed by the "blattner" column so it can be joined to the abundance rows.
protein_to_locus = pd.read_csv(
    os.path.join(synbioAI, "Sources/E_coli_W3110_lambdaRed_Blattner_annotations.tab"),
    sep="\t",
    header=None,
    names=["locus", "gene", "blattner"],
).set_index("blattner")

# Abundance table, indexed by its "Row_ID" column.
rel_abundance = pd.read_csv(
    os.path.join(
        synbioAI,
        "Sources/PNNL_RelativeAbundance_KHK10_TwoOutliersRemoved_Log2_MedianCentered.csv",
    )
).set_index("Row_ID")

# Keep only rows present in both tables, then re-index the result by locus.
rel_abundance = protein_to_locus.join(rel_abundance, how="inner").set_index("locus")

# Normalise the sample column names (e.g. zero-padding via fix_ac_name).
rel_abundance.columns = list(map(fix_ac_name, rel_abundance.columns))
rel_abundance.head()

Unnamed: 0_level_0,gene,AC01_control_1_01,AC01_control_2_02,AC01_control_3_03,AC01_cumate_1_04,AC01_cumate_3_06,AC13_control_1_19,AC13_control_3_21,AC13_cumate_1_22,AC13_cumate_2_23,...,AC08_control_3_09,AC08_cumate_1_10,AC08_cumate_2_11,AC08_cumate_3_12,AC09_control_1_13,AC09_control_2_14,AC09_control_3_15,AC09_cumate_1_16,AC09_cumate_2_17,AC09_cumate_3_18
locus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
b0002,thrA,31.231474,31.11097,31.069793,31.354147,31.133642,32.88131,32.805251,32.659772,32.600059,...,32.052148,32.340617,32.35007,32.119732,32.062637,32.071408,32.167375,32.346559,32.248836,32.16378
b0103,coaE,26.724728,27.013718,27.082461,27.29954,26.814524,27.038524,26.622915,27.411091,27.247894,...,26.862031,27.256155,26.933075,26.652343,26.051161,26.818144,27.304423,26.972126,26.619162,26.446126
b1045,ymdB,27.968188,29.482713,26.368014,27.871183,27.851636,27.366084,30.33337,29.900245,29.968924,...,26.904261,27.985653,30.167878,29.67932,29.891365,30.101711,30.214733,27.332817,30.309798,27.782753
b0104,guaC,32.822284,32.870137,32.626494,32.494871,32.236021,32.879375,32.98349,32.266197,32.456273,...,32.993001,32.529763,32.409755,32.612574,33.214381,33.24312,33.360798,32.50388,32.579549,32.618016
b1048,mdoG,30.672394,30.475251,31.027852,31.44536,31.332391,31.257079,30.954798,31.595682,31.595267,...,31.018138,30.94453,30.873033,30.929804,31.198822,31.159787,31.146078,31.002445,31.244579,31.146552


In [14]:
# Ring plot in which each gene's wedge height encodes the measured value of
# one sample column; wedges alternate between two colors by sequence rank.
size = 400
inner_radius = size/10
outer_radius = size/2

# Sample column to plot. Every later reference goes through this variable, so
# switching samples only requires editing this one line.
condition = "AC01_control_1_01"

# Left join keeps every gene; genes with no measurement get 0 after fillna.
data = genes.join(rel_abundance[condition], how="left")\
            .fillna(0)\
            .assign(ab = (genes.seq%2).apply(str))

# value is the wedge's outer radius: the measurement stacked on inner_radius.
data = data.assign(value = data[condition]+inner_radius)\
            .drop(condition, axis="columns")

source = ColumnDataSource(data)

# Size the axes to the tallest wedge.
span = data.value.max()

# Both ranges are given high-to-low, i.e. each axis runs from +span to -span.
p = figure(plot_width=size, plot_height=size,
            x_range=(span, -span), y_range=(span,-span),
            title="Measured Value; A/B Sequence Map")

cmap = factor_cmap('ab', ["#F05974", "#260D75"], ["0", "1"])
p.annular_wedge(source=source,
                x=0,y=0, inner_radius=inner_radius, outer_radius="value",
                start_angle="start_rad", end_angle="stop_rad",
                color=cmap)

show(p)

In [15]:

# Ring plot in which each gene's wedge height encodes a rescaled abundance for
# one sample column; wedges alternate between two colors by sequence rank.
size = 400
inner_radius = size/10
outer_radius = size/2

# Sample column to plot. Every later reference goes through this variable, so
# switching samples only requires editing this one line.
condition = "AC01_control_1_01"

# Undo the log2 (per the source file name) and rescale so the largest value
# is 100.
# NOTE(review): this is percent of the *maximum*, while the plot title says
# "Percent of total Mass" - confirm which one is intended.
vs = 2**rel_abundance[condition]
vs = vs/vs.max()*100

# Left join keeps every gene; genes with no measurement get 0 after fillna.
data = genes.join(vs, how="left")\
            .fillna(0)\
            .assign(ab = (genes.seq%2).apply(str))

# value is the wedge's outer radius: the rescaled value stacked on inner_radius.
data = data.assign(value = data[condition]+inner_radius)\
            .drop(condition, axis="columns")

source = ColumnDataSource(data)

# Size the axes to the tallest wedge.
span = data.value.max()

# Both ranges are given high-to-low, i.e. each axis runs from +span to -span.
p = figure(plot_width=size, plot_height=size,
            x_range=(span, -span), y_range=(span,-span),
            title="Percent of total Mass; A/B Encoded")

cmap = factor_cmap('ab', ["#F05974", "#260D75"], ["0", "1"])
p.annular_wedge(source=source,
                x=0,y=0, inner_radius=inner_radius, outer_radius="value",
                start_angle="start_rad", end_angle="stop_rad",
                color=cmap)

show(p)


In [16]:
# Ring plot of the per-gene difference between a condition and a reference
# sample. Wedge height and a diverging color map both encode the difference.
size = 400
inner_radius = size/2.2
outer_radius = size/2

reference = "AC01_control_1_01"
condition = "AC01_cumate_1_04"

# Difference between the two sample columns, scaled so that one unit of
# difference corresponds to 10% of the inner radius.
delta = (rel_abundance[condition]-rel_abundance[reference]).rename("delta")*(inner_radius*.1)

# Left join keeps every gene; genes with no measurement get a delta of 0.
data = genes.join(delta, how="left")\
            .fillna(0)

# value is the wedge's outer radius; a negative delta makes the wedge extend
# inward from inner_radius.
data = data.assign(value = data["delta"]+inner_radius)
source = ColumnDataSource(data)

# Size the axes to the tallest wedge.
span = data.value.max()

# Both ranges are given high-to-low, i.e. each axis runs from +span to -span.
p = figure(plot_width=size, plot_height=size,
            x_range=(span, -span), y_range=(span,-span),
          title="Delta reference; Diverging Linear Color map")

# The color map is applied to the "delta" column, so its symmetric limits must
# come from delta as well. Using "value" (delta + inner_radius) made the range
# far wider than the data and squeezed every wedge into the palette's middle.
ext = data["delta"].abs().max()

# Alternative diverging palettes that were tried:
#   colorcet.b_diverging_gwv_55_95_c39
#   colorcet.b_diverging_bwr_55_98_c37
cmap = linear_cmap('delta', colorcet.b_diverging_bwr_40_95_c42, low=-ext, high=ext)

p.annular_wedge(source=source,
                x=0,y=0, inner_radius=inner_radius, outer_radius="value",
                start_angle="start_rad", end_angle="stop_rad",
                color=cmap)

show(p)

In [17]:
# Ring plot of the per-gene difference between a condition and a reference
# sample. Wedge height encodes the difference; color encodes only its sign.
size = 400
outer_radius = size / 2
inner_radius = size / 2.2

reference = "AC01_control_1_01"
condition = "AC01_cumate_1_04"

# Difference between the two sample columns, scaled by 10% of the inner radius.
delta = (rel_abundance[condition] - rel_abundance[reference]).rename("delta") * (inner_radius * .1)

# Left join keeps every gene (missing deltas become 0). "pn" is derived from
# the un-joined delta series: "+" for strictly positive values, "-" otherwise.
data = (
    genes.join(delta, how="left")
         .fillna(0)
         .assign(pn=delta.apply(lambda v: "+" if v > 0 else "-"))
)

# value is the wedge's outer radius: delta stacked on inner_radius.
data = data.assign(value=data["delta"] + inner_radius)
source = ColumnDataSource(data)

# Size the axes to the tallest wedge.
span = data["value"].max()

# Both ranges are given high-to-low, i.e. each axis runs from +span to -span.
p = figure(
    title="Delta Reference; Pos/Neg Categorical Map; ",
    plot_width=size,
    plot_height=size,
    x_range=(span, -span),
    y_range=(span, -span),
)

# ext is only needed by the linear color map alternative noted below.
ext = data["value"].abs().max()
cmap = factor_cmap('pn', ["#E02E4E", "#4D31A5"], ["+", "-"])

# Alternative: linear_cmap('delta', colorcet.b_diverging_gwv_55_95_c39, low=-ext, high=ext)

p.annular_wedge(
    x=0,
    y=0,
    inner_radius=inner_radius,
    outer_radius="value",
    start_angle="start_rad",
    end_angle="stop_rad",
    color=cmap,
    fill_alpha=.25,
    line_alpha=.5,
    source=source,
)

show(p)