In [1]:
import pandas as pd
import json
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from scipy.stats import gaussian_kde
import random

In [2]:
# Route Bokeh output to the notebook so that later show() calls render inline.
output_notebook()

In [3]:
# Strain genotypes: parse the JSON mapping (presumably strain name -> mutated
# genes, going by the file name), then register MG1655 with an empty gene set.
with open("../Data/Mutation_data/Strain_to_genes.json") as genotype_file:
    genotypes = json.loads(genotype_file.read())
genotypes.update({"MG1655": set()})

In [4]:
# Cross-tolerance growth rates. Only strains present in `genotypes` are kept,
# which filters the hypermutators out.
growth_data = (
    pd.read_csv(
        "../Data/Growth_data/Cross_tolerance/Cleaned_growth_rates.tsv",
        sep="\t",
        index_col=0,
    )
    .loc[lambda table: table["strain"].isin(genotypes)]
)

# M9 growth rates, filtered the same way.
m9_data = (
    pd.read_csv("../Data/Growth_data/M9/M9_normalised_growth.csv")
    .loc[lambda table: table["strain"].isin(genotypes)]
)

In [5]:
# Stack the cross-tolerance and M9 measurements into one long table and discard
# the two columns that are not used further down.
df = pd.concat([growth_data, m9_data]).drop(["exp_name", "tOD1"], axis=1)

# Full compound name -> short compound code.
compounds = {
    "adipate": "ADIP",
    "hexanoate": "HEXA",
    "glutarate": "GLUT",
    "HMDA": "HMDA",
    "1,2-propanediol": "12PD",
    "2,3-butanediol": "23BD",
    "octanoate": "OCTA",
    "butanol": "BUT",
    "putrescine": "PUTR",
    "isobutyrate": "IBUA",
    "coumarate": "COUM",
}

# First three characters of each code -> full compound name.
reverse_compounds = dict((code[:3], compound) for compound, code in compounds.items())


def strain_to_compound(strain):
    """
    Return the compound a strain name refers to.

    The first three characters of `strain` are looked up in `reverse_compounds`.
    "MG1655" maps to None. Any other unknown prefix raises KeyError.
    """
    return None if strain == "MG1655" else reverse_compounds[strain[:3]]
    
# Tag every measurement with the compound named by its strain (None for MG1655).
df["evolved_compound"] = df["strain"].apply(strain_to_compound)

In [76]:
# Mean growth per (strain, growth condition). The value columns are selected
# explicitly so the mean is never taken over string columns such as
# `evolved_compound`, and the aggregation is computed once for both pivots.
strain_means = (
    df.groupby(["strain", "grown_compound"])[["growth_rate", "rel_growth_rate"]]
    .mean()
    .reset_index()
)

# Wide tables: one row per strain, one column per growth condition.
short_df = strain_means.pivot(index="strain", columns="grown_compound", values="growth_rate")
rel_short_df = strain_means.pivot(index="strain", columns="grown_compound", values="rel_growth_rate")

# Overall range of mean growth rates, taken before the string column is added
# below. NaNs (strain/condition pairs without data) are ignored.
min_rate, max_rate = np.nanmin(short_df.values), np.nanmax(short_df.values)

short_df["evolved_compound"] = short_df.index.map(strain_to_compound)
rel_short_df["evolved_compound"] = rel_short_df.index.map(strain_to_compound)

## Make growth overview violin plot

In [7]:
def prettify(string):
    """
    Capitalise labels that are written entirely in lower case.

    Strings containing any upper-case character (e.g. "HMDA", "NaCl") are
    returned unchanged.
    """
    is_all_lowercase = string == string.lower()
    return string.capitalize() if is_all_lowercase else string

In [16]:
# Fixed three-colour palette (an earlier version generated it with
# plotting.color_range(3, 0.6, 1, offset=0.25)).
colors = ['#99ff32', '#3299ff', '#ff3299']

# Growth conditions, in the order they appear along the x axis.
categories = [
    "M9", "HMDA", "putrescine", "1,2-propanediol", "2,3-butanediol",
    "glutarate", "adipate", "hexanoate", "octanoate", "isobutyrate",
    "coumarate", "butanol", "NaCl",
]

x_labels = list(map(prettify, categories))
fig = figure(title=None, x_range=x_labels, width=900, y_range=[min_rate, max_rate*1.1])

def gauss_max(mean, std, limit=None):
    """
    Draw from a normal distribution, rejecting samples too far from the mean.

    A sample is redrawn for as long as its absolute distance from `mean`
    exceeds `limit`. When `limit` is None it defaults to 2.5 * `std`.
    """
    max_deviation = 2.5*std if limit is None else limit
    while True:
        sample = random.gauss(mean, std)
        if not abs(sample - mean) > max_deviation:
            return sample

# Grey violin outlines: one Gaussian KDE per growth condition, mirrored around
# the condition's x position and scaled so the widest point spans 1/3 either side.
for position, condition in enumerate(categories, start=1):
    rates = short_df[condition].dropna().values
    kernel = gaussian_kde(rates)
    grid = np.arange(rates.min()-0.05, rates.max()+0.05, (rates.max()-rates.min())/100)
    density = kernel(grid)
    half_width = density / (3*density.max())
    outline_y = list(grid) + list(grid)[::-1]
    outline_x = list(position-half_width) + list(position+half_width[::-1])
    fig.patch(outline_x, outline_y, color="#dddddd")


def _scatter_strains(rows_for, label, jitter=True, **glyph_style):
    """
    Draw one circle per strain for every growth condition.

    rows_for    -- callable mapping a condition name to the rows of `short_df`
                   to draw for that condition
    label       -- legend entry shared by all circles of this layer
    jitter      -- if True, spread the points horizontally with gauss_max(0, 0.1);
                   otherwise place them exactly on the condition's x position
    glyph_style -- extra keyword arguments forwarded to fig.circle
    """
    # NOTE(review): `legend=` is the keyword this notebook was written against;
    # newer Bokeh releases use `legend_label=` instead -- confirm before upgrading.
    for position, condition in enumerate(categories, start=1):
        rates = list(rows_for(condition)[condition])
        if jitter:
            x_positions = [position + gauss_max(0, 0.1) for _ in rates]
        else:
            x_positions = [position for _ in rates]
        fig.circle(x_positions, rates, legend=label, **glyph_style)


# The layers are drawn in this order so that later ones sit on top.
# 1) Strains whose evolved compound differs from the growth condition
#    (MG1655 is excluded here and drawn separately below).
_scatter_strains(
    lambda condition: short_df[(short_df["evolved_compound"] != condition) & (short_df.index != "MG1655")],
    "Cross adapted",
    fill_alpha=1, line_alpha=0, fill_color=colors[1],
)

# 2) Strains whose evolved compound equals the growth condition.
_scatter_strains(
    lambda condition: short_df[short_df["evolved_compound"] == condition],
    "Specific adapted",
    size=5, fill_alpha=1, line_alpha=1, fill_color=colors[2],
)

# 3) MG1655, drawn larger and without jitter.
mg1655_rows = short_df[short_df.index == "MG1655"]
_scatter_strains(
    lambda condition: mg1655_rows,
    "MG1655",
    jitter=False,
    size=10, fill_alpha=1, line_alpha=1, fill_color=colors[0],
)


fig.xaxis.major_label_orientation = 0.7
fig.yaxis.axis_label = "Growth rate (strain mean) (1/h)"
fig.xaxis.axis_label = "Growth condition"
fig.xgrid.grid_line_color = None
fig.axis.axis_label_text_font_size = "22px"
fig.axis.axis_label_text_font_style = "normal"
fig.legend.border_line_alpha = 0.8
fig.legend.border_line_color = "black"
fig.xaxis.major_label_text_font_size = "13px"

show(fig)

## Make cross-tolerance heat map

In [170]:
def lprettify(l):
    """Apply `prettify` to every label in `l` and return the results as a list."""
    return list(map(prettify, l))


def two_color_func(val):
    """
    Map a value in [-1, 1] to a "#rrggbb" colour string.

    Positive values shade towards blue, negative values towards red, and
    0 gives the light neutral "#f0f0f0". Values outside [-1, 1] fail the
    assertion.
    """
    base = 240
    assert -1 <= val <= 1, val
    if val > 0:
        # Blue side: raise blue to 255, fade red and green by up to 80 %.
        faded = base - (base*0.8)*val
        channels = (faded, faded, base + (255-base)*val)
    else:
        # Red side: raise red to 255, fade green and blue by up to 90 %.
        strength = -val
        faded = base - (base*0.9)*strength
        channels = (base + (255-base)*strength, faded, faded)
    return "#" + "".join("%2.2x" % int(channel) for channel in channels)


def _legend_label(value):
    """Format a colour-legend bound as "<value rounded to 2 dp> /h"; non-numeric values are shown as plain text."""
    try:
        return str(round(value, 2)) + " /h"
    except TypeError:
        # round() rejects strings, including the "" default of cat_dot_plot.
        return str(value)


def cat_dot_plot(df, color_df=None, fig=None, legend_min="", legend_max=""):
    """
    Draw a grid of circles, one per cell of `df`, sized by value and coloured
    by `color_df`.

    df         -- DataFrame; its columns become x categories and its index
                  becomes y categories
    color_df   -- DataFrame of the same shape with values in [-1, 1], passed
                  cell by cell to two_color_func; defaults to the rescaled
                  size values
    fig        -- existing figure to draw on; a new 1000x850 figure with two
                  blank extra x slots is created when None
    legend_min -- number printed at the bottom of the colour bar
    legend_max -- number printed at the top of the colour bar

    Returns the figure. With non-numeric legend bounds (such as the ""
    defaults) the value is printed as-is instead of raising in round().
    """
    size_floor = 0.08   # smallest rescaled value, so the minimum cell is still visible
    size_scale = 0.75   # largest rescaled value

    # Shift so the minimum becomes `size_floor`, then scale so the maximum
    # becomes `size_scale`.
    plot_df = df - df.values.min() + size_floor
    plot_df = plot_df / plot_df.values.max() * size_scale

    color_df = plot_df if color_df is None else color_df.copy()

    x_cats = list(df)
    y_cats = list(df.index)

    if fig is None:
        fig = figure(x_range=lprettify(x_cats)+[""]*2, y_range=lprettify(y_cats), width=1000, height=850)

    # Hard-coded background: four grey 2x2 boxes stepping down the grid plus a
    # diagonal line. NOTE(review): these coordinates presumably match the
    # specific row/column order used by the caller -- confirm if the input
    # layout changes.
    fig.quad(left=0.5, right=2.5, bottom=10.5, top=12.48, color="#e2e2e2")
    fig.quad(left=2.5, right=4.5, bottom=8.5, top=10.5, color="#e2e2e2")
    fig.quad(left=4.5, right=6.5, bottom=6.5, top=8.5, color="#e2e2e2")
    fig.quad(left=6.5, right=8.5, bottom=4.5, top=6.5, color="#e2e2e2")
    fig.line([0, 11.4], [13, 1.6], color="darkblue", line_alpha=0.4)

    # One circle per cell at (column + 1, row + 1). The radius uses the square
    # root of the rescaled value, so the circle area scales with the value.
    xs, ys, radii, fills = [], [], [], []
    for row in range(len(y_cats)):
        for col in range(len(x_cats)):
            xs.append(col + 1)
            ys.append(row + 1)
            radii.append(np.sqrt(plot_df.iloc[row, col]) / 2 * 1.2)
            fills.append(two_color_func(color_df.iloc[row, col]))
    fig.circle(xs, ys, radius=radii, color=fills, line_color="#666666", line_width=0.5)

    # Colour bar: `reso` thin rectangles stacked upwards from leg_pos, covering
    # the range of values present in color_df.
    leg_pos = (15, 9)
    reso = 50
    leg_height = 3
    color_ran = np.linspace(color_df.values.min(), color_df.values.max(), reso)
    step_height = leg_height/float(reso)
    for step in range(reso):
        fig.rect(
            x=leg_pos[0],
            y=leg_pos[1]+step_height*step,
            height=step_height,
            width=0.5,
            color=two_color_func(color_ran[step])
        )
    fig.text(
        x=[leg_pos[0]-1.2, leg_pos[0]-1.2],
        y=[leg_pos[1], leg_pos[1]-0.3+leg_height],
        text=[_legend_label(legend_min), _legend_label(legend_max)],
        text_font_size="12px"
    )

    # Thin vertical rule just right of the last data column.
    fig.line([len(x_cats)+0.55, len(x_cats)+0.55], [0, len(y_cats)+1], color="black", line_width=0.1)

    fig.xaxis.major_label_orientation = 1
    fig.grid.grid_line_color = None
    return fig

In [159]:
# Row (evolved compound) and column (growth condition) order for the heat map.
index_order = [
    "HMDA", "putrescine", "1,2-propanediol", "2,3-butanediol",
    "glutarate", "adipate", "hexanoate", "octanoate", "isobutyrate", "coumarate", "butanol", "None (MG1655)"]

column_order = [
    "HMDA", "putrescine", "1,2-propanediol", "2,3-butanediol",
    "glutarate", "adipate", "hexanoate", "octanoate", "isobutyrate", "coumarate", "butanol", "NaCl"]

# Average the per-strain means within each evolved compound. MG1655 has no
# evolved compound, so it gets its own explicit group label.
heatmap_df = short_df.copy()
heatmap_df.loc["MG1655", "evolved_compound"] = "None (MG1655)"
heatmap_df = heatmap_df.groupby("evolved_compound").mean()
# Rows are reversed so the first entry of index_order ends up last in the frame.
heatmap_df = heatmap_df.reindex(index_order[::-1])[column_order]

# M9 column, taken from the long table. Only `evolved_compound` is filled and
# only `growth_rate` is averaged: filling the whole frame would turn missing
# growth rates into strings, and averaging the whole frame would include
# string columns.
m9_rows = df[df["grown_compound"] == "M9"]
m9_rows = m9_rows.assign(evolved_compound=m9_rows["evolved_compound"].fillna("None (MG1655)"))
heatmap_df["M9"] = m9_rows.groupby("evolved_compound")["growth_rate"].mean()

In [160]:
# Colour scale: difference from the MG1655 row in each growth condition.
normalisation_means = heatmap_df.loc["None (MG1655)"]
color_df = heatmap_df.sub(normalisation_means, axis="columns")

# Keep the raw extremes for the legend before rescaling.
legend_min, legend_max = color_df.values.min(), color_df.values.max()
print(legend_min, legend_max)

# Rescale into [-1, 1] by the largest absolute difference.
color_df = color_df / max(abs(legend_max), abs(legend_min))

-0.28770755661 0.3793892026


In [171]:
plot = cat_dot_plot(heatmap_df, color_df, legend_min=legend_min, legend_max=legend_max)

# Axis titles.
plot.xaxis.axis_label = "Growth condition"
plot.yaxis.axis_label = "Evolved compound"

# Text styling applied to every axis of the figure.
for axis in plot.axis:
    axis.axis_label_text_font_size = "18pt"
    axis.major_label_text_font_size = "16pt"
    axis.axis_label_text_font_style = "normal"

# Tilt the x tick labels (this replaces the orientation set inside cat_dot_plot).
plot.xaxis.major_label_orientation = 0.8

show(plot)