# Parse growth rates

This notebook reads the output from croissance, extracts the relevant growth rates, and uses the plate layouts to associate them with the right strains.

In [1]:
from croissance_parsing import parse_plate_data # Local module in this directory
import pandas as pd
import xlrd
import json

## Initial screens (Biolector)

In [2]:
# Root folder of the initial tolerance screen. The cells below read the raw
# workbooks and the croissance output from sub-folders of this directory.
data_dir = "../Data/Growth_data/initial_tolerance_tests/"

# Compound name -> Biolector workbook that contains its tolerance screen.
# A workbook can hold two compounds, so some files appear under two keys.
# Note that the key "HMDA" points at a file whose name spells it "HDMA".
screen_exps = {
    "butanol": "tolerance_012714_butanol_coumarate.xlsx",
    "glutarate": "tolerance_021014_coumarate_glutarate.xlsx",
    "coumarate": "tolerance_021014_coumarate_glutarate.xlsx",
    "putrescine": "tolerance_021114_adipic-acid_putrescine.xlsx",
    "HMDA": "tolerance_012814_HDMA_6-aminohexanoic.xlsx",
    "adipate": "tolerance_031914_adipate_propionate.xlsx",
    "isobutyrate": "tolerance_040214_2,5-furandicarboxylate_isobutyrate.xlsx",
    "hexanoate": "tolerance_021214_2,3-butanediol_hexanoic-acid.xlsx",
    "2,3-butanediol": "tolerance_021214_2,3-butanediol_hexanoic-acid.xlsx",
    "1,2-propanediol": "tolerance_032914_1,2-pentanediol_1,2-propanediol.xlsx",
    "octanoate": "tolerance_081914_octanoate_caffeate.xlsx",
}

In [3]:
def parse_content(string):
    """Parse the media content of a well in the excel layout file.

    The text is expected to read ``"<conc> <unit> <compound> ..."``. A ``%``
    sign may be glued to the number (``"5% butanol"``). Everything after the
    third whitespace-separated token is ignored.

    Parameters
    ----------
    string : str
        Raw cell text describing the medium of one well.

    Returns
    -------
    tuple
        ``(conc, unit, comp)``: the concentration as a float, and the unit and
        compound tokens exactly as written in the cell.

    Raises
    ------
    ValueError
        If the text has fewer than three tokens, if the unit is not one of
        g/L, v/v or % (compared case-insensitively), or if the concentration
        token is not a number.
    """
    # Detach a glued percent sign so that "5%" splits into "5" and "%".
    tokens = string.replace("%", " %").split()
    if len(tokens) < 3:
        raise ValueError(
            "Cannot parse well content {!r}: expected "
            "'<conc> <unit> <compound>'".format(string)
        )
    conc, unit, comp = tokens[:3]

    # Validate with an explicit exception rather than `assert`, which is
    # removed when Python runs with -O.
    if unit.lower() not in ("g/l", "v/v", "%"):
        raise ValueError(
            "Unknown unit {!r} in well content {!r}".format(unit, string)
        )

    try:
        conc = float(conc)
    except ValueError as err:
        raise ValueError(
            "Concentration {!r} in well content {!r} is not a number".format(conc, string)
        ) from err
    return conc, unit, comp

# Read the excel files into {compound: {well: (conc, unit, medium compound)}}.
# NOTE(review): xlrd 2.0 and later only read .xls files, so opening these
# .xlsx workbooks presumably requires xlrd < 2.0 -- confirm the pinned version.
layouts = {}
for comp, filename in screen_exps.items():
    wells_for_comp = layouts[comp] = {}
    workbook_path = data_dir + "raw Biolector data/" + filename
    workbook = xlrd.open_workbook(workbook_path)
    sheet = workbook.sheet_by_name("subtracted")

    # A well belongs to this compound when its description contains the first
    # four characters of the compound name (case-sensitive substring match).
    # NOTE(review): for "HMDA" the workbook file name spells it "HDMA"; check
    # that the cell text really contains "HMDA", otherwise no well is matched.
    prefix = comp[:4]

    # Column 0 holds the well name, column 2 the description of its content.
    well_cells, content_cells = sheet.col_slice(0), sheet.col_slice(2)
    for well_cell, content_cell in zip(well_cells, content_cells):
        well, content = well_cell.value, content_cell.value
        if well == "":
            continue
        # Skip any cells containing "pentane" to not confuse 12-propanediol with 12-pentanediol
        if prefix not in content or "pentane" in content:
            continue
        wells_for_comp[well] = parse_content(content)

In [11]:
# Parse the growth phases from the croissance output
# Keyword settings handed unchanged to parse_plate_data for every plate; what
# each threshold means is defined in the local croissance_parsing module.
screen_parse_settings = dict(
    phase_length_cutoff=3,
    max_abs_baseline=4,
    max_baseline_dev=4,
    time_cutoff=20,
    verbose=False,
)

growth_rates = {}
for comp, xlsx_name in screen_exps.items():
    slopes = growth_rates[comp] = {}

    # The croissance result is read from the output/ sub-folder and is named
    # after the workbook: the part before the first "." plus a fixed suffix.
    json_path = data_dir + "output/" + xlsx_name.split(".")[0] + ".OD.v2.output.json"
    with open(json_path) as infile:
        json_data = json.load(infile)
    curves = json_data["curves"]

    data = parse_plate_data(curves, **screen_parse_settings)

    # Keep only the wells that the layout assigned to this compound.
    for well in layouts[comp]:
        slopes[well] = data[well]["slope"]

In [7]:
# Combine layout and growth rate into one record per well of the initial screen.
data = [
    {"compound": comp, "conc": conc,
     "growth_rate": growth_rates[comp][well],
     "medium": med, "unit": unit, "well": well}
    for comp in layouts
    for well, (conc, unit, med) in layouts[comp].items()
]

# Naming the columns explicitly keeps the header (and its order) stable even
# if no well matched any compound and `data` is empty.
df = pd.DataFrame(
    data,
    columns=["compound", "conc", "growth_rate", "medium", "unit", "well"],
)

# Tab-separated, without the row index (index=False is the documented way to
# suppress it).
df.to_csv("../Data/Growth_data/initial_tolerance_tests/Initial_tolerance_data_frame.tsv", sep="\t", index=False)

## Evolved isolate screening (Biolector)

In [16]:
# Root folder of the evolved-isolate screen. This rebinds `data_dir`, so the
# cells of the initial screen must not be re-run after this one without
# resetting it first.
data_dir = "../Data/Growth_data/evolved-isolate-growth-data/"

# Experiment name (also the stem of its workbook and croissance output file)
# -> compound screened in that experiment. The "HDMA" experiment is reported
# under the compound name "HMDA".
exp_list = {
    "ALE_1,2-propanediol_duplicates_121514": "1,2-propanediol",
    "ALE_2,3-butanediol_duplicates_122414": "2,3-butanediol",
    "ALE_adipate_duplicates_091014": "adipate",
    "ALE_butanol_duplicates_060514": "butanol",
    "ALE_coumarate_duplicates_061914": "coumarate",
    "ALE_glutarate_duplicates_060214": "glutarate",
    "ALE_HDMA_duplicates_080114": "HMDA",
    "ALE_hexanoate_duplicates_101314": "hexanoate",
    "ALE_isobutyrate_duplicates_091114": "isobutyrate",
    "ALE_octanoate_duplicates_122214": "octanoate",
    "ALE_putrescine_duplicates_073014": "putrescine",
}

In [17]:
# Read the plate layout of every evolved-isolate experiment into
# {experiment: {well: strain label}}.
# NOTE(review): xlrd 2.0 and later only read .xls files, so opening these
# .xlsx workbooks presumably requires xlrd < 2.0 -- confirm the pinned version.
layouts = {}
for exp in exp_list:
    # The workbook is named after the experiment.
    excel_name = exp + ".xlsx"
    layouts[exp] = {}
    workbook = xlrd.open_workbook(data_dir + "raw Biolector data/" + excel_name)
    sheet = workbook.sheet_by_name("subtracted")
    # Column 0 holds the well name, column 2 the strain label in that well.
    for well, strain in zip(sheet.col_slice(0), sheet.col_slice(2)):
        well, strain = well.value, strain.value
        # Rows without a well name carry no layout information.
        if well == "":
            continue
        layouts[exp][well] = strain

In [18]:
# Keyword settings handed unchanged to parse_plate_data for every plate; what
# each threshold means is defined in the local croissance_parsing module.
isolate_parse_settings = dict(
    phase_length_cutoff=3,
    max_abs_baseline=3,
    max_baseline_dev=3,
    verbose=False,
)

# Collect the fitted slope of every laid-out well:
# {experiment: {well: slope}}.
growth_rates = {}
for exp in exp_list:
    slopes = growth_rates[exp] = {}

    # The croissance result is named after the experiment.
    json_path = data_dir + "output/" + exp + ".v2.output.json"
    with open(json_path) as f:
        json_data = json.load(f)
    curves = json_data["curves"]

    data = parse_plate_data(curves, **isolate_parse_settings)

    for well in layouts[exp]:
        slopes[well] = data[well]["slope"]

In [None]:
def parse_strain(strain):
    """Split a layout label into ``(strain name, replicate number)``.

    Labels that start with ``MG1655`` have every ``-`` turned into ``_``
    first. The label is then cut at its first underscore: the part before it
    is the strain name, the part after it is converted with ``int``.

    Raises ``ValueError`` when the label has no underscore or when the part
    after the first underscore is not an integer.
    """
    label = strain.replace("-", "_") if strain.startswith("MG1655") else strain
    name, replicate = label.split("_", 1)
    return name, int(replicate)

# One record per well of every evolved-isolate experiment.
data = []
for exp, comp in exp_list.items():
    for well, label in layouts[exp].items():
        strain, repl = parse_strain(label)
        # Strain labels spell the compound "HDMA"; report it as "HMDA" like
        # the compound column. replace() leaves other names untouched.
        strain = strain.replace("HDMA", "HMDA")
        gr = growth_rates[exp][well]
        data.append({
            "compound": comp,
            "strain": strain,
            "growth_rate": gr,
            "experiment": exp,
            "well": well,
            "repl": repl,
        })

df = pd.DataFrame(data)
# NOTE(review): despite the .tsv extension this writes pandas' defaults, i.e.
# comma-separated values with the row index as first column, unlike the
# initial-screen export above (sep="\t", no index). Left unchanged because
# readers of this file may rely on the current format -- confirm before
# switching to tabs.
df.to_csv("../Data/Growth_data/evolved-isolate-growth-data/Evolved_isolates_data_frame.tsv")