In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from os.path import join, dirname, basename, exists, isdir

### Load environmental variables from the project root directory ###
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

# now you can get the variables using their names

# Check whether a network drive has been specified
DATABASE = os.environ.get("NETWORK_URL")
if DATABASE == 'None':
    pass
else:
    pass
    #mount network drive here

# set up directory paths
CURRENT_DIR = os.getcwd()
PROJ = dirname(dotenv_path) # project root directory

DATA = join(PROJ, 'data') #data directory
RAW_EXTERNAL = join(DATA, 'raw_external') # external data raw directory
RAW_INTERNAL = join(DATA, 'raw_internal') # internal data raw directory
INTERMEDIATE = join(DATA, 'intermediate') # intermediate data directory
FINAL = join(DATA, 'final') # final data directory

RESULTS = join(PROJ, 'results') # output directory
FIGURES = join(RESULTS, 'figures') # figure output directory
PICTURES = join(RESULTS, 'pictures') # picture output directory


# make folders specific for certain data
folder_name = ''
if folder_name != '':
    #make folders if they don't exist
    if not exists(join(RAW_EXTERNAL, folder_name)):
        os.makedirs(join(RAW_EXTERNAL, folder_name))

    if not exists(join(INTERMEDIATE, folder_name)):
        os.makedirs(join(INTERMEDIATE, folder_name))

    if not exists(join(FINAL, folder_name)):
        os.makedirs(join(FINAL, folder_name))


print('Standard variables loaded, you are good to go!')

Standard variables loaded, you are good to go!


In [40]:
import cobra
import os
import pandas as pd
import cameo
import wget
import ssl


#E. coli model:
#eColi_model = cameo.load_model("iML1515")


#E. coli model:
ssl._create_default_https_context = ssl._create_unverified_context
wget.download("https://raw.githubusercontent.com/BenjaSanchez/notebooks/master/e_coli_simulations/eciML1515.xml")
eColi_Model = cobra.io.read_sbml_model("eciML1515.xml")
os.remove("eciML1515.xml")



#proteomics data:
proteomics_dataset = "~/Documents/masters/thesis/thesis/data/raw_internal/proteomics/protein_values.csv"


In [45]:
from collections import namedtuple
from cobra.medium.boundary_types import find_external_compartment
from cobra.io.dict import reaction_to_dict
import pandas as pd
import numpy as np

def apply_medium(model, is_ec_model, medium):
    operations = []
    warnings = []
    errors = []

    # Convert the list of dicts to a set of namedtuples to avoid duplicates, as
    # looking up metabolites in the model is a somewhat expensive operation.
    Compound = namedtuple("Compound", ["id", "namespace"])
    medium = set(Compound(id=c["identifier"], namespace=c["namespace"]) for c in medium)

    # Add trace metals
    medium.update(
        [
            Compound(id="CHEBI:25517", namespace="chebi"),
            Compound(id="CHEBI:25368", namespace="chebi"),
        ]
    )

    try:
        extracellular = find_external_compartment(model)
    except RuntimeError as error:
        # cobrapy throws RuntimeError if it for any reason is unable to find an
        # external compartment. See:
        # https://github.com/opencobra/cobrapy/blob/95d920d135fa824e6087f1fcbc88d50882da4dab/cobra/medium/boundary_types.py#L26
        message = (
            f"Cannot find an external compartment in model {model.id}: {str(error)}"
        )
        errors.append(message)
        # Cannot continue without knowing the external compartment, so
        # immediately return the error.
        return operations, warnings, errors

    # Create a map of exchange reactions and corresponding fluxes to apply to
    # the medium.
    medium_mapping = {}
    for compound in medium:
        print(compound)
        try:
            extracellular_metabolite = find_metabolite(
                model, compound.id, compound.namespace, extracellular
            )
        except MetaboliteNotFound:
            warning = (
                f"Cannot add medium compound '{compound.id}' - metabolite not found in "
                f"extracellular compartment '{extracellular}'"
            )
            warnings.append(warning)
        else:
            exchange_reactions = extracellular_metabolite.reactions.intersection(
                model.exchanges
            )
            if is_ec_model and len(exchange_reactions) == 2:
                exchange_reactions = get_ec_exchange_reaction(exchange_reactions, True)
            if len(exchange_reactions) != 1:
                errors.append(
                    f"Medium compound metabolite '{extracellular_metabolite.id}' has "
                    f"{len(exchange_reactions)} exchange reactions in the model; "
                    f"expected 1"
                )
                continue
            exchange_reaction = next(iter(exchange_reactions))

            # If someone already figured out the uptake rate for the compound, it's
            # likely more accurate than our assumptions, so keep it
            if exchange_reaction.id in model.medium:
                medium_mapping[exchange_reaction.id] = model.medium[
                    exchange_reaction.id
                ]
                continue

            if not extracellular_metabolite.formula:
                warning = (
                    f"No formula for metabolite '{extracellular_metabolite.id}', cannot"
                    f" check if it is a carbon source"
                )
                warnings.append(warning)
                # If we don't know, it's most likely that the metabolite does not have a
                # higher uptake rate than a carbon source, so set the bound still to 10
                medium_mapping[exchange_reaction.id] = 10
            elif "C" in extracellular_metabolite.elements:
                # Limit the uptake rate for carbon sources to 10
                medium_mapping[exchange_reaction.id] = 10
            else:
                medium_mapping[exchange_reaction.id] = 1000

    # Apply the medium to the model, letting cobrapy deal with figuring out the correct
    # bounds to change
    model.medium = medium_mapping

    # Add all exchange reactions to operations, to make sure any changed bounds is
    # properly updated
    for reaction in model.exchanges:
        operations.append(
            {
                "operation": "modify",
                "type": "reaction",
                "id": reaction.id,
                "data": reaction_to_dict(reaction),
            }
        )

    return operations, warnings, errors


def apply_measurements(
    model,
    biomass_reaction,
    is_ec_model,
    fluxomics,
    metabolomics,
    proteomics,
    uptake_secretion_rates,
    molar_yields,
    growth_rate,
):
    operations = []
    warnings = []
    errors = []

    def bounds(measurement, uncertainty):
        """Return resolved bounds based on measurement and uncertainty"""
        if uncertainty:
            return (measurement - uncertainty, measurement + uncertainty)
        else:
            return (measurement, measurement)

    # If an enzyme constrained model with proteomics was supplied, flexibilize the
    # proteomics data and redefine the growth rate based on simulations.
    if growth_rate and proteomics and is_ec_model:
        growth_rate, proteomics, prot_warnings = flexibilize_proteomics(
            model, biomass_reaction, growth_rate, proteomics
        )
        for warning in prot_warnings:
            warnings.append(warning)

    # Constrain the model with the observed growth rate
    if growth_rate:
        reaction = model.reactions.get_by_id(biomass_reaction)
        reaction.bounds = bounds(growth_rate["measurement"], growth_rate["uncertainty"])
        operations.append(
            {
                "operation": "modify",
                "type": "reaction",
                "id": reaction.id,
                "data": reaction_to_dict(reaction),
            }
        )

    for measure in fluxomics:
        try:
            reaction = model.reactions.get_by_id(measure["identifier"])
        except KeyError:
            errors.append(
                f"Cannot find reaction '{measure['identifier']}' in the model"
            )
        else:
            reaction.bounds = bounds(measure["measurement"], measure["uncertainty"])
            operations.append(
                {
                    "operation": "modify",
                    "type": "reaction",
                    "id": reaction.id,
                    "data": reaction_to_dict(reaction),
                }
            )

    for metabolite in metabolomics:
        warning = (
            f"Cannot apply metabolomics measure for '{metabolite['identifier']}'; "
            f"feature has not yet been implemented"
        )
        warnings.append(warning)

    for measure in proteomics:
        if is_ec_model:
            try:
                reaction = model.reactions.get_by_id(
                    f"prot_{measure['identifier']}_exchange"
                )
            except KeyError:
                warning = f"Cannot find protein '{measure['identifier']}' in the model"
                warnings.append(warning)
            else:
                # measurement only modifies the upper bound (enzymes can be unsaturated)
                lb, ub = bounds(measure["measurement"], measure["uncertainty"])
                reaction.bounds = 0, ub
                operations.append(
                    {
                        "operation": "modify",
                        "type": "reaction",
                        "id": reaction.id,
                        "data": reaction_to_dict(reaction),
                    }
                )
        else:
            warning = (
                f"Cannot apply proteomics measurements for "
                f"non enzyme-constrained model {model.id}"
            )
            warnings.append(warning)
            break

    for rate in uptake_secretion_rates:
        try:
            metabolite = find_metabolite(
                model, rate["identifier"], rate["namespace"], "e"
            )
        except MetaboliteNotFound as error:
            errors.append(str(error))
        else:
            exchange_reactions = metabolite.reactions.intersection(model.exchanges)
            if is_ec_model and len(exchange_reactions) == 2:
                exchange_reactions = get_ec_exchange_reaction(
                    exchange_reactions, rate["measurement"] < 0
                )
            if len(exchange_reactions) != 1:
                errors.append(
                    f"Measured metabolite '{metabolite['identifier']}' has "
                    f"{len(exchange_reactions)} exchange reactions in the model; "
                    f"expected 1"
                )
                continue
            exchange_reaction = next(iter(exchange_reactions))
            lower_bound, upper_bound = bounds(rate["measurement"], rate["uncertainty"])

            # data is adjusted assuming a forward exchange reaction, i.e. x -->
            # (sign = -1), so if we instead actually have --> x, then multiply with -1
            direction = exchange_reaction.metabolites[metabolite]
            if direction > 0:
                lower_bound, upper_bound = -1 * lower_bound, -1 * upper_bound
            exchange_reaction.bounds = lower_bound, upper_bound
            operations.append(
                {
                    "operation": "modify",
                    "type": "reaction",
                    "id": exchange_reaction.id,
                    "data": reaction_to_dict(exchange_reaction),
                }
            )

    for molar_yield in molar_yields:
        warning = (
            f"Cannot apply molar yield measurement for '"
            f"{molar_yield['product_identifier']}/{molar_yield['substrate_identifier']}"
            f"'; feature has not yet been implemented"
        )
        warnings.append(warning)
    return operations, warnings, errors


def flexibilize_proteomics(model, biomass_reaction, growth_rate, proteomics):
    # reset growth rate in model:
    model.reactions.get_by_id(biomass_reaction).bounds = (0, 1000)

    # build a table with protein ids, met ids in model and values to constrain with:
    prot_df = pd.DataFrame()
    for protein in proteomics:
        protein_id = protein["identifier"]
        lb, ub = bounds(protein["measurement"], protein["uncertainty"])
        
        for met in model.metabolites.query(lambda m: protein_id in m.id):
            new_row = pd.DataFrame(
                data={"met_id": met.id, "value": ub}, index=[protein_id]
            )
            prot_df = prot_df.append(new_row)

    # constrain the model with all proteins and optimize:
    
    limit_proteins(model, prot_df["value"])
    solution = model.optimize()
    new_growth_rate = solution.objective_value

    # while the model cannot grow to the desired level, remove the protein with
    # the highest shadow price:
    minimal_growth, ub = bounds(growth_rate["measurement"], growth_rate["uncertainty"])
    prots_to_remove = []
    warnings = []
    while new_growth_rate < minimal_growth and not prot_df.empty:
        # get most influential protein in model:
        top_protein = top_shadow_prices(solution, list(prot_df["met_id"]))
        value = top_protein[top_protein.index[0]]
        top_protein = top_protein.index[0]
        top_protein = prot_df.index[prot_df["met_id"] == top_protein][0]
        print("working: " + top_protein + " (sp=" + str(value) + ") - mu = " + str(new_growth_rate))

        # update data: append protein to list, remove from current dataframe and
        # increase the corresponding upper bound to +1000:
        prots_to_remove.append(top_protein)
        prot_df = prot_df.drop(labels=top_protein)
        limit_proteins(model, pd.Series(data=[1000], index=[top_protein]))
        warning = (
            f"Removed protein '{top_protein}' from the proteomics data for feasible "
            f"simulations"
        )
        warnings.append(warning)

        # re-compute solution:
        solution = model.optimize()
        if solution.objective_value == new_growth_rate:  # the algorithm is stuck
            break
        new_growth_rate = solution.objective_value

    # update growth rate if optimization was not successful:
    if new_growth_rate < minimal_growth:
        if growth_rate["uncertainty"]:
            growth_rate["measurement"] = new_growth_rate + growth_rate["uncertainty"]
        else:
            growth_rate["measurement"] = new_growth_rate

    # update proteomics by removing flexibilized proteins:
    for protein in prots_to_remove:
        index = next(
            (
                index
                for (index, dic) in enumerate(proteomics)
                if dic["identifier"] == protein
            ),
            None,
        )
        del proteomics[index]

    return growth_rate, proteomics, warnings


def limit_proteins(model, measurements):
    for protein_id, measure in measurements.items():
        try:
            rxn = model.reactions.get_by_id(f"prot_{protein_id}_exchange")
        except KeyError:
            pass
        else:
            # update only upper_bound (as enzymes can be unsaturated):
            rxn.bounds = (0, measure)
    return


def top_shadow_prices(solution, met_ids, top=1):
    shadow_pr = solution.shadow_prices
    shadow_pr = shadow_pr.loc[shadow_pr.index.isin(met_ids)]
    return shadow_pr.sort_values()[:top]


def bounds(measurement, uncertainty):
    if uncertainty:
        return measurement - uncertainty, measurement + uncertainty
    else:
        return measurement, measurement


def find_metabolite(model, id, namespace, compartment):
    def query_fun(metabolite):
        if metabolite.compartment != compartment:
            return False

        result = _query_item(metabolite, id, namespace)
        if result:
            return result

        # If the original query fails, retry with the compartment id appended
        # to the identifier (a regular convenation with BiGG metabolites, but
        # may also be the case in other namespaces).
        return _query_item(metabolite, f"{id}_{compartment}", namespace)

    metabolites = model.metabolites.query(query_fun)
    if len(metabolites) == 0:
        raise MetaboliteNotFound(
            f"Could not find metabolite {id} or {id}_{compartment} in "
            f"namespace {namespace} and compartment {compartment} for model "
            f"{model.id}"
        )
    elif len(metabolites) > 1:
        raise IndexError(f"Expected single metabolite, found {metabolites}")
    else:
        return metabolites[0]

def _query_item(item, query_id, query_namespace):
    # Try the default identifiers (without confirming the namespace)
    if query_id.lower() == item.id.lower():
        return True

    # Otherwise, try to find a case insensitive match for the namespace key
    for namespace in item.annotation:
        if query_namespace.lower() == namespace.lower():
            annotation = item.annotation[namespace]
            # Compare the identifier case insensitively as well
            # Annotations may contain a single id or a list of ids
            if isinstance(annotation, list):
                if query_id.lower() in [i.lower() for i in annotation]:
                    return True
            else:
                if query_id.lower() == annotation.lower():
                    return True
    return False

def compute_measurements(proteomics, ecModel):
    measurements = pd.DataFrame()
    for protein in proteomics:
        protein_id = protein["identifier"]
        lb, ub = bounds(protein["measurement"], protein["uncertainty"])
        for met in ecModel.metabolites:
            if protein_id in met.id:
                new_row = pd.DataFrame(data={"met_id": met.id, "value": ub}, index=[protein_id])
                measurements = measurements.append(new_row)
    return measurements

def get_ec_exchange_reaction(exchange_reactions, consumption):
    ec_exchange_reaction = []
    for reaction in exchange_reactions:
        if (reaction.products and consumption) or (reaction.reactants and not consumption):
            ec_exchange_reaction.append(reaction)
    return ec_exchange_reaction

class MetaboliteNotFound(Exception):
    pass

In [4]:
def reset_proteomics():
    data = pd.read_csv(proteomics_dataset)  # yeast

    # cols_measurements 
    cols_measurements = data.columns[data.columns.get_loc("Glucose"):data.columns.get_loc("Fructose")]

    # cols uncertainties
    cols_uncertainties = data.columns[data.columns.get_loc("Glucose.1"):data.columns.get_loc("Fructose.1")]

    # E. coli
    proteomics_all = dict()
    for i in range(0,len(cols_measurements)):
        measurement = cols_measurements[i]
        proteomics = []
        for j in range(0,data.shape[0]):
            protein = {"identifier":data["Uniprot Accession"][j], \
            "measurement":data[cols_measurements[i]][j], \
            "uncertainty":data[cols_uncertainties[i]][j]}
            proteomics.append(protein)
        proteomics_all[cols_measurements[i]] = proteomics
    return(proteomics_all)


In [5]:
proteomics_data_dict = reset_proteomics()

In [9]:
#solution = ecModel.optimize()
ecModel.reactions.CPGNR1.

''

In [42]:
fake_proteomics = [
    {"identifier": "P0A8V2", "measurement": 5.03e-6, "uncertainty": 0},  # not in model
    {"identifier": "P0AFG8", "measurement": 8.2e-3, "uncertainty": 8.2e-6},  # will stay
    {"identifier": "P15254", "measurement": 6.54e-8, "uncertainty": 0},  # to remove
    {"identifier": "P0A6C5", "measurement": 5.93e-8, "uncertainty": 0},  # to remove
]
measurements = compute_measurements(proteomics_data_dict["Glucose"], ecModel)


In [43]:
# check if incorporation seems to work
new_growth_rate, new_proteomics, warnings = flexibilize_proteomics(ecModel, "BIOMASS_Ec_iML1515_core_75p37M", {"measurement":0.1, "uncertainty":0.01}, fake_proteomics)
print(new_growth_rate)
print(new_proteomics)
solution = ecModel.optimize()
print(solution)

{'identifier': 'P0A8V2', 'measurement': 5.03e-06, 'uncertainty': 0}
{'identifier': 'P0AFG8', 'measurement': 0.0082, 'uncertainty': 8.2e-06}
{'identifier': 'P15254', 'measurement': 6.54e-08, 'uncertainty': 0}
{'identifier': 'P0A6C5', 'measurement': 5.93e-08, 'uncertainty': 0}
working: P15254 (sp=-406.0679673236969) - mu = 2.6556845062340096e-05
working: P0A6C5 (sp=-9493.16684698304) - mu = 0.0005629447940260943
{'measurement': 0.1, 'uncertainty': 0.01}
[{'identifier': 'P0A8V2', 'measurement': 5.03e-06, 'uncertainty': 0}, {'identifier': 'P0AFG8', 'measurement': 0.0082, 'uncertainty': 8.2e-06}]
<Solution 0.877 at 0x13692d550>


In [19]:
# run on real data and growth rates
data = pd.read_csv(proteomics_dataset)
cols_measurements = data.columns[data.columns.get_loc("Glucose"):data.columns.get_loc("Fructose")]

growth_rates = pd.read_csv("/Users/jonas/Documents/masters/thesis/thesis/data/raw_internal/proteomics/growth_conditions.csv")
growth_rates = growth_rates.drop(growth_rates.columns.difference(['Growth condition','Growth rate (h-1)', 'Stdev']), 1)
growth_rates = growth_rates.drop([0,1], axis=0)


In [46]:
# run 
solutions = dict()
errors = []
counter = 0
for i in cols_measurements:
    counter += 1
    print("Model {} of {}".format(counter, len(cols_measurements)))

    new_growth_rate, new_proteomics, warnings = flexibilize_proteomics(eColi_model, "BIOMASS_Ec_iML1515_core_75p37M", \
    {"measurement":float(list(growth_rates['Growth rate (h-1)'].loc[growth_rates['Growth condition'] == i])[0]),\
    "uncertainty":float(list(growth_rates['Stdev'].loc[growth_rates['Growth condition'] == i])[0])}, \
    proteomics_data_dict[i])
    solutions[i] = ecModel.optimize()


Model 1 of 21


KeyError: 'value'

In [27]:
proteomics_data_dict

{'Glucose': [{'identifier': 'P0A8T7',
   'measurement': 2779,
   'uncertainty': 0.715},
  {'identifier': 'P0A8V2',
   'measurement': 3957,
   'uncertainty': 0.9890000000000001},
  {'identifier': 'P36683', 'measurement': 7596, 'uncertainty': 1.18},
  {'identifier': 'P15254',
   'measurement': 2456,
   'uncertainty': 0.5760000000000001},
  {'identifier': 'P09831', 'measurement': 2859, 'uncertainty': 0.775},
  {'identifier': 'P0AFG8',
   'measurement': 5815,
   'uncertainty': 0.9620000000000001},
  {'identifier': 'P0A9Q7', 'measurement': 4851, 'uncertainty': 0.774},
  {'identifier': 'P0CE47', 'measurement': 252452, 'uncertainty': 18.1},
  {'identifier': 'P25665', 'measurement': 51584, 'uncertainty': 7.25},
  {'identifier': 'P0A6F5', 'measurement': 22739, 'uncertainty': 2.16},
  {'identifier': 'P00968',
   'measurement': 4445,
   'uncertainty': 0.8690000000000001},
  {'identifier': 'P09373', 'measurement': 4615, 'uncertainty': 0.654},
  {'identifier': 'P0A6Y8', 'measurement': 18605, 'uncer

2.3