Import dependencies

In [None]:
%reload_ext autoreload
%autoreload 1
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import cobra
import escher

# Load model

In [None]:
# Enzyme-constrained Yeast 8
# The model is read from the SBML file below; the YAML loader is kept as a
# disabled alternative.

# https://github.com/SysBioChalmers/GECKO/blob/main/userData/ecYeastGEM/models/ecYeastGEMfull.yml
#model = cobra.io.load_yaml_model("./models/ecYeastGEMfull.yml")

# https://github.com/SysBioChalmers/ecModels/tree/main/ecYeastGEM/model
model = cobra.io.read_sbml_model("./models/ecYeastGEM_batch.xml")

# Last expression of the cell: renders the model summary.
model

# Inspect functions, metabolites, and reactions

The objective function in the yeast-GEM model is `GROWTH`. The cells below inspect it and its related reactions:

In [None]:
# r_4041 is the biomass pseudoreaction that this notebook optimises.
model.reactions.get_by_id('r_4041')

In [None]:
# NOTE(review): presumably the biomass species handled by r_4041 -- confirm
# against the rendered summary.
model.metabolites.get_by_id('s_0450')

In [None]:
# r_4041 is the biomass pseudoreaction; its stoichiometry is what the rest of
# the notebook works with.
biomass = model.reactions.get_by_id('r_4041')
biomass
# Conveniently, the reaction has a single 'metabolite' for all lipids, one for
# all proteins, one for RNA, etc.  Each class is removed in turn further down,
# and this layout makes that straightforward.

In [None]:
def print_formula_weights(reaction):
    """Print id, name and formula weight of each reactant, then of each product.

    `reaction` needs `reactants` and `products` iterables whose items expose
    `id`, `name` and `formula_weight`.  Nothing is returned.
    """
    # The attribute name doubles as the printed section heading.
    for side in ('reactants', 'products'):
        print(side)
        for species in getattr(reaction, side):
            print(f'{species.id} ({species.name}): MW {species.formula_weight}')
        
# List the formula weight of every species in the biomass pseudoreaction.
print_formula_weights(biomass)

In [None]:
def print_formulas(reaction):
    """Print id, name and chemical formula of each reactant, then of each product.

    `reaction` needs `reactants` and `products` iterables whose items expose
    `id`, `name` and `formula`.  Nothing is returned.
    """
    def _emit(heading, species_list):
        # One heading line followed by one line per species.
        print(heading)
        for species in species_list:
            print(f'{species.id} ({species.name}): F {species.formula}')

    _emit('reactants', reaction.reactants)
    _emit('products', reaction.products)
        
# List the chemical formula of every species in the biomass pseudoreaction.
print_formulas(biomass)

Inspecting each type of macromolecule...

## Lipid

In [None]:
# Bulk lipid species of the biomass reaction.
# NOTE(review): the ablation section further down refers to it as 's_1096[c]';
# confirm which ID form this model file actually uses.
model.metabolites.get_by_id('s_1096')

In [None]:
# The lipid pseudoreaction (r_2108) combines the 'bulk' lipid backbone and the
# 'bulk' lipid chain substrates.
lipid_reaction = model.reactions.get_by_id('r_2108')
lipid_reaction

In [None]:
# Formula weights of the species taking part in the lipid pseudoreaction.
print_formula_weights(lipid_reaction)

In [None]:
# NOTE(review): presumably the bulk lipid-backbone species made by r_4063
# (next cell) -- confirm against the rendered summary.
model.metabolites.get_by_id('s_3747')

In [None]:
# r_4063 is kept as `lipid_backbone_reaction`; its mass balance is worked out
# in the next cell.
lipid_backbone_reaction = model.reactions.get_by_id('r_4063')
lipid_backbone_reaction

In [None]:
print_formula_weights(lipid_backbone_reaction)
print('')

# Stoichiometry of the backbone pseudoreaction, one "id, coefficient" per line.
backbone_stoichiometry = lipid_backbone_reaction.metabolites
for species, stoich in backbone_stoichiometry.items():
    print(f'{species.id}, {stoich}')
print('')

# Negated, coefficient-weighted sum of formula weights.  s_0694 does not use
# its own `formula_weight`; the hand-entered 736.0359 is substituted instead.
# NOTE(review): that number looks like the `fatty_acid_bb_mw` printed by a
# later cell -- confirm, and consider computing it before this cell.
est_mw = -sum(
    stoich * (736.0359 if species.id == 's_0694' else species.formula_weight)
    for species, stoich in backbone_stoichiometry.items()
)

print(est_mw)

In [None]:
# s_0694 is the species whose weight is hard-coded in the cell above and that
# normalises `fatty_acid_bb_mw` below.
model.metabolites.get_by_id('s_0694')

In [None]:
# r_3978 is the reaction used below to derive a weight for s_0694.
fatty_acid_backbone_reaction = model.reactions.get_by_id('r_3978')
fatty_acid_backbone_reaction
# (weights are printed in the next cell instead)
#print_formula_weights(fatty_acid_backbone_reaction)

In [None]:
rxn = model.reactions.get_by_id('r_3978')
print_formula_weights(rxn)
print(' ')

# Stoichiometry of r_3978, one "id, coefficient" per line.
stoichiometry = rxn.metabolites
for species, stoich in stoichiometry.items():
    print(f'{species.id}, {stoich}')

# Coefficient-weighted sum of formula weights over every species, negated and
# then expressed per unit of s_0694 by dividing by that species' coefficient.
net_mass = sum(
    species.formula_weight * stoich
    for species, stoich in stoichiometry.items()
)
fatty_acid_bb_mw = -net_mass / stoichiometry[model.metabolites.get_by_id('s_0694')]
print(' ')
print(fatty_acid_bb_mw)

In [None]:
# Same mass-balance bookkeeping as for r_3978, applied to r_4065 and stored
# as `lipid_chain_mw`.  `rxn` is deliberately re-bound here.
rxn = model.reactions.get_by_id('r_4065')
print_formula_weights(rxn)
print(' ')

chain_stoichiometry = rxn.metabolites
for species, stoich in chain_stoichiometry.items():
    print(f'{species.id}, {stoich}')

# Negated, coefficient-weighted sum of formula weights over every species.
lipid_chain_mw = -sum(
    species.formula_weight * stoich
    for species, stoich in chain_stoichiometry.items()
)
print(' ')
print(lipid_chain_mw)

## Protein

In [None]:
# Bulk protein species of the biomass reaction.
model.metabolites.get_by_id('s_3717')

In [None]:
# The protein pseudoreaction (r_4047) combines aminoacyl-tRNAs.
model.reactions.get_by_id('r_4047')

In [None]:
# Print formulas rather than weights: the next cell works around formulas that
# contain an 'R' element.
protein_pseudoreaction = model.reactions.get_by_id('r_4047')
print_formulas(protein_pseudoreaction)

In [None]:
# hack: reverse-engineering cobra.core.formula and cobra.core.metabolite
# so it can deal with an 'R' element
import re
from typing import TYPE_CHECKING, Dict, Optional, Union
from cobra.core.formula import elements_and_molecular_weights

# Formula tokenizer: group 1 is an element symbol (one capital letter plus an
# optional lowercase letter), group 2 its count (digits/dots) or empty.
element_re = re.compile("([A-Z][a-z]?)([0-9.]+[0-9.]?|(?=[A-Z])?)")
# Register the placeholder element 'R' with zero mass.  This assigns into the
# table object imported from cobra, so any other code reading that table in
# this kernel sees the extra entry too.
elements_and_molecular_weights['R'] = 0

def elements(formula) -> Optional[Dict[str, Union[int, float]]]:
    """Get a dictionary of elements and counts from a chemical formula.

    Stand-alone formula parser (the cell's header comment describes it as
    reverse-engineered from cobra) that tokenises `formula` with the
    module-level `element_re`.

    Parameters
    ----------
    formula : str or None
        Chemical formula such as ``"C6H12O6"``.  Any other object is converted
        with ``str()`` first.

    Returns
    -------
    composition : None or Dict
        A dictionary of elements and counts, where a count is an int unless it
        needs to be a float.  Repeated elements are summed.
        Returns an empty dictionary when `formula` is None.
        Returns None when the formula contains parentheses or a count that
        cannot be parsed; a warning is issued in both cases.

    """
    # `warn` is not imported anywhere else in the notebook; without this the
    # warning paths below raise NameError instead of warning.
    from warnings import warn

    if formula is None:
        return {}
    # necessary for some old pickles which use the deprecated
    # Formula class
    tmp_formula = str(formula)
    # commonly occurring characters in incorrectly constructed formulas
    if "*" in tmp_formula:
        warn(f"invalid character '*' found in formula '{formula}'")
        tmp_formula = tmp_formula.replace("*", "")
    if "(" in tmp_formula or ")" in tmp_formula:
        warn(f"invalid formula (has parenthesis) in '{formula}'")
        return None
    composition = {}
    for element, count in element_re.findall(tmp_formula):
        if count == "":
            # an element symbol with no number counts once
            count = 1
        else:
            try:
                count = float(count)
            except ValueError:
                warn(f"failed to parse {count} (in formula {formula})")
                return None
            int_count = int(count)
            if count == int_count:
                count = int_count
            else:
                # fractional counts are kept, but flagged
                warn(f"{count} is not an integer (in formula {formula})")
        composition[element] = composition.get(element, 0) + count
    return composition

def formula_weight(elements) -> Optional[Union[int, float]]:
    """Calculate the formula weight.

    Parameters
    ----------
    elements : dict
        Mapping of element symbol to count, as produced by `elements()`.
        It must be a mapping: the None that `elements()` returns for an
        unparseable formula has no `.items()` and raises AttributeError here.

    Returns
    -------
    float, int or None
        Sum of ``count * weight`` over the elements, with weights looked up in
        `elements_and_molecular_weights`.  An empty mapping gives 0.
        None, after a warning, when an element is missing from that table.
    """
    # `warn` is not imported anywhere else in the notebook; without this the
    # unknown-element path raises NameError instead of warning.
    from warnings import warn

    try:
        return sum(
            count * elements_and_molecular_weights[element]
            for element, count in elements.items()
        )
    except KeyError as e:
        warn(f"The element {e} does not appear in the periodic table")
        return None

In [None]:
# One line per species: formula, formula weight from the notebook's own
# `elements`/`formula_weight` helpers (which accept 'R'), and coefficient.
protein_stoichiometry = protein_pseudoreaction.metabolites
for species, stoich in protein_stoichiometry.items():
    print(f'{species.formula}, {formula_weight(elements(species.formula))}, {stoich}')

# Negated, coefficient-weighted sum of those formula weights.
protein_mw = -sum(
    formula_weight(elements(species.formula)) * stoich
    for species, stoich in protein_stoichiometry.items()
)
protein_mw

In [None]:
# NOTE(review): the notebook does not say why s_1582 is inspected in the
# protein section -- add a sentence once confirmed.
model.metabolites.get_by_id('s_1582')

## Carbohydrates

In [None]:
# Bulk carbohydrate species of the biomass reaction.
model.metabolites.get_by_id('s_3718')

In [None]:
# The carbohydrate pseudoreaction (r_4048) lumps the storage and the
# structural (cell wall) carbohydrates together.
model.reactions.get_by_id('r_4048')

In [None]:
# Keep a handle on r_4048 and list the formula weight of each species in it.
carbohydrate_pseudoreaction = model.reactions.get_by_id('r_4048')
print_formula_weights(carbohydrate_pseudoreaction)

In [None]:
# Negated, coefficient-weighted sum of formula weights over every species of
# the carbohydrate pseudoreaction.
carb_mw = -sum(
    species.formula_weight * stoich
    for species, stoich in carbohydrate_pseudoreaction.metabolites.items()
)
carb_mw

## DNA

In [None]:
# Bulk DNA species of the biomass reaction.
model.metabolites.get_by_id('s_3720')

In [None]:
# The DNA pseudoreaction (r_4050) combines dNTPs.  Unlike the other sections,
# nothing is displayed here; the handle is only used by the next cell.
dna_pseudoreaction = model.reactions.get_by_id('r_4050')

In [None]:
# Negated, coefficient-weighted sum of formula weights over every species of
# the DNA pseudoreaction.
dna_mw = -sum(
    species.formula_weight * stoich
    for species, stoich in dna_pseudoreaction.metabolites.items()
)
dna_mw

## RNA

In [None]:
# Bulk RNA species of the biomass reaction.
model.metabolites.get_by_id('s_3719')

In [None]:
# The RNA pseudoreaction (r_4049) combines NTPs.
model.reactions.get_by_id('r_4049')

In [None]:
# Keep a handle on r_4049 and list the formula weight of each species in it.
rna_pseudoreaction = model.reactions.get_by_id('r_4049')
print_formula_weights(rna_pseudoreaction)

In [None]:
# Negated, coefficient-weighted sum of formula weights over every species of
# the RNA pseudoreaction.
rna_mw = -sum(
    species.formula_weight * stoich
    for species, stoich in rna_pseudoreaction.metabolites.items()
)
rna_mw

## Others

In [None]:
# Cofactor species (s_4205).
model.metabolites.get_by_id('s_4205')

In [None]:
cofactor_pseudoreaction = model.reactions.get_by_id('r_4598')
# Only the last expression of a cell is rendered automatically, so the
# reaction summary has to be displayed explicitly before the printout.
display(cofactor_pseudoreaction)
print_formula_weights(cofactor_pseudoreaction)

In [None]:
# Species-to-coefficient mapping of the cofactor pseudoreaction.
cofactor_pseudoreaction.metabolites

In [None]:
# Negated, coefficient-weighted sum of formula weights over every species of
# the cofactor pseudoreaction.
cofactor_mw = -sum(
    species.formula_weight * stoich
    for species, stoich in cofactor_pseudoreaction.metabolites.items()
)
cofactor_mw

In [None]:
# Ion species (s_4206).
model.metabolites.get_by_id('s_4206')

In [None]:
ion_pseudoreaction = model.reactions.get_by_id('r_4599')
# Only the last expression of a cell is rendered automatically, so the
# reaction summary has to be displayed explicitly before the printout.
display(ion_pseudoreaction)
print_formula_weights(ion_pseudoreaction)

In [None]:
# Negated, coefficient-weighted sum of formula weights over every species of
# the ion pseudoreaction.
ion_mw = -sum(
    species.formula_weight * stoich
    for species, stoich in ion_pseudoreaction.metabolites.items()
)
ion_mw

In [None]:
# Species-to-coefficient mapping of the ion pseudoreaction.
ion_pseudoreaction.metabolites

# Unmodified model

Get glucose uptake

In [None]:
# r_1714 is the glucose uptake reaction; shown here before its bounds are
# overwritten in the next cell.
model.reactions.get_by_id('r_1714')

Make glucose uptake unrestricted (same logic as Sánchez et al., 2017, and the usual logic for FBA)

In [None]:
# Allow r_1714 to carry any flux between -1000 and 0.  This edits `model` in
# place, so every later cell that uses `model` sees the new bounds.
model.reactions.get_by_id('r_1714').bounds = (-1000, 0)

Simulate model and draw fluxes through central carbon metabolism

In [None]:
# Plain FBA on the unmodified model; `solution` is reused by later cells.
solution = model.optimize()
# NOTE(review): the map is the iMM904 one while this model's reaction IDs look
# like 'r_NNNN' -- confirm that the fluxes actually overlay on the map.
b = escher.Builder(
    map_name='iMM904.Central carbon metabolism',
    reaction_data=solution.fluxes.to_dict()
)
b

Get flux through biomass reaction

In [None]:
# `biomass.flux` reads the value from the model's most recent optimisation, so
# this cell relies on the optimisation cell above having been run.
biomass = model.reactions.get_by_id('r_4041')
print(f'Flux through biomass reaction is {biomass.flux:.4f} h-1')

Estimate timescale for biomass synthesis

In [None]:
# Define constants
CELL_DRY_MASS = 15e-12 # g
# Candidate values for the molecular weight of biomass; only the last,
# uncommented one is in effect.
# Using C:H(1.613):O(0.557):N(0.158) from https://bionumbers.hms.harvard.edu/bionumber.aspx?id=101801
# MOLWEIGHT_BIOMASS = 12.011 + 1.613*1.00784 + 0.557*15.999 + 0.158*14.0067
# MOLWEIGHT_BIOMASS = 0.966 # g/mmol, Takhaveev et al. (2023)
MOLWEIGHT_BIOMASS = 0.97148015 # g/mmol, from this model

In [None]:
# Echo the value currently in effect.
MOLWEIGHT_BIOMASS

In [None]:
# Time estimate as the reciprocal of the biomass flux.
# NOTE(review): the ablation cell further down computes its 'original' time as
# 1/(flux * MOLWEIGHT_BIOMASS) instead -- confirm which form is intended.
biomass_time = 1/(biomass.flux)
print(f'Estimated time: {biomass_time:.4f} hours')

## Modify biomass reaction by ablating each type of macromolecule

In [None]:
CELL_DRY_MASS = 15e-12 # g

class BiomassComponent:
    """One macromolecule class of the biomass reaction plus its ablation result.

    Parameters
    ----------
    metabolite_label : str
        Human-readable name of the component.
    metabolite_id : str
        Model ID of the component's bulk species.
    pseudoreaction : str
        Model ID of the component's pseudoreaction.
    molecular_mass : float
        Molecular mass, in g/mmol.
    mass_per_cell : float
        Mass of the component per cell, in g.
    copy_number : int or float
        Stored as given; not used by this class.

    Attributes
    ----------
    ablated_flux : float or None
        Flux assigned from outside once an ablated model has been solved,
        in mmol/(g DW . h).  None until then.
    est_time : float or None
        Time estimate in h, filled in by `get_est_time`.  None until then.
    """

    def __init__(
        self,
        metabolite_label,
        metabolite_id,
        pseudoreaction,
        molecular_mass,
        mass_per_cell,
        copy_number,
    ):
        self.metabolite_label = metabolite_label
        self.metabolite_id = metabolite_id
        self.pseudoreaction = pseudoreaction
        self.molecular_mass = molecular_mass # g/mmol
        self.mass_per_cell = mass_per_cell # g
        self.copy_number = copy_number

        self.ablated_flux = None # mmol/(g DW . h)
        self.est_time = None # h

    def get_est_time(self):
        """Compute the time estimate from `ablated_flux`, store it and return it.

        The estimate is ``1 / ablated_flux``.  `ablated_flux` must have been
        set first: None raises TypeError and zero raises ZeroDivisionError.

        Returns
        -------
        float
            The value just stored in `est_time`.
        """
        # Alternative formulation, currently disabled:
        #self.est_time = self.mass_per_cell/(CELL_DRY_MASS * self.ablated_flux * self.molecular_mass)
        self.est_time = 1/self.ablated_flux
        # Returned as well as stored, so the method lives up to its name.
        return self.est_time
        

In [None]:
# Pristine copy of the model, read once; the ablation cell below takes a fresh
# `.copy()` of it for every run.
#model_saved = cobra.io.load_yaml_model("./models/ecYeastGEMfull.yml")
model_saved = cobra.io.read_sbml_model("./models/ecYeastGEM_batch.xml")

In [None]:
# TODO:
# - Create CSV table containing these
# - Create a class builder that builds these classes based on the CSV table
# - FURTHER: make it able to deal with ranges of values (lower limit, upper limit)

# One row per macromolecule class, in the order of `_BIOMASS_COMPONENT_FIELDS`.
# molecular mass applies to bulk metabolites in ecYeast8 model,
# calculated using same logic as Takhaveev et al. (2023)
# NOTE(review): the IDs here carry a '[c]' suffix, whereas the inspection
# cells earlier look the same species up without it (e.g. 's_1096') in the
# same model file -- confirm which form the model actually uses.
_BIOMASS_COMPONENT_FIELDS = (
    'metabolite_label',
    'metabolite_id',
    'pseudoreaction',
    'molecular_mass',
    'mass_per_cell',
    'copy_number',
)
_BIOMASS_COMPONENT_ROWS = (
    ('lipid', 's_1096[c]', 'r_2108', 48.782232e-3, 900e-15, 1e9),
    ('protein', 's_3717[c]', 'r_4047', 464.0244378083234e-3, 7650e-15, 1e8),
    # mass: 'storage carbohydrates' + 'structural polymers'
    # copy number: estimated from above & avogadro's const
    ('carbohydrate', 's_3718[c]', 'r_4048', 383.11811162812177e-3, (75+3450)*1e-15, 2122804981),
    ('DNA', 's_3720[c]', 'r_4050', 3.898762509981403e-3, 75e-15, 16),
    ('RNA', 's_3719[c]', 'r_4049', 64.04239166780803e-3, 1650e-15, 4e6),
)

Lipids, Proteins, Carbohydrates, DNA, RNA = (
    BiomassComponent(**dict(zip(_BIOMASS_COMPONENT_FIELDS, row)))
    for row in _BIOMASS_COMPONENT_ROWS
)

In [None]:
from cobra.flux_analysis import flux_variability_analysis


biomass_component_list = [Lipids, Proteins, Carbohydrates, DNA, RNA]

# IDs of the bulk species, in the same order as the components.
all_metabolite_ids = [
    biomass_component.metabolite_id
    for biomass_component in biomass_component_list
]

# (label, reaction ID) pairs whose fluxes are reported: one pseudoreaction per
# component, plus the biomass objective itself.
all_pseudoreaction_ids = [
    (biomass_component.metabolite_label, biomass_component.pseudoreaction)
    for biomass_component in biomass_component_list
]
all_pseudoreaction_ids.append(('objective','r_4041'))


def _fresh_model():
    """Copy `model_saved` and apply the bounds shared by every run.

    r_1714 (glucose uptake) is set to (-1000, 0) and r_4041 (biomass) to
    (0, 1000).  Returns the copy together with its biomass reaction.
    """
    fresh_model = model_saved.copy()
    fresh_model.reactions.get_by_id('r_1714').bounds = (-1000, 0) # glucose uptake
    fresh_biomass = fresh_model.reactions.get_by_id('r_4041')
    fresh_biomass.bounds = (0, 1000)
    return fresh_model, fresh_biomass


def _report_pseudoreaction_fluxes(solution, title):
    """Print and bar-plot the flux of every entry of `all_pseudoreaction_ids`."""
    for metabolite_label, pseudoreaction_id in all_pseudoreaction_ids:
        print(f'{metabolite_label} pseudoreaction ({pseudoreaction_id}) flux = {solution.fluxes[pseudoreaction_id]}')
    _fig, ax = plt.subplots()
    ax.bar(
        [label for (label, _) in all_pseudoreaction_ids],
        [solution.fluxes[pseudoreaction_id] for (_, pseudoreaction_id) in all_pseudoreaction_ids],
    )
    # Fixed y-range so that the figures of all runs share one scale.
    ax.set_ylim((0,0.15))
    ax.set_ylabel('flux')
    ax.set_title(title)


# original
model, biomass_reaction = _fresh_model()
fba_solution = model.optimize()
pfba_solution = cobra.flux_analysis.pfba(model) # parsimonious
print('original')
print(biomass_reaction)
biomass_flux = pfba_solution.fluxes["r_4041"]
print(f'flux of objective function: {biomass_flux:.4f} (h-1))')
_report_pseudoreaction_fluxes(pfba_solution, 'original')
# NOTE(review): this estimate multiplies the flux by MOLWEIGHT_BIOMASS, while
# the per-component estimates below (BiomassComponent.get_est_time) use
# 1/flux only -- confirm which form is intended before comparing them.
biomass_time = 1/(biomass_flux * MOLWEIGHT_BIOMASS)
print(f'Estimated time: {biomass_time:.4f} hours')
print('\n')

# ablated
for biomass_component in biomass_component_list:
    # get model
    model, biomass_reaction = _fresh_model()

    # Remove every *other* component from the biomass reaction, leaving only
    # the one being prioritised.  Subtracting each species' own coefficient
    # brings it to exactly zero whatever its stoichiometry is; a species that
    # is not in the reaction raises KeyError instead of being silently added.
    to_ablate_dict = {}
    for metabolite_id in all_metabolite_ids:
        if metabolite_id == biomass_component.metabolite_id:
            continue
        metabolite = model.metabolites.get_by_id(metabolite_id)
        to_ablate_dict[metabolite] = biomass_reaction.get_coefficient(metabolite)

    # ablate metabolites from biomass reaction
    biomass_reaction.subtract_metabolites(to_ablate_dict)

    # optimise model
    pfba_solution = cobra.flux_analysis.pfba(model) # parsimonious
    biomass_flux = pfba_solution.fluxes["r_4041"]
    biomass_component.ablated_flux = biomass_flux
    biomass_component.get_est_time()
    print(f'prioritising {biomass_component.metabolite_label}')
    print(biomass_reaction)
    print(f'flux of ablated objective function: {biomass_flux:.4f} (h-1))')
    _report_pseudoreaction_fluxes(
        pfba_solution,
        f'prioritising {biomass_component.metabolite_label}',
    )
    print(f'estimated time: {biomass_component.est_time:.4f} hours')
    print('\n')

    # Flux ranges of the reported reactions at >= 90 % of the optimum.
    print(flux_variability_analysis(
        model,
        [pseudoreaction_id for (_, pseudoreaction_id) in all_pseudoreaction_ids],
        fraction_of_optimum=0.9)
    )

# `model` and `pfba_solution` now refer to the last ablation run.
total_time = sum(biomass_component.est_time for biomass_component in biomass_component_list)
print(f'sum of times: {total_time:.4f} hours')

Draw fluxes through central carbon metabolism

In [None]:
# Draw the most recent pFBA result (`pfba_solution`, i.e. the last ablation
# run).  `solution` is only ever assigned by the FBA of the unmodified model,
# so using it here would just redraw that earlier map.
# NOTE(review): the map is the iMM904 one while this model's reaction IDs look
# like 'r_NNNN' -- confirm that the fluxes actually overlay on the map.
b = escher.Builder(
    map_name='iMM904.Central carbon metabolism',
    reaction_data=pfba_solution.fluxes.to_dict()
)
b

Misc

In [None]:
from cobra.flux_analysis import flux_variability_analysis

# FVA with default settings on whatever `model` currently refers to, limited
# to the reactions listed in `all_pseudoreaction_ids`.
fva_reaction_ids = [reaction_id for (_label, reaction_id) in all_pseudoreaction_ids]
print(flux_variability_analysis(model, fva_reaction_ids))