Import dependencies

In [None]:
%reload_ext autoreload
%autoreload 1
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import cobra
import escher

import time
from wrapt_timeout_decorator import *

# Load model

Choose from alternatives

In [None]:
# Yeast 8
model = cobra.io.read_sbml_model("../data/gemfiles/yeast-GEM-BiGG.xml")

In [None]:
# Enzyme-constrained Yeast 8 (ecYeastGEM), batch variant.
# Source: https://github.com/SysBioChalmers/ecModels/tree/main/ecYeastGEM/model
# The upstream repository is supposed to be under CI, i.e. the model is
# automatically re-generated and updated when new models are available.
# This model is based on Yeast8.3.4.

# GECKO parameters used to build the model:
# Average enzyme saturation factor (sigma) = 0.5
# Total protein content in the cell [g protein/gDW] (Ptot) = 0.5
# Fraction of enzymes in the model [g enzyme/g protein] (f) = 0.5
# Source: https://github.com/SysBioChalmers/GECKO/blob/main/userData/ecYeastGEM/YeastGEMAdapter.m
# NOTE: this rebinds `model`, replacing the Yeast 8 model if that cell was run.
model = cobra.io.read_sbml_model("../data/gemfiles/ecYeastGEM_batch.xml")

Show model

In [None]:
model

# Objective function

In the ecYeast8 (batch) model, the objective function -- growth -- is reaction ID `r_2111`.

This reaction is linked to the biomass reaction, ID `r_4041`.

Here, we also see the stoichiometry.  There are five classes of macromolecules: lipids, proteins, carbohydrates, DNA, and RNA.  And there are two other bulk metabolites: cofactor and ion.

In [None]:
model.reactions.get_by_id('r_2111')

In [None]:
model.reactions.get_by_id('r_4041')

Medium

In [None]:
model.medium

In [None]:
# Print the human-readable name of every reaction listed in the medium
for reaction_id in model.medium:
    medium_reaction = model.reactions.get_by_id(reaction_id)
    print(medium_reaction.name)

Remove bounds on glucose uptake and growth rate

In [None]:
# Open up the bounds used by the simulations below.
# (Not strictly needed: these bounds are already unrestricted.)
unrestricted_bounds = {
    'r_1714': (-1000.0, 0),  # glucose uptake
    'r_1992': (-1000.0, 0),  # oxygen uptake (aerobic)
    'r_4041': (0, 1000.0),   # objective function
}
for rxn_id, rxn_bounds in unrestricted_bounds.items():
    model.reactions.get_by_id(rxn_id).bounds = rxn_bounds

Optimise using (vanilla) FBA

In [None]:
solution = model.optimize()

In [None]:
model.summary()

In [None]:
solution['r_0466No1']

Linear reaction coefficients

In [None]:
cobra.util.solver.linear_reaction_coefficients(model)

Check usage pool reaction

In [None]:
model.metabolites.get_by_id('prot_pool[c]')

In [None]:
model.reactions.get_by_id('prot_pool_exchange')

# Auxotrophs

BY4741: MATa his3Δ1 leu2Δ0 met15Δ0 ura3Δ0

In [None]:
# BY4741 background: knock out the auxotrophy marker genes
# (presumably HIS3, LEU2, MET15 and URA3, in genotype order -- verify).
genes_to_delete = ['YOR202W', 'YCL018W', 'YLR303W', 'YEL021W']
for gene in genes_to_delete:
    model.genes.get_by_id(gene).knock_out()

# Supplement the medium with the matching amino acids and uracil by
# re-bounding each exchange reaction together with its `_REV` counterpart.
supplement_exchanges = ['r_1893', 'r_1899', 'r_1902', 'r_2090']
exch_list = supplement_exchanges + [
    f'{rxn_id}_REV' for rxn_id in supplement_exchanges
]

for exch in exch_list:
    model.reactions.get_by_id(exch).bounds = (-1000, 0)

Alternatively, BY4742: MATα his3Δ1 leu2Δ0 lys2Δ0 ura3Δ0

In [None]:
# BY4742 background: knock out the auxotrophy marker genes
# (presumably HIS3, LEU2, LYS2 and URA3, in genotype order -- verify).
genes_to_delete = ['YOR202W', 'YCL018W', 'YBR115C', 'YEL021W']
for gene in genes_to_delete:
    model.genes.get_by_id(gene).knock_out()

# Supplement the medium with the matching amino acids and uracil by
# re-bounding each exchange reaction together with its `_REV` counterpart.
supplement_exchanges = ['r_1893', 'r_1899', 'r_1900', 'r_2090']
exch_list = supplement_exchanges + [
    f'{rxn_id}_REV' for rxn_id in supplement_exchanges
]

for exch in exch_list:
    model.reactions.get_by_id(exch).bounds = (-1000, 0)

Set carbon source to glucose

In [None]:
# Open up the bounds used by the auxotroph simulation.
# (Not strictly needed: these bounds are already unrestricted.)
unrestricted_bounds = {
    'r_1714': (-1000.0, 0),  # glucose uptake
    'r_1992': (-1000.0, 0),  # oxygen uptake (aerobic)
    'r_4041': (0, 1000.0),   # objective function
}
for rxn_id, rxn_bounds in unrestricted_bounds.items():
    model.reactions.get_by_id(rxn_id).bounds = rxn_bounds

Simulate

In [None]:
@timeout(60)
def model_optimize(model):
    """Return `model.optimize()`; the call is wrapped in a 60-second timeout."""
    return model.optimize()


try:
    model_optimize(model)
except TimeoutError:
    # The except clause must name the exception *class*.  The previous
    # `except TimeoutError():` named an instance, which makes Python raise
    # TypeError as soon as a timeout actually has to be matched.
    print('model optimisation: timeout!')
else:
    print('model optimised, no timeout')

In [None]:
model.summary()

> As hoped, auxotrophs with the appropriate supplements grow at a rate comparable to wild-type. BY4741: 0.41, BY4742: 0.44.

# Gene deletions

## Example: NDI1

This example aims to replicate knockout simulations in Sánchez et al. (2017), where they did this with ecYeast7.

NDI1 is represented in the model by its systematic name YML120C.

Genes are matched to reactions in the model via the gene-protein-reaction (GPR) map, which is present in the source XML model.  As we're using a GECKO-generated model, the reactions will also include `draw_prot_XXXX` reactions (protein pool) that are created due to the formalism.  In this case, it is `draw_prot_P32340`, matching the associated enzyme P32340.

Note: Sánchez et al. (2017) used different parameters, namely: $P_{tot}$ = 0.448 g gDW<sup>-1</sup>, $f$ = 0.2154 g g<sup>-1</sup>, $\sigma$ = 0.46.

In [None]:
model.genes.get_by_id('YML120C')

In [None]:
model.genes.get_by_id('YML120C').reactions

Inspect these reactions.  These should have a `prot_XXXX` reactant because of the GECKO formalism and bounds of (0, inf).

In [None]:
model.reactions.get_by_id('r_0773No1')

In [None]:
model.reactions.get_by_id('draw_prot_P32340')

Delete this gene.

In [None]:
model.genes.get_by_id('YML120C').knock_out()

Effect: bounds of the reactions should be zero.

In [None]:
print(model.reactions.get_by_id('r_0773No1').bounds)
print(model.reactions.get_by_id('draw_prot_P32340').bounds)

Optimise.

In [None]:
solution = model.optimize()
model.summary()

Sánchez et al. (2017) also blocked NDE1 and NDE2 to simulate the limited capacity of the ethanol-acetaldehyde shuttle _in vivo_.

In [None]:
model.genes.get_by_id('YMR145C').knock_out()
model.genes.get_by_id('YDL085W').knock_out()

In [None]:
solution = model.optimize()
model.summary()

## Genes of interest

In [None]:
model_saved = model.copy()

Define lookup table.  It's better to download a data table and use it, but I study only a few genes, so I don't want to over-complicate it for now, and I'll probably deal with this in a refactor.

In [None]:
# Lookup table: standard gene name -> systematic name, which is the ID
# under which the gene is looked up in the model.
gene_systematic = dict(
    ALD6='YPL061W',
    GPH1='YPR160W',
    GSY2='YLR258W',
    IDP2='YLR174W',
    PGI1='YBR196C',
    RIM11='YMR139W',
    SWE1='YJL187C',
    TSA1='YML028W',
    TSA2='YDR453C',
    ZWF1='YNL241C',
)

Define deletion strains.  List of lists to allow for multiple deletions per strain.  Some genes will not be found as they are not metabolic genes.

**Note: Probably worth encapsulating this in an object after I'm satisfied with this proof-of-concept.**

In [None]:
# One inner list per strain; each entry is the standard name of a gene
# deleted in that strain (a key of `gene_systematic`).
# Commented-out entries are strains currently excluded from the run.
list_deletion_strains = [
    #['RIM11'],
    #['SWE1'],
    ['TSA1', 'TSA2'],
    ['ZWF1'],
    ['ZWF1', 'ALD6'],
    ['ZWF1', 'ALD6', 'IDP2'],    
    ['GSY2'],
    ['GPH1'],
    #['PGI1']
]

In [None]:
# Simulate growth of every deletion strain, one strain at a time
for deletion_strain in list_deletion_strains:
    print(deletion_strain)
    # Work on a fresh copy so knock-outs do not accumulate across strains
    m = model_saved.copy()
    for gene in deletion_strain:
        try:
            print(f'{gene}-associated reactions:')
            # A failed lookup raises KeyError and the gene is skipped
            target_gene = m.genes.get_by_id(gene_systematic[gene])
            for reaction in target_gene.reactions:
                print(reaction.reaction)
            target_gene.knock_out()
        except KeyError:
            print(f'{gene} not found, skipping')

    # Lift the limits on glucose uptake and on the objective function
    m.reactions.get_by_id('r_1714').bounds = (-1000, 0)
    m.reactions.get_by_id('r_2111').bounds = (0, 1000)
    # Optimise using FBA and report the growth flux
    fba_solution = m.optimize()
    growth_flux = fba_solution.fluxes["r_2111"]
    print(f'Growth: {growth_flux}')
    print('\n')

## Focus: ZWF1

Optimise

In [None]:
# Reload the model from file so knock-outs made earlier do not carry over
model = cobra.io.read_sbml_model("../data/gemfiles/ecYeastGEM_batch.xml")

# BY4741 background: delete the auxotrophy marker genes ...
genes_to_delete = ['YOR202W', 'YCL018W', 'YLR303W', 'YEL021W']
for gene in genes_to_delete:
    model.genes.get_by_id(gene).knock_out()
# ... and re-bound the supplement exchange reactions (and their `_REV` pairs)
exch_list = ['r_1893', 'r_1899', 'r_1902', 'r_2090',
             'r_1893_REV', 'r_1899_REV', 'r_1902_REV', 'r_2090_REV']
for exch in exch_list:
    model.reactions.get_by_id(exch).bounds = (-1000, 0)

# ZWF1 deletion (systematic name YNL241C)
model.genes.get_by_id('YNL241C').knock_out()


# Optimise
@timeout(60)
def model_optimize(model):
    """Return `model.optimize()`; the call is wrapped in a 60-second timeout."""
    return model.optimize()


try:
    zwf_solution = model_optimize(model)
    print('model optimised, no timeout')
except TimeoutError:
    # The except clause must name the exception *class*.  The previous
    # `except TimeoutError():` named an instance, which makes Python raise
    # TypeError as soon as a timeout actually has to be matched.
    # NOTE: on timeout `zwf_solution` is left unassigned.
    print('model optimisation: timeout!')

In [None]:
solution

In [None]:
zwf_solution

Get difference between wild-type and strain fluxes

In [None]:
# Per-reaction flux change: ZWF1 strain minus the reference `solution`
diff_fluxes = zwf_solution.fluxes - solution.fluxes
# Integer positions of the reactions whose flux changed
nonzero_idx = diff_fluxes.to_numpy().nonzero()[0]
# Select by position explicitly with .iloc.  The Series is indexed by
# reaction ID, so plain `[]` with integers only worked through a deprecated
# positional fallback.
diff_fluxes_nonzero = diff_fluxes.iloc[nonzero_idx]

In [None]:
diff_fluxes_nonzero

See which ones have the greatest changes (absolute)

In [None]:
# Order the non-zero differences by decreasing magnitude (sign is kept),
# then print reaction ID, reaction name and flux difference for each.
by_magnitude = diff_fluxes_nonzero.abs().sort_values(ascending=False)
diff_fluxes_sorted = diff_fluxes_nonzero[by_magnitude.index]
for rxn_id, flux in diff_fluxes_sorted.items():
    rxn_name = model.reactions.get_by_id(rxn_id).name
    print(f'{rxn_id}, {rxn_name}, {flux}')

In [None]:
model.reactions.get_by_id('r_0959No2')

In [None]:
sum(diff_fluxes_nonzero.index.str.contains('draw'))

## Deletion collection

In [None]:
from cobra.flux_analysis import single_gene_deletion

deletion_results = single_gene_deletion(model)

In [None]:
deletion_results

In [None]:
plt.hist(deletion_results.growth, bins=100)
plt.show()