In [1]:
# Import libraries - REQUIRES pip version 9.0.3
import pandas
import os
from os.path import join
import sys
import numpy
import pickle

# Using Cobrapy 0.13
import cobra
import cobra.test
from cobra.io import write_sbml_model
from cobra.flux_analysis import flux_variability_analysis
from cobra.flux_analysis import gapfill
#from cobra.flux_analysis.sampling import OptGPSampler

# Using Gurobi solver instead of GLPK
import gurobipy
from optlang import gurobi_interface


In [31]:
# Function to calculate doubling time from objective value
def doubling(model):
    """Turn a model's optimal objective value into a formatted time string.

    The model is optimized exactly once, inside a ``with model`` context.

    Parameters
    ----------
    model
        Object usable as a context manager whose context value exposes
        ``slim_optimize()`` (here: the cobra models loaded in this notebook).

    Returns
    -------
    str
        ``'No growth'`` when the objective value is below 1e-6 or the solve
        did not produce a number (NaN); otherwise the value
        ``(1 / objective) * 3600`` rounded to 3 decimals and suffixed with
        ``' minutes'``, or that value divided by 60 and suffixed with
        ``' hours'`` once it reaches 60.
    """
    with model as m:
        ov = m.slim_optimize()

    # NaN compares False against every threshold, so a failed solve would
    # otherwise fall through and be reported as 'nan hours'.
    if numpy.isnan(ov) or ov < 1e-6:
        return 'No growth'

    # Reuse the objective value already computed instead of solving again.
    growth = (1 / float(ov)) * 3600
    if growth < 60.0:
        return str(round(growth, 3)) + ' minutes'

    growth = growth / 60
    return str(round(growth, 3)) + ' hours'


# Identifies blocked reactions (find_blocked_reactions is called with its default settings; no cutoff is passed here)
def _is_gapfilled(notes):
    """Return True when a reaction's notes carry the exact marker 'gapfilled'."""
    if isinstance(notes, str):
        return notes == 'gapfilled'
    if isinstance(notes, dict):
        # The marker may be stored as a key or anywhere among the values.
        return 'gapfilled' in notes or any(_is_gapfilled(value) for value in notes.values())
    if isinstance(notes, (list, tuple, set, frozenset)):
        return any(_is_gapfilled(item) for item in notes)
    return False


def blockedReactions(model):
    """Report blocked reactions and which of them lack gene associations.

    Parameters
    ----------
    model
        Model passed (inside a ``with`` context) to
        ``cobra.flux_analysis.variability.find_blocked_reactions``.

    Returns
    -------
    tuple of three lists of reaction IDs
        ``blocked``: everything returned by ``find_blocked_reactions``;
        ``nogene_blocked``: the subset whose ``gene_reaction_rule`` is empty;
        ``nogene_gapfilled_blocked``: the subset of those whose ``notes``
        contain the marker ``'gapfilled'``.

    The three counts are also printed.
    """
    with model as m:
        blocked = cobra.flux_analysis.variability.find_blocked_reactions(m)
        nogene_blocked = []
        nogene_gapfilled_blocked = []
        for rxn_id in blocked:
            # Look the reaction up once instead of once per attribute access.
            rxn = m.reactions.get_by_id(rxn_id)
            if rxn.gene_reaction_rule != '':
                continue
            nogene_blocked.append(rxn_id)
            # notes is normally a container, so a plain `== 'gapfilled'`
            # comparison could never match; search the container instead.
            # NOTE(review): confirm how gapfilled reactions are annotated in
            # these models (exact key/value used).
            if _is_gapfilled(rxn.notes):
                nogene_gapfilled_blocked.append(rxn_id)

    print(str(len(blocked)) + ' total reactions are blocked')
    print(str(len(nogene_blocked)) + ' reactions without GPRs are blocked')
    print(str(len(nogene_gapfilled_blocked)) + ' gapfilled reactions without GPRs are blocked')

    return blocked, nogene_blocked, nogene_gapfilled_blocked


# Identify potentially gapfilled reactions
def missingGPR(model, exclude=['dna_rxn','rna_rxn','protein_rxn','teichoicacid_rxn','peptidoglycan_rxn','lipid_rxn','cofactor_rxn','biomass']):
    """List reactions that have no associated genes.

    Parameters
    ----------
    model
        Object whose ``reactions`` iterate over items exposing ``id``,
        ``genes`` and ``products``.
    exclude : str or iterable of str, optional
        Reaction ID(s) to ignore. A single string is treated as one ID; a
        list, tuple or set is treated as a collection of IDs.

    Returns
    -------
    list of str
        IDs of gene-less, non-excluded reactions that have at least one
        product. Gene-less, non-excluded reactions with no products are only
        counted (reported as exchanges in the printed summary).
    """
    # A bare string is one ID; iterating it would split it into characters.
    if exclude is None:
        excluded_ids = set()
    elif isinstance(exclude, str):
        excluded_ids = {exclude}
    else:
        # Accept any collection, not just list, so tuples/sets exclude correctly.
        excluded_ids = set(exclude)

    gapfilled = []
    exch = 0
    for rxn in model.reactions:
        if len(rxn.genes) != 0 or rxn.id in excluded_ids:
            continue
        if len(rxn.products) != 0:
            gapfilled.append(rxn.id)
        else:
            exch += 1

    if len(gapfilled) > 0:
        print(str(len(gapfilled)) + ' metabolic reactions not associated with genes and ' + str(exch) + ' are exchanges')

    return gapfilled

# Checks which cytosolic metabolites are generated for free (bacteria only)
def checkFreeMass(raw_model):
    """Find metabolites that can still be produced once product-less reactions are closed.

    Inside a ``with raw_model`` context, every reaction that has no products
    gets its lower bound set to 0. Then, for each metabolite of the model, a
    demand boundary is added in a nested context, set as the objective, and
    the model is optimized (a failed solve counts as 0). Metabolites whose
    demand flux exceeds 1e-6 are collected.

    Returns
    -------
    list of str
        IDs of the metabolites collected; a summary line is printed when the
        list is not empty.
    """
    free = []

    with raw_model as model:

        # Close all exchanges
        for candidate in model.reactions:
            if not list(candidate.products):
                closed = model.reactions.get_by_id(candidate.id)
                closed.lower_bound = 0.

        for met in model.metabolites:
            # Nested context: the temporary demand and objective apply to this trial only
            with model as trial:
                sink = trial.add_boundary(met, type='demand')
                trial.objective = sink
                if trial.slim_optimize(error_value=0.) > 1e-6:
                    free.append(met.id)

    if free:
        print(str(len(free)) + ' metabolites are generated for free')

    return free

# Check for mass and charge balance in reactions
def checkBalance(raw_model, exclude=['dna_rxn','rna_rxn','protein_rxn','teichoicacid_rxn','peptidoglycan_rxn','lipid_rxn','cofactor_rxn','biomass']):
    """Collect reactions whose mass or charge does not balance.

    Parameters
    ----------
    raw_model
        Model used inside a ``with`` context; its metabolites expose
        ``elements`` and its reactions expose ``id`` and
        ``check_mass_balance()``.
    exclude : str or iterable of str, optional
        Reaction ID(s) to skip. A single string is treated as one ID; a
        list, tuple or set is treated as a collection of IDs.

    Returns
    -------
    list of str
        IDs of reactions for which ``check_mass_balance()`` returned a
        non-empty result. Boundary reactions, excluded reactions and
        reactions whose check raises ``ValueError`` are skipped. The list is
        empty (and a message is printed) when no metabolite carries
        elemental data.
    """
    imbalanced = []
    mass_imbal = 0
    charge_imbal = 0

    # A bare string is one ID; iterating it would split it into characters.
    if exclude is None:
        excluded_ids = set()
    elif isinstance(exclude, str):
        excluded_ids = {exclude}
    else:
        excluded_ids = set(exclude)

    with raw_model as model:
        elem_set = set()
        for metabolite in model.metabolites:
            try:
                elem_set |= set(metabolite.elements.keys())
            except Exception:
                # Best effort: a metabolite without usable elemental data is
                # skipped, but KeyboardInterrupt/SystemExit still propagate.
                continue

        if len(elem_set) == 0:
            print('No elemental data associated with metabolites!')

        else:
            # Read the boundary once; re-reading it per reaction made the
            # loop needlessly quadratic.
            boundary_ids = {rxn.id for rxn in model.boundary}

            for rxn in model.reactions:
                if rxn.id in boundary_ids or rxn.id in excluded_ids:
                    continue

                try:
                    test = rxn.check_mass_balance()
                except ValueError:
                    continue

                if len(test) == 0:
                    continue

                imbalanced.append(rxn.id)
                # One reaction can count toward both tallies.
                if 'charge' in test:
                    charge_imbal += 1
                if len(set(test.keys()).intersection(elem_set)) > 0:
                    mass_imbal += 1

    if mass_imbal != 0:
        print(str(mass_imbal) + ' reactions are mass imbalanced')
    if charge_imbal != 0:
        print(str(charge_imbal) + ' reactions are charge imbalanced')

    return imbalanced


def checkDetermination(model):
    """Print whether the model has fewer or more reactions than metabolites.

    Prints 'Model is overdetermined' when there are fewer reactions than
    metabolites, 'Model is underdetermined' when there are more, and nothing
    when the counts are equal. Returns None.
    """
    n_reactions = len(model.reactions)
    n_metabolites = len(model.metabolites)

    if n_reactions == n_metabolites:
        return

    verdict = 'overdetermined' if n_reactions < n_metabolites else 'underdetermined'
    print('Model is ' + verdict)

        
# Checks the quality of models by a couple metrics and returns problems
def checkQuality(model, exclude=['dna_rxn','rna_rxn','protein_rxn','teichoicacid_rxn','peptidoglycan_rxn','lipid_rxn','cofactor_rxn','biomass'], cytosol='cytosol'):
    """Run the notebook's model checks in sequence and print their findings.

    Order of checks: compartment count, checkDetermination, missingGPR,
    checkFreeMass, checkBalance. When the three list-returning checks all
    come back empty, 'No inconsistencies detected' is printed.

    Parameters
    ----------
    model
        Model handed to each check.
    exclude
        Reaction ID(s) forwarded to missingGPR and checkBalance.
    cytosol
        Accepted for callers' convenience; not used by this function.

    Returns
    -------
    None
    """
    compartment_count = len(list(model.compartments))
    if compartment_count == 1:
        print('Model only has 1 compartment')

    checkDetermination(model)

    # Evaluated left to right, so the checks (and their printed lines) keep this order
    findings = missingGPR(model, exclude) + checkFreeMass(model) + checkBalance(model, exclude)

    if not findings:
        print('No inconsistencies detected')



#### iMLTC806cdf
Larocque, M., Chénard, T., & Najmanovich, R. (2014). A curated C. difficile strain 630 metabolic network: prediction of essential targets and inhibitors. BMC Systems Biology, 8, 117. http://doi.org/10.1186/s12918-014-0117-z


#### icdf834
Kashaf, S. S., Angione, C., & Lió, P. (2017). Making life difficult for Clostridium difficile: augmenting the pathogen’s metabolic model with transcriptomic and codon usage data for better therapeutic target characterization. BMC Systems Biology, 11, 25. http://doi.org/10.1186/s12918-017-0395-3

#### iHD992
Dannheim, H., Will, S. E., Schomburg, D., & Neumann‐Schaal, M. (2017). Clostridioides difficile 630Δerm in silico and in vivo – quantitative growth and extensive polysaccharide secretion. FEBS Open Bio, 7(4), 602–615. http://doi.org/10.1002/2211-5463.12208


In [4]:
# Read in sbml files
# Each model is loaded from a path relative to the notebook's working directory.

# iMLTC806cdf: the objective is set explicitly to the reaction with ID 'Biomass'
iMLTC806cdf = cobra.io.read_sbml_model('data/iMLTC806cdf/MODEL1409240004.xml')
iMLTC806cdf.objective = iMLTC806cdf.reactions.get_by_id('Biomass')

# icdf834: loaded as-is; no objective is assigned here
icdf834 = cobra.io.read_sbml_model('data/icdf834/icdf834.sbml')

# iHD992: the objective is set explicitly to the reaction with ID 'Biomass'
# (biomass_rxn stays defined as a notebook-level name after this cell)
iHD992 = cobra.io.read_sbml_model('data/iHD992/iHD992.sbml')
biomass_rxn = iHD992.reactions.get_by_id('Biomass')
iHD992.objective = biomass_rxn

# cd630_PATRIC: loaded as-is; no objective is assigned here
cd630_PATRIC = cobra.io.read_sbml_model('data/PATRIC/272563.8.xml')

# iCd630J: loaded as-is; no objective is assigned here
iCd630J = cobra.io.read_sbml_model('data/iCd630J.sbml')


In [5]:
iMLTC806cdf

0,1
Name,MODEL1409240004
Memory address,0x07ff0e5708e10
Number of metabolites,1638
Number of reactions,1219
Objective expression,1.0*Biomass - 1.0*Biomass_reverse_57a34
Compartments,"Cytosol, Extracellular"


In [6]:
print(len(iMLTC806cdf.genes))

0


In [32]:
checkQuality(iMLTC806cdf, exclude=['Biomass'], cytosol='c')

Model is overdetermined
1101 metabolic reactions not associated with genes (117 are exchanges)
579 metabolites are generated for free
34 reactions are mass imbalanced


In [8]:
blocked, nogene_blocked, nogene_gapfilled_blocked = blockedReactions(iMLTC806cdf)

368 total reactions are blocked
368 reactions without GPRs are blocked
0 gapfilled reactions without GPRs are blocked


In [9]:
doubling(iMLTC806cdf)

'29.807 minutes'

In [10]:
icdf834

0,1
Name,icdf834_ex
Memory address,0x07ff0e5708f90
Number of metabolites,807
Number of reactions,1168
Objective expression,-1.0*Biomass_reverse_57a34 + 1.0*Biomass
Compartments,"Cytosol, Extracellular"


In [11]:
print(len(icdf834.genes))

832


In [12]:
checkQuality(icdf834, exclude=['Biomass'], cytosol='Cytosol')

Model is underdetermined
259 metabolic reactions not associated with genes
4 metabolites are generated for free
144 reactions are mass imbalanced


In [13]:
doubling(icdf834)

'24.569 minutes'

In [14]:
iHD992

0,1
Name,iHD992
Memory address,0x07ff0e47a7f50
Number of metabolites,935
Number of reactions,1125
Objective expression,1.0*Biomass - 1.0*Biomass_reverse_57a34
Compartments,Cytosol


In [15]:
print(len(iHD992.genes))

0


In [16]:
checkQuality(iHD992, exclude=['Biomass'], cytosol='Cytosol')

Model only has 1 compartment
Model is underdetermined
964 metabolic reactions not associated with genes
303 metabolites are generated for free
No elemental data associated with metabolites!


In [17]:
blocked, nogene_blocked, nogene_gapfilled_blocked = blockedReactions(iHD992)

97 total reactions are blocked
97 reactions without GPRs are blocked
0 gapfilled reactions without GPRs are blocked


In [18]:
doubling(iHD992)

'20.863 minutes'

In [19]:
cd630_PATRIC

0,1
Name,272563.8
Memory address,0x07ff0e52ce290
Number of metabolites,1265
Number of reactions,1192
Objective expression,-1.0*bio1_reverse_b18f7 + 1.0*bio1
Compartments,"Cytosol, Extracellular"


In [20]:
print(len(cd630_PATRIC.genes))

744


In [21]:
checkQuality(cd630_PATRIC, exclude=['bio1'], cytosol='Cytosol')

Model is overdetermined
135 metabolic reactions not associated with genes
3 metabolites are generated for free
25 reactions are charge imbalanced


In [22]:
blocked, nogene_blocked, nogene_gapfilled_blocked = blockedReactions(cd630_PATRIC)

574 total reactions are blocked
32 reactions without GPRs are blocked
0 gapfilled reactions without GPRs are blocked


In [23]:
doubling(cd630_PATRIC)

'1.751 hours'

In [24]:
iCd630J

0,1
Name,iCd630J
Memory address,0x07ff0e433ec90
Number of metabolites,1175
Number of reactions,1191
Objective expression,-1.0*biomass_reverse_01e59 + 1.0*biomass
Compartments,"cytosol, extracellular"


In [25]:
print(len(iCd630J.genes))

790


In [26]:
checkQuality(iCd630J, exclude=['dna_rxn','rna_rxn','protein_rxn','teichoicacid_rxn','peptidoglycan_rxn','lipid_rxn','cofactor_rxn','biomass'], cytosol='cytosol')

Model is underdetermined
102 metabolic reactions not associated with genes
53 reactions are charge imbalanced


In [27]:
blocked, nogene_blocked, nogene_gapfilled_blocked = blockedReactions(iCd630J)

505 total reactions are blocked
12 reactions without GPRs are blocked
0 gapfilled reactions without GPRs are blocked


In [28]:
doubling(iCd630J)

'42.617 minutes'