In [3]:
# Define functions

# Identify potentially gapfilled reactions
def findGapfilledRxn(model, exclude=[]):
    """Identify reactions that may have been added by gapfilling.

    A reaction is reported when it has no associated genes, is not a boundary
    reaction, is not listed in ``exclude`` and is not a transport reaction
    according to ``findTransports``.

    Parameters
    ----------
    model : COBRApy model (uses ``.reactions`` and ``.boundary``)
    exclude : str or collection of str, optional
        Reaction id(s) that must never be reported.

    Returns
    -------
    list of str
        Ids of the candidate gapfilled reactions, in model order.

    Raises
    ------
    Exception
        Propagated from ``findTransports`` when the model has one compartment.
    """
    if not isinstance(exclude, (list, tuple, set, frozenset)):
        exclude = [exclude]

    # Everything that must be skipped, gathered once for constant-time lookups.
    # The original test (`not in exclude or not in transport`) only skipped
    # reactions present in BOTH collections.
    skipped = set(exclude)
    skipped.update(findTransports(model))
    skipped.update(rxn.id for rxn in model.boundary)

    gapfilled = [
        rxn.id
        for rxn in model.reactions
        if len(rxn.genes) == 0 and rxn.id not in skipped
    ]

    if len(gapfilled) > 0:
        print(str(len(gapfilled)) + ' metabolic reactions not associated with genes')

    return gapfilled

# Check for missing transport and exchange reactions
def missingRxns(model, extracellular='e'):
    """List extracellular metabolites lacking a transport or exchange reaction.

    Parameters
    ----------
    model : COBRApy model (uses ``.metabolites`` and ``.exchanges``)
    extracellular : str
        Compartment id that marks a metabolite as extracellular.

    Returns
    -------
    tuple of (list of str, list of str)
        Ids of metabolites without a transport reaction, and ids of
        metabolites without an exchange reaction.
    """
    transport_ids = set(findTransports(model))
    exchange_ids = {rxn.id for rxn in model.exchanges}

    no_transport = []
    no_exchange = []

    for met in model.metabolites:
        if met.compartment != extracellular:
            continue

        # Ids of every reaction this metabolite takes part in
        linked_ids = {rxn.id for rxn in met.reactions}

        if linked_ids.isdisjoint(transport_ids):
            no_transport.append(met.id)
        if linked_ids.isdisjoint(exchange_ids):
            no_exchange.append(met.id)

    if no_transport:
        print(str(len(no_transport)) + ' extracellular metabolites are missing transport reactions')
    if no_exchange:
        print(str(len(no_exchange)) + ' extracellular metabolites are missing exchange reactions')

    return no_transport, no_exchange

In [4]:
# Checks which cytosolic metabolites are generated for free (bacteria only)
def checkFreeMass(raw_model, cytosol='cytosol'):
    """Find metabolites that can still be produced when all uptake is closed.

    Inside a model context, the lower bound of every boundary reaction is set
    to 0, then a temporary demand reaction is added for each candidate
    metabolite and maximised. Metabolites with an optimum above 1e-8 are
    reported.

    Parameters
    ----------
    raw_model : COBRApy model, used as a context manager
    cytosol : str or collection of str
        A metabolite is tested when ``metabolite.compartment in cytosol``.
        For a string this is a substring test, not an equality test.

    Returns
    -------
    list of str
        Ids of the metabolites that are produced "for free".
    """
    with raw_model as model:

        # Shut off uptake through every boundary reaction
        for boundary_rxn in model.boundary:
            model.reactions.get_by_id(boundary_rxn.id).lower_bound = 0.

        # Metabolites that already own a demand reaction are not tested again
        already_demanded = set()
        for dm_rxn in model.demands:
            if len(dm_rxn.reactants) > 0:
                already_demanded.add(dm_rxn.reactants[0].id)
            if len(dm_rxn.products) > 0:
                already_demanded.add(dm_rxn.products[0].id)

        free = []
        for met in model.metabolites:
            if met.id in already_demanded or not met.compartment in cytosol:
                continue

            # Temporary demand reaction: maximise it, record, then drop it
            probe = model.add_boundary(met, type='demand')
            model.objective = probe
            if model.slim_optimize(error_value=0.) > 1e-8:
                free.append(met.id)
            model.remove_reactions([probe])

    if len(free) > 0:
        print(str(len(free)) + ' metabolites are generated for free')

    return free


In [5]:
# Check for mass and charge balance in reactions
def checkBalance(raw_model, exclude=[]):
    """Report reactions that are mass- or charge-imbalanced.

    Boundary reactions and reactions listed in ``exclude`` are skipped, as are
    reactions whose ``check_mass_balance()`` raises ``ValueError``. When no
    metabolite carries elemental data, every reaction is reported.

    Parameters
    ----------
    raw_model : COBRApy model, used as a context manager
    exclude : str or collection of str, optional
        Reaction id(s) to leave out of the check.

    Returns
    -------
    list of str
        Ids of the imbalanced reactions. This is always a list of ids, also
        when no elemental data is available.
    """
    if not isinstance(exclude, (list, tuple, set, frozenset)):
        exclude = [exclude]
    excluded = set(exclude)

    imbalanced = []
    mass_imbal = 0
    charge_imbal = 0

    with raw_model as model:
        # Collect every element symbol used by any metabolite
        elem_set = set()
        for metabolite in model.metabolites:
            try:
                elements = metabolite.elements
            except Exception:
                # Best effort: a metabolite whose formula cannot be read is skipped
                continue
            if elements:
                elem_set.update(elements)

        if len(elem_set) == 0:
            # Without elemental data nothing can be verified: flag everything
            imbalanced = [rxn.id for rxn in model.reactions]
            mass_imbal = len(model.reactions)
            charge_imbal = len(model.reactions)
            print('No elemental data associated with metabolites!')

        else:
            # Computed once instead of once per reaction
            boundary_ids = {rxn.id for rxn in model.boundary}

            for rxn in model.reactions:
                if rxn.id in boundary_ids or rxn.id in excluded:
                    continue

                try:
                    report = rxn.check_mass_balance()
                except ValueError:
                    continue

                if len(report) == 0:
                    continue

                imbalanced.append(rxn.id)
                if 'charge' in report:
                    charge_imbal += 1
                # Any key that is an element symbol means a mass imbalance
                if elem_set.intersection(report):
                    mass_imbal += 1

    if mass_imbal != 0:
        print(str(mass_imbal) + ' reactions are mass imbalanced')
    if charge_imbal != 0:
        print(str(charge_imbal) + ' reactions are charge imbalanced')
    return imbalanced

In [6]:
# Checks the quality of models by a couple metrics and returns problems
def checkQuality(model, exclude=[], cytosol='c', extracellular='e'):
    """Run the model quality checks of this notebook and report the findings.

    Calls ``findGapfilledRxn``, ``checkFreeMass``, ``checkBalance`` and
    ``missingRxns`` in that order and prints how long the analysis took.

    Parameters
    ----------
    model : COBRApy model
    exclude : str or list of str, optional
        Reaction id(s) passed to the gapfill and balance checks.
    cytosol : str
        Compartment selector passed to ``checkFreeMass``.
    extracellular : str
        Compartment id passed to ``missingRxns``. The default ``'e'`` matches
        the value that was previously used implicitly.

    Returns
    -------
    tuple
        ``(gaps, freemass, balance, trans, exch)`` as returned by the
        individual checks.
    """
    start_time = time.time()

    model_name = model.name if model.name is not None else 'model'

    gaps = findGapfilledRxn(model, exclude)
    freemass = checkFreeMass(model, cytosol)
    balance = checkBalance(model, exclude)
    trans, exch = missingRxns(model, extracellular)

    # Count the findings instead of concatenating the three containers
    if len(gaps) + len(freemass) + len(balance) == 0:
        print('No inconsistencies detected')

    duration = int(round(time.time() - start_time))
    print('Took ' + str(duration) + ' seconds to analyze ' + model_name)

    return gaps, freemass, balance, trans, exch

In [7]:
# Trace back through reactions immediately adjacent to a given reaction to identify blocked precursor synthesis
def checkPrecursors(model, reaction):
    """Print which reactions around the reactants of ``reaction`` are blocked.

    If ``reaction`` can carry flux (magnitude above 0.001 in either
    direction) a confirmation is printed. Otherwise every reaction involving
    each reactant is optimised in turn and the blocked ones are reported.

    All objective changes happen inside a model context, so the objective of
    ``model`` is restored when the function returns.

    Parameters
    ----------
    model : COBRApy model
    reaction : reaction object or str
        The reaction to analyse; a string is looked up by id.

    Returns
    -------
    None
    """
    if isinstance(reaction, str):
        reaction = model.reactions.get_by_id(reaction)

    def _max_abs_flux(target):
        # Largest flux magnitude `target` can carry in either direction
        model.objective = target
        forward = model.optimize(objective_sense='maximize').objective_value
        backward = model.optimize(objective_sense='minimize').objective_value
        # A missing or NaN objective value (failed optimisation) counts as no flux
        if forward is None or forward != forward:
            forward = 0.0
        if backward is None or backward != backward:
            backward = 0.0
        return max(forward, abs(backward))

    with model:
        if _max_abs_flux(reaction) > 0.001:
            print('Able to produce all precursors for this reaction.')
            return None

        # The "not produced" message is printed at most once per call
        reported_unproduced = False

        for reactant in reaction.reactants:
            for sub_reaction in list(reactant.reactions):
                if not _max_abs_flux(sub_reaction) < 0.001:
                    continue

                if reactant in sub_reaction.products:
                    print('Cannot acquire ' + str(reactant.id) + ' via ' + str(sub_reaction.id))
                elif not reported_unproduced and reactant in sub_reaction.reactants:
                    print(str(reactant.id) + ' not produced in any reactions.')
                    reported_unproduced = True

    return None

#------------------------------------------------------------------------------------#

In [8]:
# Function to calculate doubling time from objective value
def doubling(model):
    """Convert the optimal objective value of ``model`` into a doubling-time string.

    The model is optimised once inside a model context. An objective value
    below 1e-6, or a NaN/None value from a failed optimisation, gives
    ``'No growth'``.

    NOTE(review): the conversion is ``3600 / objective``, reported as minutes
    when below 60 and as hours (divided by 60) otherwise. Confirm that this
    matches the units of the objective.

    Parameters
    ----------
    model : COBRApy model, used as a context manager

    Returns
    -------
    str
        ``'No growth'``, ``'<x> minutes'`` or ``'<x> hours'``.
    """
    with model as m:
        ov = m.slim_optimize()

    # `not ov >= ...` is also true for NaN, which `ov < ...` silently let through
    if ov is None or not ov >= 1e-6:
        return 'No growth'

    growth = (1 / float(ov)) * 3600

    if growth < 60.0:
        return str(round(growth, 3)) + ' minutes'
    return str(round(growth / 60, 3)) + ' hours'



In [9]:
# Checks for availability of reactants of a given reaction
def availability(model, target_rxn):
    """Test the other reactions that consume the reactants of ``target_rxn``.

    For every reactant of the target reaction, each other reaction that lists
    it as a reactant is set as the objective and optimised. Reactions whose
    optimum is below 1e-8, or NaN from a failed optimisation, are recorded
    together with the reactant concerned.

    The objective is changed inside a model context, so the objective of
    ``model`` is restored on return.

    Parameters
    ----------
    model : COBRApy model
    target_rxn : str
        Id of the reaction whose reactants are examined.

    Returns
    -------
    tuple of (set of str, set of str)
        Ids of the failed reactions and ids of the limiting reactants.
    """
    precursors = model.reactions.get_by_id(target_rxn).reactants

    unsuccessful = set()
    limited = set()

    with model:
        for precursor in precursors:
            for rxn in precursor.reactions:
                # Only other reactions that consume the precursor are tested
                if rxn.id == target_rxn or precursor not in rxn.reactants:
                    continue

                model.objective = rxn
                obj_val = model.slim_optimize()
                # `not ... >=` also catches NaN, which `<` let through
                if not obj_val >= 1e-8:
                    unsuccessful.add(rxn.id)
                    limited.add(precursor.id)

    print('Failed reactions: ' + str(len(unsuccessful)))
    print('Limiting reactants: ' + str(len(limited)))

    return unsuccessful, limited

# Removes all metabolites in a list of metabolite ids and all reactions associated with them
def removeAll(model, metabolite_list):
    """Return a copy of ``model`` without the given metabolites and their reactions.

    The input model is deep-copied and left untouched. Ids that are not found
    in the model are reported on stdout and skipped.

    Parameters
    ----------
    model : COBRApy model
    metabolite_list : iterable of str
        Ids of the metabolites to remove.

    Returns
    -------
    The modified deep copy of ``model``.
    """
    new_model = copy.deepcopy(model)

    for metabolite_id in metabolite_list:
        # Only the lookup is guarded, so a KeyError raised while removing is
        # not mistaken for a missing id. The id also keeps its own name, so
        # the message below always concatenates strings.
        try:
            metabolite = new_model.metabolites.get_by_id(metabolite_id)
        except KeyError:
            print(metabolite_id + ' not found')
            continue

        new_model.remove_reactions(metabolite.reactions)
        new_model.remove_metabolites([metabolite])

    return new_model

In [10]:
# Identify transport reactions (for any number of compartments)
def findTransports(model):
    """Identify transport reactions.

    A reaction is reported when both its reactants and its products sit in
    known compartments, unless both sides occupy the same compartments and
    differ in their base metabolite ids (an ordinary metabolic reaction).

    The base id is the metabolite id without its last ``_``-separated field,
    so ids are assumed to look like ``<base>_<compartment>``. Splitting on
    the first underscore instead made same-compartment conversions such as
    ``ala_L_c <=> ala_D_c`` look like transporters.

    Parameters
    ----------
    model : COBRApy model (uses ``.compartments`` and ``.reactions``)

    Returns
    -------
    list of str
        Ids of the transport reactions, in model order.

    Raises
    ------
    Exception
        If the model has exactly one compartment.
    """
    compartments = set(model.compartments)
    if len(compartments) == 1:
        raise Exception('Model only has one compartment!')

    def _base_ids(metabolites):
        # Metabolite ids with the trailing compartment field removed
        return {met.id.rsplit('_', 1)[0] for met in metabolites}

    transporters = []
    for reaction in model.reactions:
        reactant_compartments = {met.compartment for met in reaction.reactants}
        product_compartments = {met.compartment for met in reaction.products}

        same_compartments = reactant_compartments == product_compartments
        if same_compartments and _base_ids(reaction.reactants) != _base_ids(reaction.products):
            continue

        # Both sides must be non-empty and located in known compartments
        if compartments & reactant_compartments and compartments & product_compartments:
            transporters.append(reaction.id)

    return transporters

# Removes a given percentage of reactions from a model, ignoring objective
def generate_gaps(model, percentage=0.2, prune=False, ignore=[]):
    """Return a copy of ``model`` with a random fraction of its reactions removed.

    Parameters
    ----------
    model : COBRApy model
        Left unmodified; a deep copy is truncated.
    percentage : float
        Fraction of reactions to remove. The resulting count is rounded and
        clamped to ``[0, number of reactions]``.
    prune : bool
        When true, ``prune_unused_metabolites`` is called on the truncated model.
    ignore : iterable of str
        Reaction ids that must not be removed. They are dropped from the
        random selection, so fewer reactions than requested may be removed.

    Returns
    -------
    tuple
        ``(truncated_model, rxns_to_remove)``, where ``rxns_to_remove`` is the
        list of removed reaction ids.
    """
    # Imported locally: `random` is not imported in the visible import cell.
    import random

    rxn_ids = [x.id for x in model.reactions]
    number_to_remove = int(round(len(rxn_ids) * percentage))
    number_to_remove = max(0, min(number_to_remove, len(rxn_ids)))

    random.shuffle(rxn_ids)
    # Explicit start index: `rxn_ids[-0:]` would select every reaction
    rxns_to_remove = rxn_ids[len(rxn_ids) - number_to_remove:]

    protected = set(ignore)
    rxns_to_remove = [rxn_id for rxn_id in rxns_to_remove if rxn_id not in protected]

    truncated_model = copy.deepcopy(model)
    truncated_model.remove_reactions(rxns_to_remove)

    if prune:
        # NOTE(review): the return value is discarded, as in the original.
        # Confirm that the installed cobra version prunes in place rather
        # than returning a pruned copy.
        prune_unused_metabolites(truncated_model)

    print('Reactions removed: ' + str(len(rxns_to_remove)))
    print('New objective value: ' + str(truncated_model.slim_optimize()))
    return truncated_model, rxns_to_remove

In [11]:
# Calculates the sum of fluxes for a given model
def sum_of_fluxes(model):
    """Optimise ``model`` inside a model context and add up all solution fluxes.

    The fluxes are summed as they are (signed values).

    Returns
    -------
    The sum of ``solution.fluxes``.
    """
    with model as scoped_model:
        fluxes = scoped_model.optimize().fluxes
        total_flux = sum(fluxes)

    return total_flux


# Reports the reaction ids that are in model_1 but not in model_2
def rxn_diff(model_1, model_2):
    """Return the ids of reactions present in ``model_1`` but absent from ``model_2``.

    The comparison is one-directional and the order of the returned list is
    not defined.
    """
    first_ids = {str(rxn.id) for rxn in model_1.reactions}
    second_ids = [str(rxn.id) for rxn in model_2.reactions]
    only_in_first = first_ids.difference(second_ids)
    return list(only_in_first)

# Check for missing transport and exchange reactions
def missingRxn(model, extracellular='extracellular'):
    """Variant of ``missingRxns`` whose default compartment id is ``'extracellular'``.

    The body used to be a verbatim copy of ``missingRxns``. It now delegates
    to that function, so the two cannot drift apart.

    Parameters
    ----------
    model : COBRApy model
    extracellular : str
        Compartment id that marks a metabolite as extracellular.

    Returns
    -------
    tuple of (list of str, list of str)
        Ids of extracellular metabolites without a transport reaction, and
        ids of those without an exchange reaction.
    """
    return missingRxns(model, extracellular)

In [12]:
#Biomass
#https://pubmed.ncbi.nlm.nih.gov/27939572/
#Integration of Biomass Formulations of Genome-Scale Metabolic Models with Experimental Data 
#Reveals Universally Essential Cofactors in Prokaryotes
#Joana C Xavier, Kiran Raosaheb Patil, Isabel Rocha
#PMID: 27939572 PMCID: PMC5249239

#The Gc model is missing the following biomass components, which were found to be universally essential:
#All of these components are metabolites in the model and just need to be added to the biomass equation.
#_acp
#_amp
#_cmp
#_gmp
#_cdp
#_gdp

#Add ACP as a biomass component
# Look up the biomass reaction 'Nm_Ess_biomass'; this line only fetches it and does not modify it.
# NOTE(review): the recorded output is a NameError because `model` is loaded in a later cell.
r=model.reactions.get_by_id('Nm_Ess_biomass')


NameError: name 'model' is not defined

In [13]:
# Set the 'bigg.metabolite' annotation of metabolite trp_L_c to 'trp__L'.
# NOTE(review): the recorded output is a NameError because `model` is loaded in a later cell.
model.metabolites.trp_L_c.annotation['bigg.metabolite'] = 'trp__L'

NameError: name 'model' is not defined

In [14]:
# Import libraries - REQUIRES pip version 9.0.3
import pandas
import os
from os.path import join
import sys
import scipy.stats
import numpy
import math
import pickle
import copy
import time
import warnings
import gc

# Using Cobrapy 0.13.0
import cobra
import cobra.test
import cobra.flux_analysis.gapfilling
from cobra import Reaction, Metabolite
from cobra.io import write_sbml_model
from cobra.flux_analysis import  flux_variability_analysis
from cobra.flux_analysis.reaction import assess_component
from cobra.manipulation.delete import *
from cobra.flux_analysis.parsimonious import add_pfba
from cobra.medium import find_boundary_types
from cobra.util import solver as sutil


#Simplify reading/writing files
# Base directory for the data paths below: the parent of the current working
# directory, moved up two more levels and resolved to a real path. Despite its
# name, `cwd` is therefore not the current working directory itself.
cwd=os.path.realpath(os.path.join(os.path.dirname(os.getcwd()),"..",".."))

In [15]:
# Quicker way to read in models
import pickle
def read_model(fileName, obj='none'):
    """Load a model from file, choosing the reader from the file extension.

    Supported extensions (case-insensitive): ``sbml``/``xml``, ``json``,
    ``yaml``/``yml``, ``mat`` and ``pkl``.

    After loading, the bounds of every boundary reaction are set to
    ``(-1000., 1000.)``, which replaces whatever bounds were stored in the file.

    Parameters
    ----------
    fileName : str
        Path of the model file.
    obj : optional
        When different from ``'none'`` it is assigned to ``model.objective``.

    Returns
    -------
    The loaded model.

    Raises
    ------
    TypeError
        If the file extension is not recognised.
    """
    fileType = fileName.split('.')[-1].lower()

    if fileType in ('sbml', 'xml'):
        model = cobra.io.read_sbml_model(fileName)
    elif fileType == 'json':
        model = cobra.io.load_json_model(fileName)
    elif fileType in ('yaml', 'yml'):
        model = cobra.io.load_yaml_model(fileName)
    elif fileType == 'mat':
        model = cobra.io.load_matlab_model(fileName)
    elif fileType == 'pkl':
        # SECURITY: unpickling can execute arbitrary code, so only load trusted files.
        # The context manager closes the handle, which the original left open.
        with open(fileName, 'rb') as handle:
            model = pickle.load(handle)
    else:
        raise TypeError('Unrecognized file extension')

    if obj != 'none':
        model.objective = obj

    # Open every boundary reaction in both directions
    for rxn in model.boundary:
        rxn.bounds = (-1000., 1000.)

    return model

In [18]:
# Load the in-progress curated model; read_model also resets all boundary bounds to (-1000, 1000).
model=read_model(cwd+'/Gc_GENRE_2022/Curate_Gc_Model/In_progress_curation/NGO_557.sbml')

In [19]:
# Load the "polished" version of the same model for comparison against `model`.
model2=read_model(cwd+'/Gc_GENRE_2022/Curate_Gc_Model/In_progress_curation/NGO_557_polished.sbml')

In [20]:
# Display the summary table of `model`.
model

0,1
Name,iNGO_557
Memory address,0x01696c6dd100
Number of metabolites,1341
Number of reactions,1433
Number of groups,0
Objective expression,1.0*Nm_Ess_biomass - 1.0*Nm_Ess_biomass_reverse_46af5
Compartments,"periplasm, cytosol, extracellular"


In [21]:
# Display the summary table of `model2`.
model2

0,1
Name,iNGO_557
Memory address,0x016970c30c10
Number of metabolites,1341
Number of reactions,1433
Number of groups,0
Objective expression,1.0*Nm_Ess_biomass - 1.0*Nm_Ess_biomass_reverse_46af5
Compartments,"periplasm, cytosol, extracellular space, default"


In [29]:
# Show the annotation of pyr_c in `model`.
model.metabolites.pyr_c.annotation

{}

In [28]:
# Show the annotation of pyr_c in `model2`.
model2.metabolites.pyr_c.annotation

{'bigg.metabolite': 'pyr',
 'biocyc': 'META:PYRUVATE',
 'chebi': ['CHEBI:14987',
  'CHEBI:15361',
  'CHEBI:26462',
  'CHEBI:26466',
  'CHEBI:32816',
  'CHEBI:45253',
  'CHEBI:8685'],
 'hmdb': 'HMDB00243',
 'inchikey': 'LCTONWCANYUPML-UHFFFAOYSA-M',
 'kegg.compound': 'C00022',
 'lipidmaps': 'LMFA01060077',
 'metanetx.chemical': 'MNXM23',
 'reactome': ['R-ALL-1130930',
  'R-ALL-113557',
  'R-ALL-29398',
  'R-ALL-389680',
  'R-ALL-5357717'],
 'seed.compound': 'cpd00020'}

In [35]:
# Print the ids of metabolites whose charge differs between `model` and `model2`.
# Charges are compared by value: the previous `is not` tested object identity,
# which can also be true for two equal numbers. Any listing recorded with the
# identity test may therefore contain false positives.
for x in model.metabolites:
    if x.charge != model2.metabolites.get_by_id(x.id).charge:
        print(x.id)

pg161_c
3hcmrs7eACP_c
dttp_c
spmd_c
f6p_e
f6p_c
adp_c
aso3_p
pg161_p
glcur1p_p
anth_c
cmp_e
anhgm4p_p
5dglcn_p
methf_c
ps161_c
anhgm4p_c
thf_c
ps120_c
4h2oglt_c
ahdt_c
3php_c
murein5p5p5p_p
thm_p
u3hga_c
thm_c
thm_e
acon_C_c
cit_p
cit_c
udpg_c
murein4px4px4p_p
udpg_e
udpg_p
hmbil_c
udpacgal_p
udpacgal_e
iasp_c
LalaDgluMdap_p
LalaDgluMdap_c
LalaDgluMdap_e
ac_c
ac_e
no3_p
ac_p
no3_e
ag_e
ag_c
cu2_p
glx_c
1odecg3p_c
cu2_c
cu2_e
2agpg140_p
2agpg140_c
dtdp_c
2agpg180_c
succ_p
succ_e
succ_c
dgmp_c
dgmp_e
dgmp_p
unaga_c
ctp_c
pi_p
h_p
btn_c
gthrd_c
gthrd_e
gthrd_p
ppcoa_c
ddcacoa_c
uacgam_c
ugmda_c
uacgam_p
phom_c
2obut_c
cd2_p
cd2_e
cd2_c
2pglyc_c
2mcit_c
acnam_c
pg120_c
ugmd_c
5dglcn_e
frulys_p
3ig3p_c
ppp9_c
frulys_e
glyclt_p
2dda7p_c
pg120_p
ppbng_c
uaagmda_c
1pyr5c_c
cmp_p
amet_c
u23ga_c
skm5p_c
pppg9_c
pa181_c
pa181_p
colipa_e
sl26da_c
gmhep7p_c
tdcoa_c
hg2_p
gar_c
fadh2_c
hg2_e
fum_c
fum_e
fum_p
acg5sa_c
2ddecg3p_c
stcoa_c
hmgth_c
nh4_c
pgp161_p
4ampm_c
pa141_p
pgp161_c
pa141_c
ckdo_c


In [44]:
# Load the E. coli model and the annotated Gc model that are compared in the following cells.
ecoli = read_model(cwd+ '/Gc_GENRE_2022/Models/ecoli.json')
# NOTE(review): hard-coded absolute, user-specific path; this only runs on the original machine.
test=read_model('C:/Users/Aimee/Documents/Metabolic_Modeling/organized/models/annotatedGCmodel.json')

In [46]:
# Print the ids of metabolites present in both `test` and `ecoli` whose charges differ.
# Charges are compared by value: the previous `is not` tested object identity,
# which can also be true for two equal numbers.
for x in test.metabolites:
    if x in ecoli.metabolites and x.charge != ecoli.metabolites.get_by_id(x.id).charge:
        print(x.id)

murein5p5p5p_p
murein4px4px4p_p
hmbil_c
colipa_e
murein4px4p4p_p
gdptp_c
kdo2lipid4_c
dscl_c
hhlipa_c
hlipa_c
icolipa_c
uppg3_c
enlipa_e
lipa_c
lipa_e
lipa_p
5mthf_c
sheme_c
murein5px4px4p_p
prpp_c
kdo2lipid4L_c
prbatp_c
ppgpp_c
apoACP_c


In [54]:
# Print the ids of metabolites that are absent from `ecoli` and have a charge of 0 in `test`.
for x in test.metabolites:
    if x in ecoli.metabolites:
        continue
    if x.charge == 0:
        print(x.id)

lyx_L_p
lyx_L_e
hcys_L_c
tyr_L_p
tyr_L_c
tyr_L_e
ala_L_c
ala_L_e
ala_L_p
val_L_c
val_L_e
val_L_p
lald_L_c
cys_D_p
cys_D_e
murein4p4p4p_p
hom_L_e
hom_L_c
hom_L_p
ser_L_p
ser_L_c
ser_L_e
biliverd_c
his_L_p
his_L_e
his_L_c
tartr_L_p
tartr_L_e
sbt_D_p
sbt_D_e
glyc_R_p
glyc_R_e
galct_D_e
xylu_L_p
galct_D_p
xylu_L_e
arab_L_e
26dap_M_c
galctn_D_e
co_p
co_c
co_e
pnto_R_e
pnto_R_p
xyl_D_p
athr_L_c
murein4p4p4pOAc_p
arab_L_p
Hg2_p
Hg2_c
1p3h5c_e
4hpro_LT_c
1p3h5c_c
e4hglu_e
cyst_L_c
met_L_p
met_L_e
met_L_c
nicrns_c
glc_D_p
glc_D_e
glc_D_c
all_D_p
all_D_e
Tf_e
mal_D_e
mal_D_p
ala_D_p
ala_D_c
ala_D_e
galctn_D_p
cys_L_e
cys_L_c
cys_L_p
rib_D_c
rib_D_e
rib_D_p
Hg_e
citr_L_c
ser_D_c
ser_D_e
pro_L_c
ser_D_p
pro_L_p
gln_L_p
gln_L_c
gln_L_e
pe141b_c
pe141b_p
phe_L_c
leu_L_p
leu_L_c
leu_L_e
ile_L_p
ile_L_e
ile_L_c
Lf_e
LfFe3_e
thr_L_p
thr_L_c
thr_L_e
pser_L_p
pser_L_e
asn_L_c
asn_L_e
galctn_L_e
asn_L_p
galctn_L_p
pro_L_e
fuc_L_e
fuc_L_p
metsox_R_L_p
metsox_R_L_e
metsox_R_L_c
phe_L_e
ascb_L_e
ascb_L_p
phe

In [None]:
# NOTE(review): incomplete expression left in a cell that was never executed; it is not valid
# Python as written. `metabolite` is presumably a typo for `metabolites`, to be confirmed.
test.metabolite.

In [320]:
# Pass 1: collect and print the ids of metabolites in `model` that are absent from `universalmodel`.
# NOTE(review): for metabolites present in both models the original evaluated
# `mets.charge == <universal charge>` and discarded the result, which is a no-op.
# If the intent was to copy the charge from the universal model, an assignment
# is needed. Confirm before relying on the charges.
biggmets=[]
for mets in model.metabolites:
    if mets not in universalmodel.metabolites:
        biggmets.append(mets.id)
        print(mets.id)

# Insert an extra '_' before the last three characters of unmatched ids that
# contain '_S', '_L' or '_D' (e.g. 'xxx_L_c' becomes 'xxx__L_c').
for x in model.metabolites:
    y=x.id
    if y in biggmets and ('_S' in y or '_L' in y or '_D' in y):
        x.id= y[:-3] + '_' + y[-3:]

# Pass 2: recompute the unmatched list after the first round of renaming.
biggmets=[]
for mets in model.metabolites:
    if mets not in universalmodel.metabolites:
        biggmets.append(mets.id)
        print(mets.id)

# Same renaming for unmatched ids that contain '_R'.
for x in model.metabolites:
    y=x.id
    if y in biggmets and '_R' in y:
        x.id= y[:-3] + '_' + y[-3:]

# Pass 3: final list of metabolites still absent from `universalmodel`.
biggmets=[]
for mets in model.metabolites:
    if mets not in universalmodel.metabolites:
        biggmets.append(mets.id)
        print(mets.id)

galicolipa_c
cmrs3eACP_c
cdp161_c
gnagalicolipa_c
cdpdodec7eg_c
murein4p4p4p_p
2hdect9g3p_c
2hdect9g3p_p
1hdect9eg3p_c
glyc_R_p
glyc_R_c
glyc_R_e
murein4p4pOAc_p
26dap_M_c
h5deACP_c
murein5p4pOAc_p
pnto_R_c
pnto_R_e
pnto_R_p
focytC_p
murein4p4p4pOAc_p
murein4px4px4pOAc_p
Hg2_p
Hg2_c
dhap_p
ficytC_p
1p3h5c_e
3ocpalm5eACP_c
e4hglu_e
t3c5palmeACP_c
3hcpalm5eACP_c
t3c7vaceACP_c
Tf_e
TfFe3_e
murein4p3pOAc_p
Hg_e
3hcvac7eACP_c
pe141b_c
pe141b_p
1odec7eg3p_c
gnahhlipa_c
Lf_e
LfFe3_e
pant_R_c
oct7eACP_c
murein4p4pdOAc_p
3ocvac7eACP_c
pa160a_c
tdec3eACP_c
sialylated_los_GC_c
los_GC_c
2odec7eg3p_c
2odec7eg3p_p
hdcat9_p
biomass
HgFe3_e
hdet9ACP_c
cystine__L_p
glcgnahhlipa_c
galglcgnahhlipa_c
murein4px4px4pdOAc_p
murein4px4px4ptOAc_p
galicolipa_c
cmrs3eACP_c
cdp161_c
gnagalicolipa_c
cdpdodec7eg_c
murein4p4p4p_p
2hdect9g3p_c
2hdect9g3p_p
1hdect9eg3p_c
glyc_R_p
glyc_R_c
glyc_R_e
murein4p4pOAc_p
26dap_M_c
h5deACP_c
murein5p4pOAc_p
pnto_R_c
pnto_R_e
pnto_R_p
focytC_p
murein4p4p4pOAc_p
murein4px4px4pOA