In [2]:
import cobra
import copy

In [3]:
# curated Clostridium difficile 630 model
iCdG707 = cobra.io.read_sbml_model('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/reconstructions/iCdG707.sbml')
for x in iCdG707.boundary: x.bounds = (-1000.,1000.)

In [4]:
iCdG707

0,1
Name,iCdG707
Memory address,0x07f38b5bb5290
Number of metabolites,1184
Number of reactions,1214
Number of groups,0
Objective expression,1.0*biomass - 1.0*biomass_reverse_01e59
Compartments,"cytosol, extracellular, periplasm"


In [5]:
len(iCdG707.genes)

707

In [6]:
iCdG707.slim_optimize()

174.1129448494124

In [7]:
# Get model gene IDs
iCdG707_genes = set([x.id for x in iCdG707.genes])
patric_draft = cobra.io.load_json_model('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/r20291_draft.json')
draft_R20291_genes = set([x.id for x in patric_draft.genes])

In [8]:
# Remove free biomass components
patric_draft.reactions.rxn13782_c.remove_from_model(remove_orphans=True)
patric_draft.reactions.rxn13784_c.remove_from_model(remove_orphans=True)

In [9]:
# Read in protein alignments

# 630 vs R20291
Cd630_geneDict = {}
R20291_missing = []
with open('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/proteome_comparisons/Cd630_R20291.tsv', 'r') as genes:
    header = genes.readline()
    for line in genes:
        Cd630_gene = line.split()[0]
        R20291_gene = line.split()[1]
        
        if not Cd630_gene in iCdG707_genes:
            continue
        elif R20291_gene == 'none':
            R20291_missing.append(Cd630_gene)
            continue
        else:
            Cd630_geneDict[Cd630_gene] = R20291_gene   

# R20291 vs 630
R20291_geneDict = {}
Cd630_missing = []
with open('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/proteome_comparisons/R20291_Cd630.tsv', 'r') as genes:
    header = genes.readline()
    for line in genes:
        R20291_gene = line.split()[0]
        Cd630_gene = line.split()[1]
        
        if Cd630_gene == 'none':
            Cd630_missing.append(R20291_gene)
            continue
        else:
            R20291_geneDict[R20291_gene] = Cd630_gene  
            
# Peptoclostridium 630 vs R20291
Pepto630_geneDict = {}
with open('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/proteome_comparisons/Pepto630_R20291.tsv', 'r') as genes:
    header = genes.readline()
    for line in genes:
        Pepto630_gene = line.split()[0]
        R20291_gene = line.split()[1]
        
        if not Pepto630_gene in iCdG707_genes:
            continue
        elif R20291_gene == 'none':
            R20291_missing.append(Pepto630_gene)
            continue
        else:
            Pepto630_geneDict[Pepto630_gene] = R20291_gene   

# R20291 vs Peptoclostridium 630
R20291_geneDict = {}
Pepto630_missing = []
with open('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/proteome_comparisons/R20291_Pepto630.tsv', 'r') as genes:
    header = genes.readline()
    for line in genes:
        R20291_gene = line.split()[0]
        Pepto630_gene = line.split()[1]
        
        if Cd630_gene == 'none':
            Pepto630_missing.append(R20291_gene)
            continue
        else:
            R20291_geneDict[R20291_gene] = Pepto630_gene  

# Create complete translation dictionary
geneDict = {}
geneDict.update(Cd630_geneDict)
geneDict.update(Pepto630_geneDict)

# Get gene objects to be removed
remove_genes = [iCdG707.genes.get_by_id(x) for x in set(R20291_missing)]

In [10]:
# Create duplicate GENRE
draft_GENRE = copy.deepcopy(iCdG707)

In [11]:
# Delete genes that are not present in R20291
R20291_missing.append('1151372.4.peg.1723')
remove_genes = [draft_GENRE.genes.get_by_id(x) for x in set(R20291_missing)]
cobra.manipulation.delete.remove_genes(draft_GENRE, remove_genes)

In [12]:
# Rename homologous genes
cobra.manipulation.modify.rename_genes(draft_GENRE, geneDict)

In [13]:
# Add genes that only appear in R20291
included_genes = list(set(draft_R20291_genes).intersection(set(Cd630_missing)))
current_rxn_ids = set([x.id for x in draft_GENRE.reactions])
rxns_added = 0
add_manually = []
for gene in patric_draft.genes:
    if not gene.id in included_genes: 
        add_manually.append(gene.id)
        continue
    
    for rxn in gene.reactions:
        if rxn.id in current_rxn_ids:
            # update existing gene_reaction_rule
            current_gpr = draft_GENRE.reactions.get_by_id(rxn.id).gene_reaction_rule
            if current_gpr != '':
                current_gpr += ' or ' + gene.id
            else:
                current_gpr = gene.id
            draft_GENRE.reactions.get_by_id(rxn.id).gene_reaction_rule = current_gpr
            # update gene name
            draft_GENRE.genes.get_by_id(gene.id).name = patric_draft.genes.get_by_id(gene.id).name 
        else:
            draft_GENRE.add_reactions([rxn])
            rxns_added += 1
            current_gpr = draft_GENRE.reactions.get_by_id(rxn.id).gene_reaction_rule
            draft_GENRE.reactions.get_by_id(rxn.id).gene_reaction_rule = current_gpr
            
print('Reactions added: ' + str(rxns_added))

Reactions added: 4


In [14]:
# Identify genes that must be added from the draft reconstruction
draft_genes = set([gene.id for gene in draft_GENRE.genes])
add_manually = set(add_manually).difference(draft_genes)
current_rxn_ids = set([x.id for x in draft_GENRE.reactions])

for gene in add_manually:
    for rxn in patric_draft.genes.get_by_id(gene).reactions:
        if rxn.id in current_rxn_ids:
            # update existing gene_reaction_rule
            current_gpr = draft_GENRE.reactions.get_by_id(rxn.id).gene_reaction_rule
            if current_gpr != '':
                current_gpr += ' or ' + gene
            else:
                current_gpr = gene
            draft_GENRE.reactions.get_by_id(rxn.id).gene_reaction_rule = current_gpr
            # update gene name
            draft_GENRE.genes.get_by_id(gene).name = patric_draft.genes.get_by_id(gene).name 
        else:
            draft_GENRE.add_reactions([rxn])
            draft_GENRE.genes.get_by_id(gene).name = patric_draft.genes.get_by_id(gene).name 
            rxns_added += 1

print('Reactions added: ' + str(rxns_added))

Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn05528_c' since it already exists.
Ignoring reaction 'rxn10481_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn13783_c' since it already exists.
Ignoring reaction 'rxn05528_c' since it already exists.
Ignoring reaction 'rxn10481_c' since it already exists.
Ignoring reaction 'rxn05516_c' since it already 

Reactions added: 46


In [15]:
# Fix a few remaining incorrect GPRs 
draft_GENRE.reactions.rxn02003_c.gene_reaction_rule = '645463.3.peg.237'
draft_GENRE.reactions.rxn14293_c.gene_reaction_rule += ' or 645463.3.peg.260'
draft_GENRE.reactions.rxn06517_c.gene_reaction_rule += ' or 645463.3.peg.1978'
draft_GENRE.reactions.rxn01404_c.gene_reaction_rule += ' or 645463.3.peg.658'
draft_GENRE.reactions.rxn01368_c.gene_reaction_rule += ' or 645463.3.peg.769'
draft_GENRE.reactions.rxn00119_c.gene_reaction_rule += ' or 645463.3.peg.2191'

In [16]:
# Replace removed Biomass component
cpd02967_c = draft_GENRE.metabolites.get_by_id('cpd02967_c') # N-Acetyl-beta-D-mannosaminyl-1,4-N-acetyl-D-glucosaminyldiphosphoundecaprenol
cpd00402_c = draft_GENRE.metabolites.get_by_id('cpd00402_c') # CDPglycerol
cpd00046_c = draft_GENRE.metabolites.get_by_id('cpd00046_c') # CMP

cpd12894_c = cobra.Metabolite(
    'cpd12894_c',
    formula='',
    name='Teichoic acid',
    compartment='cytosol')

# R20291 homolog unknown - Stabler et al. (2009). Genome Biol.
#gene = cobra.Gene('272563.8.peg.262')
#gene.name = 'Putative CDP-glycerol:Poly(Glycerophosphate) glycerophosphotransferase'

teichoicacid_rxn = cobra.Reaction('teichoicacid_rxn')
#teichoicacid_rxn.gene_reaction_rule = '272563.8.peg.262'
teichoicacid_rxn.name = 'Teichoic acid biosynthesis'
teichoicacid_rxn.lower_bound = 0.
teichoicacid_rxn.upper_bound = 1000.
teichoicacid_rxn.add_metabolites({
    cpd02967_c: -1.0,
    cpd00402_c: -1.0,
    cpd00046_c: 1.0,
    cpd12894_c: 1.0
})
draft_GENRE.add_reactions([teichoicacid_rxn])

In [17]:
# Replace Stickland pathway reaction
cpd00005_c = draft_GENRE.metabolites.get_by_id('cpd00005_c') # NADPH
cpd00067_c = draft_GENRE.metabolites.get_by_id('cpd00067_c') # H+
cpd00498_c = draft_GENRE.metabolites.get_by_id('cpd00498_c') # 2-Aceto-2-hydroxybutanoate
cpd00006_c = draft_GENRE.metabolites.get_by_id('cpd00006_c') # NADP
cpd02535_c = draft_GENRE.metabolites.get_by_id('cpd02535_c') # 2,3-Dihydroxy-3-methylvalerate

gene = cobra.Gene('645463.3.peg.1493')
gene.name = 'Ketol-acid reductoisomerase (NADP(+)) (EC 1.1.1.86)'

rxn08764_c = cobra.Reaction('rxn08764_c')
rxn08764_c.gene_reaction_rule = '645463.3.peg.1493'
rxn08764_c.name = 'Teichoic acid biosynthesis'
rxn08764_c.lower_bound = -1000.
rxn08764_c.upper_bound = 1000.
rxn08764_c.add_metabolites({
    cpd00005_c: -1.0,
    cpd00067_c: -1.0,
    cpd00498_c: -1.0,
    cpd00006_c: 1.0,
    cpd02535_c: 1.0
})
draft_GENRE.add_reactions([rxn08764_c])

In [18]:
# Remove DNA demand
try:
    draft_GENRE.reactions.get_by_id('rxn13783_c').remove_from_model(remove_orphans=True)
except:
    pass

In [19]:
# Prune orphaned nodes
cpd_total = 0
rxn_total = 0
removed = 1
while removed == 1:
    removed = 0

    # Metabolites
    for cpd in draft_GENRE.metabolites:
        if len(cpd.reactions) == 0:
            cpd.remove_from_model()
            removed = 1
            cpd_total += 1

    # Reactions
    for rxn in draft_GENRE.reactions:
        if len(rxn.metabolites) == 0: 
            rxn.remove_from_model()
            removed = 1
            rxn_total += 1
    
print('Metabolites pruned: ' + str(cpd_total))
print('Reactions pruned: ' + str(rxn_total))

Metabolites pruned: 2
Reactions pruned: 0


In [20]:
# Fix compartments
for cpd in draft_GENRE.metabolites:
    if cpd.compartment == 'c':
        cpd.compartment = 'cytosol'
    elif cpd.compartment == 'e':
        cpd.compartment = 'extracellular'
    elif cpd.compartment == 'p':
        cpd.compartment = 'periplasm'

In [21]:
# Open exchange bounds
for rxn in draft_GENRE.boundary:
    rxn.bounds = (-1000.0,1000.0)

In [22]:
# Test that new model grows
draft_GENRE.slim_optimize()

174.11294484941234

In [23]:
# Test growth in minimal media conditions

# Karlsson et al. (1999). Microbiology.
mdm = ['cpd00001_e', # water
       'cpd00065_e', # L-Tryptophan
       'cpd00060_e', # L-Methionine
       'cpd00322_e', # L-Isoleucine
       'cpd00129_e', # L-Proline
       'cpd00156_e', # L-Valine
       'cpd00107_e', # L-Leucine
       'cpd00084_e', # L-Cysteine 
       'cpd00149_e', # Cobalt
       'cpd00099_e', # Chloride
       'cpd10515_e', # Iron
       'cpd00030_e', # Manganese
       'cpd00254_e', # Magnesium
       'cpd00063_e', # Calcium
       'cpd00205_e', # Potassium
       'cpd00009_e', # Phosphate
       'cpd00971_e', # Sodium
       'cpd00242_e', # Carbonate
       'cpd00104_e', # Biotin
       'cpd00644_e', # Pantothenate
       'cpd00263_e', # Pyridoxine
       'cpd00027_e'] # D-Glucose (Carbohydrate C-source)

# Theriot et al. (2013). Nature Communications.
ncmm = ['cpd00001_e', # water
        'cpd00104_e', # Biotin
        'cpd00644_e', # Pantothenate
        'cpd00263_e', # Pyridoxine
        'cpd00149_e', # Cobalt
        'cpd00099_e', # Chloride
        'cpd10515_e', # Iron
        'cpd00030_e', # Manganese
        'cpd00254_e', # Magnesium
        'cpd00063_e', # Calcium
        'cpd00205_e', # Potassium
        'cpd00009_e', # Phosphate
        'cpd00971_e', # Sodium
        'cpd00242_e', # Carbonate
        'cpd00322_e', # L-Isoleucine
        'cpd00129_e', # L-Proline
        'cpd00156_e', # L-Valine
        'cpd00107_e', # L-Leucine
        'cpd00084_e', # L-Cysteine 
        'cpd00065_e', # L-Tryptophan
        'cpd00060_e', # L-Methionine
        'cpd00119_e', # L-Histidine
        'cpd00033_e', # Glycine
        'cpd00051_e', # L-Arginine
        'cpd00161_e'] # L-Threonine

ncmm_exchanges = ['EX_' + x for x in ncmm]
for rxn in draft_GENRE.boundary:
    if not rxn.id in ncmm_exchanges:
        rxn.bounds = (0.0,1000.0)
    else:
        rxn.bounds = (-1000.0,1000.0)
print('NCMM: ' + str(draft_GENRE.slim_optimize()))

mdm_exchanges = ['EX_' + x for x in mdm]
for rxn in draft_GENRE.boundary:
    if not rxn.id in mdm_exchanges:
        rxn.bounds = (0.0,1000.0)
    else:
        rxn.bounds = (-1000.0,1000.0)
print('MDM: ' + str(draft_GENRE.slim_optimize()))

# Leave in MDM

NCMM: 45.47550954961071
MDM: 70.05819375320472


In [24]:
# Remove genes with no reactions
remove_genes = [draft_GENRE.genes.get_by_id(x) for x in ['645463.3.peg.1041','645463.3.peg.1043','645463.3.peg.1040','645463.3.peg.1042','645463.3.peg.1045','645463.3.peg.1044']]
cobra.manipulation.delete.remove_genes(draft_GENRE, remove_genes)

In [25]:
# Name new model
draft_GENRE.name = 'Clostridium difficile R20291'
draft_GENRE.id = 'iCdR' + str(len(draft_GENRE.genes))

In [26]:
draft_GENRE

0,1
Name,iCdR700
Memory address,0x07f38cc8e0110
Number of metabolites,1206
Number of reactions,1228
Number of groups,0
Objective expression,0.0 + 1.0*biomass - 1.0*biomass_reverse_01e59
Compartments,"cytosol, extracellular, periplasm"


In [27]:
# Add annotation information

# SBO designations
# Metabolites
for cpd in draft_GENRE.metabolites:
    cpd.annotation['sbo'] = 'SBO:0000247'
# Reactions
for rxn in draft_GENRE.reactions:
    substrates = list(rxn.metabolites)
    compartments = set([x.compartment for x in substrates])
    if 'EX_' in rxn.id:
        rxn.annotation['sbo'] = 'SBO:0000627' # exchange
    elif len(compartments) > 1:
        rxn.annotation['sbo'] = 'SBO:0000185' # transport
    else:
        rxn.annotation['sbo'] = 'SBO:0000176' # metabolic
# Biomass
draft_GENRE.reactions.dna_rxn.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.rna_rxn.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.protein_rxn.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.teichoicacid_rxn.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.peptidoglycan_rxn.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.cellwall_rxn.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.lipid_rxn.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.cofactor_rxn.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.biomass.annotation['sbo'] = 'SBO:0000629'  
draft_GENRE.reactions.SK_cpd11416_c.annotation['sbo'] = 'SBO:0000632'  
# Genes
for gene in draft_GENRE.genes:
    gene.annotation['sbo'] = 'SBO:0000243'

# RefSeq annotations
patric_refseq = {}
with open('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/r20291_refseq_genes.tsv') as ref:
    for line in ref:
        patric_refseq[line.split()[0]] = line.split()[1]
for gene in draft_GENRE.genes:
    if '.peg.' in gene.id:
        gene.annotation['refseq'] = 'NC_013316.1'
        try:
            gene.annotation['refseq.locustag'] = patric_refseq[gene.id]
        except:
            continue

# Metabolites
for cpd in draft_GENRE.metabolites:
    if 'cpd' in cpd.id:
        new_id = str(cpd.id).split('_')
        if len(new_id) > 1:
            del new_id[-1]
        new_id = ''.join(new_id)
        cpd.annotation['seed.compound'] = new_id
draft_GENRE.metabolites.C21400_c.annotation['kegg.compound'] = 'C21400'        
draft_GENRE.metabolites.C21399_c.annotation['kegg.compound'] = 'C21399'        
draft_GENRE.metabolites.C21090_c.annotation['kegg.compound'] = 'C21090'        
draft_GENRE.metabolites.C21399_e.annotation['kegg.compound'] = 'C21399'        

# Reactions
for rxn in draft_GENRE.reactions:
    if 'rxn' in rxn.id or 'EX_' in rxn.id:
        new_id = str(rxn.id).split('_')
        if len(new_id) > 1:
            del new_id[-1]
        new_id = ''.join(new_id)
        rxn.annotation['seed.reaction'] = new_id
    elif 'R' in rxn.id:
        new_id = str(rxn.id).split('_')
        if len(new_id) > 1:
            del new_id[-1]
        new_id = ''.join(new_id)
        rxn.annotation['kegg.reaction'] = new_id
draft_GENRE.reactions.SK_cpd11416_c.annotation['seed.reaction'] = 'SK_cpd11416'
draft_GENRE.reactions.ENOG4108HXH_c.annotation['eggnog.reaction'] = 'ENOG4108HXH'
draft_GENRE.reactions.COG3601_c.annotation['cog.reaction'] = 'COG3601'
draft_GENRE.reactions.K20025_c.annotation['kegg.reaction'] = 'K20025'

# Genes
for gene in draft_GENRE.genes:
    if '.peg.' in gene.id:
        gene.annotation['patric'] = 'fig|' + gene.id

In [28]:
# Correct residual gene names
draft_GENRE.genes.get_by_id('645463.3.peg.1176').name = 'Na(+)-dependent branched-chain amino acid transporter'
draft_GENRE.genes.get_by_id('645463.3.peg.735').name = 'ABC transporter, substrate-binding protein'
draft_GENRE.genes.get_by_id('645463.3.peg.2463').name = 'PTS system, glucitol/sorbitol-specific IIB component'
draft_GENRE.genes.get_by_id('645463.3.peg.929').name = 'N-acetylgalactosamine-6-phosphate deacetylase'
draft_GENRE.genes.get_by_id('645463.3.peg.2324').name = 'ribulose-5-phosphate 4-epimerase'
draft_GENRE.genes.get_by_id('645463.3.peg.46').name = 'Ribulose-5-phosphate 4-epimerase'
draft_GENRE.genes.get_by_id('645463.3.peg.3015').name = 'PTS system, IIB component'
draft_GENRE.genes.get_by_id('645463.3.peg.3014').name = 'PTS system, IIC component'
draft_GENRE.genes.get_by_id('645463.3.peg.3016').name = 'PTS system, IIA component'
draft_GENRE.genes.get_by_id('645463.3.peg.865').name = 'ABC transporter, substrate-binding protein'
draft_GENRE.genes.get_by_id('645463.3.peg.862').name = 'ABC transporter, substrate-binding protein'
draft_GENRE.genes.get_by_id('645463.3.peg.664').name = 'sodium-solute symporter, putative'
draft_GENRE.genes.get_by_id('645463.3.peg.1783').name = 'ABC transporter, ATP-binding protein'
draft_GENRE.genes.get_by_id('645463.3.peg.1421').name = 'DNA-3-methyladenine glycosylase II'
draft_GENRE.genes.get_by_id('645463.3.peg.2254').name = '2-oxoglutarate/2-oxoacid ferredoxin oxidoreductase, delta subunit'
draft_GENRE.genes.get_by_id('645463.3.peg.2253').name = '2-oxoglutarate/2-oxoacid ferredoxin oxidoreductase, alpha subunit'
draft_GENRE.genes.get_by_id('645463.3.peg.168').name = '4-hydroxyphenylacetate decarboxylase, small subunit'
draft_GENRE.genes.get_by_id('645463.3.peg.167').name = '4-hydroxyphenylacetate decarboxylase, large subunit'
draft_GENRE.genes.get_by_id('645463.3.peg.2417').name = 'Indolepyruvate oxidoreductase subunit'
draft_GENRE.genes.get_by_id('645463.3.peg.2389').name = 'Glycine reductase component B beta subunit'
draft_GENRE.genes.get_by_id('645463.3.peg.392').name = '2-hydroxyglutaryl-CoA dehydratase, A-component'
draft_GENRE.genes.get_by_id('645463.3.peg.393').name = '2-hydroxyglutaryl-CoA dehydratase, B-component'
draft_GENRE.genes.get_by_id('645463.3.peg.390').name = 'Hydroxyproline dehydratase putative'
draft_GENRE.genes.get_by_id('645463.3.peg.391').name = 'CoA-substrate-specific enzyme activase; 2-hydroxyglutaryl-CoA dehydratase activator, A-component'
draft_GENRE.genes.get_by_id('645463.3.peg.1466').name = 'NADH-dependent reduced ferredoxin:NADP+ oxidoreductase subunit A'
draft_GENRE.genes.get_by_id('645463.3.peg.169').name = '4-hydroxyphenylacetate decarboxylase activating enzyme'
draft_GENRE.genes.get_by_id('645463.3.peg.2250').name = 'Acetyl-CoA synthetase (ADP-forming) alpha and beta chains, putative'
draft_GENRE.genes.get_by_id('645463.3.peg.2379').name = 'Succinate-semialdehyde dehydrogenase, CoA-dependent'
draft_GENRE.genes.get_by_id('645463.3.peg.976').name = 'Electron bifurcating butyryl-CoA dehydrogenase (NAD+, ferredoxin)'
draft_GENRE.genes.get_by_id('645463.3.peg.2703').name = 'UDP-N-acetylmuramoyl-tripeptide--D-alanyl-D-alanine ligase'
draft_GENRE.genes.get_by_id('645463.3.peg.1493').name = 'Ketol-acid reductoisomerase (NADP(+)) (EC 1.1.1.86) | ilvC'

In [29]:
# Add additional annotation information

# BiGG
BiGG_rxn_ids = {}
with open('/home/mjenior/Desktop/repos/rxns_BiGG_ids.txt', 'r') as bigg_rxns:
    for line in bigg_rxns:
        line = line.split('=')
        modelseed = line[0]
        bigg = line[1].strip().split(',')
        if len(bigg) == 1: bigg = bigg[0]
        BiGG_rxn_ids[modelseed] = bigg
BiGG_cpd_ids = {}
with open('/home/mjenior/Desktop/repos/cpds_BiGG_ids.txt', 'r') as bigg_rxns:
    for line in bigg_rxns:
        line = line.split('=')
        modelseed = line[0]
        bigg = line[1].strip().split(',')
        if len(bigg) == 1: bigg = bigg[0]
        BiGG_cpd_ids[modelseed] = bigg

# KEGG
KEGG_rxn_ids = {}
with open('/home/mjenior/Desktop/repos/rxns_KEGG_ids.txt', 'r') as kegg_rxns:
    for line in kegg_rxns:
        line = line.split('=')
        modelseed = line[0]
        kegg = line[1].strip().split(',')
        if len(kegg) == 1: kegg = kegg[0]
        KEGG_rxn_ids[modelseed] = kegg
KEGG_cpd_ids = {}
with open('/home/mjenior/Desktop/repos/cpds_KEGG_ids.txt', 'r') as kegg_rxns:
    for line in kegg_rxns:
        line = line.split('=')
        modelseed = line[0]
        kegg = line[1].strip().split(',')
        if len(kegg) == 1: kegg = kegg[0]
        KEGG_cpd_ids[modelseed] = kegg

# Add annotations
bigg_rxn_success = 0
bigg_cpd_success = 0
kegg_rxn_success = 0
kegg_cpd_success = 0

for rxn in draft_GENRE.reactions:
    new_id = rxn.id.split('_')[0]
    try:
        bigg_id = BiGG_rxn_ids[new_id]
        draft_GENRE.reactions.get_by_id(rxn.id).annotation['bigg.metabolite'] = bigg_id
        bigg_rxn_success += 1
    except:
        pass
    try:
        kegg_id = KEGG_rxn_ids[new_id]
        draft_GENRE.reactions.get_by_id(rxn.id).annotation['kegg.compound'] = kegg_id
        kegg_rxn_success += 1
    except:
        pass

for cpd in draft_GENRE.metabolites:
    new_id = cpd.id.split('_')[0]
    try:
        bigg_id = BiGG_cpd_ids[new_id]
        draft_GENRE.metabolites.get_by_id(cpd.id).annotation['bigg.metabolite'] = bigg_id
        bigg_cpd_success += 1
    except:
        pass
    try:
        kegg_id = KEGG_cpd_ids[new_id]
        draft_GENRE.metabolites.get_by_id(cpd.id).annotation['kegg.compound'] = kegg_id
        kegg_cpd_success += 1
    except:
        pass

In [30]:
# Add additional gene annotations

uniprot_gene_ids = {}
ec_gene_ids = {}
refseq_gene_ids = {}
kegg_gene_ids = {}
patric_genes = set()

with open('/home/mjenior/Desktop/repos/uniprot_CdR20291.tab', 'r') as gene_annotation:
    header = gene_annotation.readline()
    for line in gene_annotation:
        line = line.split()
        uniprot_gene_ids[line[4]] = line[0]
        ec_gene_ids[line[4]] = line[1]
        refseq_gene_ids[line[4]] = line[2]
        kegg_gene_ids[line[4]] = line[3]
        patric_genes |= set([line[4]])

success = 0
for gene in draft_GENRE.genes:
    if not gene.id in patric_genes: continue
    
    success += 1
    gene.annotation['uniprot'] = uniprot_gene_ids[gene.id]
    gene.annotation['kegg.genes'] = kegg_gene_ids[gene.id]
    
    if refseq_gene_ids[gene.id] != 'none':
        gene.annotation['refseq'] = refseq_gene_ids[gene.id]
    
    if ec_gene_ids[gene.id] != 'none':
        gene.annotation['ec_number'] = ec_gene_ids[gene.id]
    


In [None]:
# Unblock Lactose catabolism

cpd00208_c = draft_GENRE.metabolites.cpd00208_c # Lactose
cpd00001_c = draft_GENRE.metabolites.cpd00001_c # H2O
cpd00027_c = draft_GENRE.metabolites.cpd00027_c # Glucose

cpd00108_c = cobra.Metabolite(
    'cpd00108_c',
    formula='C6H12O6',
    name='Galactose',
    compartment='cytosol')
cpd00108_c.annotation['sbo'] = 'SBO:0000247'
cpd00108_c.annotation['seed.compound'] = 'cpd00108'
cpd00108_c.annotation['bigg.metabolite'] = 'gal'
cpd00108_c.annotation['kegg.compound'] = 'C00124'

rxn00816_c = cobra.Reaction('rxn00816_c')
rxn00816_c.name = 'Beta-galactosidase'
rxn00816_c.lower_bound = 0.
rxn00816_c.upper_bound = 1000.
rxn00816_c.add_metabolites({
    cpd00208_c: -1.0,
    cpd00001_c: -1.0,
    cpd00027_c: 1.0,
    cpd00108_c: 1.0
})
rxn00816_c.annotation['seed.reaction'] = 'rxn00816'
rxn00816_c.annotation['kegg.reaction'] = 'R01100'
rxn00816_c.annotation['bigg.reaction'] = 'LACZ'
draft_GENRE.add_reactions([rxn00816_c])

cpd00002_c = draft_GENRE.metabolites.cpd00002_c # ATP
cpd00008_c = draft_GENRE.metabolites.cpd00008_c # ADP
cpd00067_c = draft_GENRE.metabolites.cpd00067_c # H
cpd00348_c = draft_GENRE.metabolites.cpd00348_c # galactose-1-phosphate 

rxn00808_c = cobra.Reaction('rxn00808_c')
rxn00808_c.name = 'ATP:D-galactose 1-phosphotransferase'
rxn00808_c.lower_bound = 0.
rxn00808_c.upper_bound = 1000.
rxn00808_c.add_metabolites({
    cpd00002_c: -1.0,
    cpd00108_c: -1.0,
    cpd00008_c: 1.0,
    cpd00067_c: 1.0,
    cpd00348_c: 1.0
})
rxn00808_c.annotation['seed.reaction'] = 'rxn00808'
rxn00808_c.annotation['kegg.reaction'] = 'R01092'
rxn00808_c.annotation['bigg.reaction'] = 'GAL1'
draft_GENRE.add_reactions([rxn00808_c])


In [None]:
# Unblock Salicin catabolism

cpd01553_c = draft_GENRE.metabolites.cpd01553_c # Saligenin

cpd01553_e = cobra.Metabolite(
    'cpd01553_e',
    formula='C7H8O2',
    name='Saligenin',
    compartment='extracellular')
cpd01553_e.annotation['sbo'] = 'SBO:0000247'
cpd01553_e.annotation['seed.compound'] = 'cpd01553'
cpd01553_e.annotation['bigg.metabolite'] = '2hymeph'
cpd01553_e.annotation['kegg.compound'] = 'C02323'
draft_GENRE.add_boundary(cpd01553_e, type='exchange', reaction_id='EX_cpd01553_e', lb=-1000.0, ub=1000.0)
draft_GENRE.reactions.EX_cpd01553_e.name = 'Saligenin exchange'
draft_GENRE.reactions.EX_cpd01553_e.annotation['sbo'] = 'SBO:0000627'

cpd00067_c = draft_GENRE.metabolites.cpd00067_c # H
cpd00067_e = draft_GENRE.metabolites.cpd00067_e # H

rxn05472_c = cobra.Reaction('rxn05472_c')
rxn05472_c.name = '2-(hydroxymethyl)phenol transport in/out via proton symport'
rxn05472_c.lower_bound = -1000.
rxn05472_c.upper_bound = 1000.
rxn05472_c.add_metabolites({
    cpd00067_e: -1.0,
    cpd01553_e: -1.0,
    cpd00067_c: 1.0,
    cpd01553_c: 1.0
})
rxn05472_c.annotation['sbo'] = 'SBO:0000185'
rxn05472_c.annotation['seed.reaction'] = 'rxn05472'
rxn05472_c.annotation['bigg.reaction'] = '2HXMPt6'
draft_GENRE.add_reactions([rxn05472_c])


In [None]:
# Correct Melezitose catabolism
remove_gene = [draft_GENRE.genes.get_by_id('1151372.4.peg.1723')]
try:
    cobra.manipulation.delete.remove_genes(draft_GENRE, remove_gene)
except:
    pass

cpd00001_c = draft_GENRE.metabolites.cpd00001_c # H2O
cpd05161_c = draft_GENRE.metabolites.cpd05161_c # Melezitose
cpd00027_c = draft_GENRE.metabolites.cpd00027_c # Glucose
cpd20885_c = draft_GENRE.metabolites.cpd20885_c # Turanose

R01103_c = cobra.Reaction('R01103_c')
R01103_c.name = 'Melezitose galactohydrolase'
R01103_c.lower_bound = 0.
R01103_c.upper_bound = 1000.
R01103_c.add_metabolites({
    cpd00001_c: -1.0,
    cpd05161_c: -1.0,
    cpd00027_c: 1.0,
    cpd20885_c: 1.0
})
R01103_c.annotation['kegg.reaction'] = 'R01103'
R01103_c.annotation['sbo'] = 'SBO:0000176'
draft_GENRE.add_reactions([R01103_c])

draft_GENRE.id = 'iCdR' + str(len(draft_GENRE.genes))

In [31]:
# Set media condition
for rxn in draft_GENRE.boundary:
    if not rxn.id in mdm_exchanges:
        rxn.bounds = (0.0,1000.0)
    else:
        rxn.bounds = (-1000.0,1000.0)

In [32]:
draft_GENRE.reactions.EX_cpd00104_e.name = 'Biotin exchange'
draft_GENRE.reactions.rxn05872_c.name = 'Secondary alcohol:NAD+ oxidoreductase'
draft_GENRE.reactions.EX_cpd00208_e.name = 'Lactose exchange'

In [33]:
# Label tRNA reactions as Biomass
tRNA = ["rxn06449_c","rxn06443_c","rxn06444_c","rxn06445_c","rxn06446_c","rxn06447_c","rxn06448_c","rxn06280_c","rxn06300_c","rxn06432_c","rxn06434_c","rxn06936_c","rxn06435_c","rxn06437_c","rxn06436_c","rxn06438_c","rxn06439_c","rxn06440_c","rxn06441_c","rxn06442_c"]
for rxn in tRNA: draft_GENRE.reactions.get_by_id(rxn).annotation['sbo'] = 'SBO:0000629'

In [34]:
draft_GENRE

0,1
Name,iCdR700
Memory address,0x07f38cc8e0110
Number of metabolites,1206
Number of reactions,1228
Number of groups,0
Objective expression,0.0 + 1.0*biomass - 1.0*biomass_reverse_01e59
Compartments,"cytosol, extracellular, periplasm"


In [35]:
from cobra.flux_analysis import parsimonious

# Function to calculate doubling time from objective value
def doublingTime(model):
    with model as m:
        if m.slim_optimize(error_value=0.) < 1e-6:
            print('GENRE has no objective flux')
        else:
            growth = (1. / float(m.slim_optimize())) * 3600.
            print(str(round(growth, 2)) + ' minutes doubling time')


# Identifies blocked reactions, 1% cutoff for fraction of optimum
def blockedReactions(model):
    
    with model as m:
        blocked = cobra.flux_analysis.variability.find_blocked_reactions(m)
        nogene_blocked = []
        for rxn in blocked:
            if m.reactions.get_by_id(rxn).gene_reaction_rule == '':
                nogene_blocked.append(rxn)

    #print(str(len(blocked)) + ' total reactions are blocked')
    fraction = (float(len(blocked)) / float(len(model.reactions))) * 100.
    fraction = round(fraction, 2)
    print(str(fraction) + '% reactions are blocked')
    
    return blocked


# Identify potentially gapfilled reactions, checks against pFBA solution
def missingGPR(model):
    
    noGene = []
    exclude = []
    for rxn in model.reactions:
        if len(list(rxn.genes)) == 0:
            if rxn.annotation['sbo'] != 'SBO:0000629':
                if rxn in model.boundary:
                    exclude.append(rxn.id)
                    continue
                else:
                    noGene.append(rxn.id)
    
    solution = parsimonious.pfba(model)
    active_rxns = set([rxn.id for rxn in model.reactions if abs(solution.fluxes[rxn.id]) > 1e-5])
    active_rxns = active_rxns.difference(set(exclude))
    noGene_active = set(noGene).intersection(active_rxns)

    fraction = float(len(model.reactions)) - float(len(exclude))
    fraction = (float(len(noGene)) / fraction) * 100.
    fraction = round(fraction, 2)
    print(str(fraction) + '% reactions without GPRs')
    
    fraction = (float(len(noGene_active)) / float(len(active_rxns))) * 100.
    fraction = round(fraction, 2)
    print(str(fraction) + '% of reactions used in pFBA solution have no GPR')
    
    return noGene_active


# Checks which cytosolic metabolites are generated for free (bacteria only)
def checkFreeMass(model, cytosol='cytosol'):

    free = []
    with model as m:
    
        # Close all exchanges
        for rxn in m.boundary: m.reactions.get_by_id(rxn.id).lower_bound = 0.
    
        # Create demand for each reaction and optimize individually
        reset_rxn = m.reactions[0].id
        for cpd in m.metabolites: 
            if cpd.compartment == cytosol:
                demand = cobra.Reaction('demand')
                demand.bounds = (0., 1000.)
                demand.add_metabolites({cpd: -1.0})
                m.add_reactions([demand])
                m.objective = demand
                obj_val = m.slim_optimize()
                if obj_val > 1e-8: free.append(cpd.id)
                m.objective = reset_rxn
                m.remove_reactions([demand])
    
    fraction = (float(len(free)) / float(len(model.metabolites))) * 100.
    fraction = round(fraction, 2)
    print(str(fraction) + '% metabolites are generated for free')

    return(free)


# Check for mass and charge balance in reactions
def checkBalance(model):
    
    with model as m:

        elements = set()
        for cpd in m.metabolites:
            try:
                elements |= set(cpd.elements.keys())
            except:
                pass
        
        massImbal = []
        failed = 0
        if len(elements) == 0:
            print('No elemental data associated with metabolites!')
            failed = 1
        else:
            for rxn in m.reactions:
                if rxn.annotation['sbo'] == 'SBO:0000629': 
                    continue
                elif rxn in m.boundary:
                    continue

                try:
                    test = rxn.check_mass_balance()
                except ValueError:
                    continue

                if len(list(test)) > 0:
                    if len(set(test.keys()).intersection(elements)) > 0: massImbal.append(rxn.id)
                        
    if failed != 1:
        fraction = (float(len(massImbal)) / float(len(model.reactions))) * 100.
        fraction = round(fraction, 2)
        print(str(fraction) + '% reactions are mass imbalanced')
        
    return massImbal


def basicCheck(model):
    
    # Determination
    if len(model.reactions) < len(model.metabolites): 
        print('GENRE is overdetermined')
    elif len(model.reactions) > len(model.metabolites):
        print('GENRE is underdetermined')
    else:
        pass
    
    # Compartments
    print('GENRE has ' + str(len(model.compartments.keys())) + ' compartment(s)')
    
    # Genes
    if len(model.genes) == 0: 
        print('GENRE has no gene data')
    else:
        print('GENRE has ' + str(len(model.genes)) + ' genes')
          
    # Growth
    doublingTime(model)

# Quicker way to read in models
import pickle
def read_model(fileName, obj='none'):
    
    fileType = fileName.split('.')[-1]
    
    if fileType == 'sbml' or fileType == 'xml':
        model = cobra.io.read_sbml_model(fileName)
    elif fileType == 'json':
        model = cobra.io.load_json_model(fileName)
    elif fileType == 'yaml':
        model = cobra.io.load_yaml_model(fileName)
    elif fileType == 'mat':
        model = cobra.io.load_matlab_model(fileName)
    elif fileType == 'pkl':
        model = pickle.load(open(fileName, 'rb'))
    else:
        raise TypeError('Unrecognized file extension')
    
    if obj != 'none': model.objective = obj
    for rxn in model.boundary: rxn.bounds = (-1000., 1000.)
        
    return model

In [36]:
basicCheck(draft_GENRE)
draft_noGPRblocked = blockedReactions(draft_GENRE)
draft_free = checkFreeMass(draft_GENRE)
draft_massImbal = checkBalance(draft_GENRE)
draft_nogene = missingGPR(draft_GENRE)

GENRE is underdetermined
GENRE has 3 compartment(s)
GENRE has 700 genes
51.39 minutes doubling time
48.94% reactions are blocked
0.0% metabolites are generated for free
0.0% reactions are mass imbalanced
12.66% reactions without GPRs
22.09% of reactions used in pFBA solution have no GPR


In [37]:
# Save to files
cobra.io.write_sbml_model(draft_GENRE, '/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/reconstructions/iCdR700.sbml')
cobra.io.save_json_model(draft_GENRE, '/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/reconstructions/iCdR700.json')