In [1]:
import cobra
import copy

In [2]:
# Load the curated Clostridium difficile 630 reconstruction (template model)
iCdG791_json = '/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/reconstructions/iCdG791.json'
iCdG791 = cobra.io.load_json_model(iCdG791_json)

In [3]:
# Display the loaded model's summary table as a sanity check
iCdG791

0,1
Name,iCdG791
Memory address,0x07fd487ab83d0
Number of metabolites,1132
Number of reactions,1129
Number of groups,0
Objective expression,1.0*biomass - 1.0*biomass_reverse_01e59
Compartments,"cytosol, extracellular"


In [3]:
# Number of genes in the 630 reconstruction
len(iCdG791.genes)

791

In [4]:
# Baseline objective value of the unmodified 630 model, for comparison with the derived model
iCdG791.slim_optimize()

89.76884522354936

In [5]:
# Gather the gene IDs of the curated 630 model and of the PATRIC-derived R20291 draft
iCdG791_genes = {gene.id for gene in iCdG791.genes}
patric_draft = cobra.io.load_json_model('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/r20291_draft.json')
draft_R20291_genes = {gene.id for gene in patric_draft.genes}

In [6]:
# Read in protein alignments

def _read_alignment(path, query_filter=None):
    """Parse a whitespace-delimited homolog table.

    The first line of the file is treated as a header and skipped. For each
    remaining line, the first field is the query gene and the second field is
    its hit in the other proteome (the literal string 'none' when there is no
    hit). Lines with fewer than two fields are ignored.

    Parameters
    ----------
    path : str
        Location of the comparison table.
    query_filter : set of str, optional
        When given, query genes that are not in this set are skipped entirely.

    Returns
    -------
    (dict, list)
        Mapping of query gene -> hit gene, and the list of query genes whose
        hit was 'none' (in file order).
    """
    matched = {}
    unmatched = []
    with open(path, 'r') as table:
        table.readline()  # discard header row
        for line in table:
            fields = line.split()
            if len(fields) < 2:
                continue  # blank or truncated line
            query_gene, hit_gene = fields[0], fields[1]
            if query_filter is not None and query_gene not in query_filter:
                continue
            if hit_gene == 'none':
                unmatched.append(query_gene)
            else:
                matched[query_gene] = hit_gene
    return matched, unmatched


_comparison_dir = '/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/proteome_comparisons/'

# 630 vs R20291 (restricted to genes present in iCdG791)
Cd630_geneDict, R20291_missing = _read_alignment(_comparison_dir + 'Cd630_R20291.tsv', iCdG791_genes)

# R20291 vs 630
R20291_geneDict, Cd630_missing = _read_alignment(_comparison_dir + 'R20291_Cd630.tsv')

# Peptoclostridium 630 vs R20291 (restricted to genes present in iCdG791)
Pepto630_geneDict, _pepto_unmatched = _read_alignment(_comparison_dir + 'Pepto630_R20291.tsv', iCdG791_genes)
# Genes without an R20291 hit are pooled with those from the 630 comparison
R20291_missing.extend(_pepto_unmatched)

# R20291 vs Peptoclostridium 630
# The 'none' test is now applied to the Peptoclostridium hit of each line; it
# previously inspected a stale variable left over from the R20291-vs-630 loop.
# NOTE(review): as before, this rebinding discards the R20291 -> 630 mapping
# read above; nothing later in this notebook reads R20291_geneDict, so confirm
# whether the two mappings were meant to be merged.
R20291_geneDict, Pepto630_missing = _read_alignment(_comparison_dir + 'R20291_Pepto630.tsv')

# Create complete translation dictionary (630 / Peptoclostridium 630 ID -> R20291 ID)
geneDict = {}
geneDict.update(Cd630_geneDict)
geneDict.update(Pepto630_geneDict)

# Get gene objects to be removed (these are objects of the source model iCdG791)
remove_genes = [iCdG791.genes.get_by_id(x) for x in set(R20291_missing)]

In [7]:
# Create duplicate GENRE
# Deep copy so that the edits applied to draft_GENRE do not touch iCdG791
draft_GENRE = copy.deepcopy(iCdG791)

In [8]:
# Drop every gene that has no R20291 homolog, plus one manually flagged ID
R20291_missing.append('1151372.4.peg.1723')
remove_genes = []
for missing_id in set(R20291_missing):
    # look the gene up in the copy, not in the source model
    remove_genes.append(draft_GENRE.genes.get_by_id(missing_id))
cobra.manipulation.delete.remove_genes(draft_GENRE, remove_genes)

In [9]:
# Rename homologous genes
# geneDict maps gene IDs used in iCdG791 to the ID of their R20291 hit
cobra.manipulation.modify.rename_genes(draft_GENRE, geneDict)

In [10]:
# Add genes that only appear in R20291
# (draft genes with no 630 hit); every other draft gene is queued in add_manually
included_genes = list(set(draft_R20291_genes).intersection(set(Cd630_missing)))
included_lookup = set(included_genes)  # set for constant-time membership tests
current_rxn_ids = set(x.id for x in draft_GENRE.reactions)
rxns_added = 0
add_manually = []
for gene in patric_draft.genes:
    if gene.id not in included_lookup:
        add_manually.append(gene.id)
        continue

    # gene.reactions is unordered; sort so reactions are added in a reproducible order
    for rxn in sorted(gene.reactions, key=lambda r: r.id):
        if rxn.id in current_rxn_ids:
            # update existing gene_reaction_rule
            target_rxn = draft_GENRE.reactions.get_by_id(rxn.id)
            # skip the append when the gene is already part of the rule
            # (e.g. the reaction was added, with its full rule, earlier in this loop)
            if gene.id not in set(g.id for g in target_rxn.genes):
                current_gpr = target_rxn.gene_reaction_rule
                if current_gpr != '':
                    current_gpr += ' or ' + gene.id
                else:
                    current_gpr = gene.id
                target_rxn.gene_reaction_rule = current_gpr
            # update gene name
            draft_GENRE.genes.get_by_id(gene.id).name = gene.name
        else:
            # Add a copy so that patric_draft keeps ownership of its own
            # reaction, metabolite and gene objects
            draft_GENRE.add_reactions([rxn.copy()])
            # Record the ID so a reaction shared by several genes is added
            # and counted only once
            current_rxn_ids.add(rxn.id)
            rxns_added += 1

print('Reactions added: ' + str(rxns_added))

Reactions added: 4


In [11]:
# Identify genes that must be added from the draft reconstruction
# (queued draft genes that are still absent from draft_GENRE)
draft_genes = set(gene.id for gene in draft_GENRE.genes)
add_manually = set(add_manually).difference(draft_genes)
current_rxn_ids = set(x.id for x in draft_GENRE.reactions)

# Sorted iteration keeps the resulting GPR strings and reaction order
# reproducible between runs (plain set iteration order is arbitrary).
for gene in sorted(add_manually):
    patric_gene = patric_draft.genes.get_by_id(gene)
    for rxn in sorted(patric_gene.reactions, key=lambda r: r.id):
        if rxn.id in current_rxn_ids:
            # update existing gene_reaction_rule
            target_rxn = draft_GENRE.reactions.get_by_id(rxn.id)
            # skip the append when the gene is already part of the rule
            # (e.g. the reaction was added, with its full rule, earlier in this loop)
            if gene not in set(g.id for g in target_rxn.genes):
                current_gpr = target_rxn.gene_reaction_rule
                if current_gpr != '':
                    current_gpr += ' or ' + gene
                else:
                    current_gpr = gene
                target_rxn.gene_reaction_rule = current_gpr
        else:
            # Add a copy so that patric_draft keeps ownership of its own objects
            draft_GENRE.add_reactions([rxn.copy()])
            # Record the ID so a reaction shared by several genes is added
            # and counted only once
            current_rxn_ids.add(rxn.id)
            rxns_added += 1
        # update gene name
        draft_GENRE.genes.get_by_id(gene).name = patric_gene.name

# rxns_added continues the running total started in the previous cell
print('Reactions added: ' + str(rxns_added))

Reactions added: 8


In [12]:
# Manual GPR corrections
# One rule is replaced outright ...
draft_GENRE.reactions.rxn02003_c.gene_reaction_rule = '645463.3.peg.237'

# ... and the rest gain one additional alternative gene each
extra_gpr_genes = [
    ('rxn14293_c', '645463.3.peg.260'),
    ('rxn06517_c', '645463.3.peg.1978'),
    ('rxn01404_c', '645463.3.peg.658'),
    ('rxn01368_c', '645463.3.peg.769'),
    ('rxn00119_c', '645463.3.peg.2191'),
]
for target_id, extra_gene in extra_gpr_genes:
    target_rxn = getattr(draft_GENRE.reactions, target_id)
    target_rxn.gene_reaction_rule = target_rxn.gene_reaction_rule + ' or ' + extra_gene

In [13]:
# Re-create the teichoic acid biomass component reaction

# Metabolites already present in the model
cpd02967_c = draft_GENRE.metabolites.get_by_id('cpd02967_c') # N-Acetyl-beta-D-mannosaminyl-1,4-N-acetyl-D-glucosaminyldiphosphoundecaprenol
cpd00402_c = draft_GENRE.metabolites.get_by_id('cpd00402_c') # CDPglycerol
cpd00046_c = draft_GENRE.metabolites.get_by_id('cpd00046_c') # CMP

# New product metabolite (no formula assigned)
cpd12894_c = cobra.Metabolite('cpd12894_c', name='Teichoic acid', formula='', compartment='cytosol')

# No gene is attached to this reaction: the R20291 homolog is unknown
# (Stabler et al. (2009). Genome Biol.). The 630 gene that was considered here
# is 272563.8.peg.262, 'Putative CDP-glycerol:Poly(Glycerophosphate)
# glycerophosphotransferase'.
teichoicacid_rxn = cobra.Reaction('teichoicacid_rxn')
teichoicacid_rxn.name = 'Teichoic acid biosynthesis'
teichoicacid_rxn.lower_bound = 0.
teichoicacid_rxn.upper_bound = 1000.

# Negative coefficients are consumed, positive ones produced
teichoicacid_stoichiometry = {
    cpd02967_c: -1.0,
    cpd00402_c: -1.0,
    cpd00046_c: 1.0,
    cpd12894_c: 1.0,
}
teichoicacid_rxn.add_metabolites(teichoicacid_stoichiometry)
draft_GENRE.add_reactions([teichoicacid_rxn])

In [14]:
# Replace Stickland pathway reaction
cpd00005_c = draft_GENRE.metabolites.get_by_id('cpd00005_c') # NADPH
cpd00067_c = draft_GENRE.metabolites.get_by_id('cpd00067_c') # H+
cpd00498_c = draft_GENRE.metabolites.get_by_id('cpd00498_c') # 2-Aceto-2-hydroxybutanoate
cpd00006_c = draft_GENRE.metabolites.get_by_id('cpd00006_c') # NADP
cpd02535_c = draft_GENRE.metabolites.get_by_id('cpd02535_c') # 2,3-Dihydroxy-3-methylvalerate

gene = cobra.Gene('645463.3.peg.1493')
gene.name = 'Ketol-acid reductoisomerase (NADP(+)) (EC 1.1.1.86)'

rxn08764_c = cobra.Reaction('rxn08764_c')
rxn08764_c.gene_reaction_rule = '645463.3.peg.1493'
# Name previously read 'Teichoic acid biosynthesis' (carried over from the
# teichoic acid cell); describe the reaction actually defined here instead.
rxn08764_c.name = 'Ketol-acid reductoisomerase (2-aceto-2-hydroxybutanoate)'
rxn08764_c.lower_bound = -1000.
rxn08764_c.upper_bound = 1000.
rxn08764_c.add_metabolites({
    cpd00005_c: -1.0,
    cpd00067_c: -1.0,
    cpd00498_c: -1.0,
    cpd00006_c: 1.0,
    cpd02535_c: 1.0
})
draft_GENRE.add_reactions([rxn08764_c])

# The rule string above makes the reaction resolve its gene by ID, so the
# standalone `gene` object never enters the model. Copy its name onto the
# model's gene so the annotation is not lost.
if draft_GENRE.genes.has_id(gene.id):
    draft_GENRE.genes.get_by_id(gene.id).name = gene.name

In [15]:
# Remove DNA demand
try:
    draft_GENRE.reactions.get_by_id('rxn13783_c').remove_from_model(remove_orphans=True)
except KeyError:
    # Reaction is not in this reconstruction, so there is nothing to remove.
    # Only the failed lookup is tolerated; any other error now surfaces.
    pass

In [16]:
# Prune orphaned nodes
# Repeats until a full pass removes nothing.
cpd_total = 0
rxn_total = 0
removed = 1
while removed == 1:
    removed = 0

    # Metabolites that take part in no reaction.
    # Iterate over a snapshot: removing from the model's list while looping
    # over that same list would skip the element following each removal.
    for cpd in list(draft_GENRE.metabolites):
        if len(cpd.reactions) == 0:
            cpd.remove_from_model()
            removed = 1
            cpd_total += 1

    # Reactions that have no metabolites (same snapshot precaution)
    for rxn in list(draft_GENRE.reactions):
        if len(rxn.metabolites) == 0:
            rxn.remove_from_model()
            removed = 1
            rxn_total += 1

print('Metabolites pruned: ' + str(cpd_total))
print('Reactions pruned: ' + str(rxn_total))

Metabolites pruned: 2
Reactions pruned: 0


In [17]:
# Expand one-letter compartment codes to the full names used by the rest of the model
compartment_names = {'c': 'cytosol', 'e': 'extracellular'}
for cpd in draft_GENRE.metabolites:
    full_name = compartment_names.get(cpd.compartment)
    if full_name is not None:
        cpd.compartment = full_name

In [18]:
# Make every exchange reaction fully reversible
exchange_rxns = [rxn for rxn in draft_GENRE.reactions if 'EX_' in rxn.id]
for rxn in exchange_rxns:
    rxn.bounds = (-1000.0,1000.0)

In [19]:
# Test that new model grows
# (objective value with all exchange reactions opened by the previous cell)
draft_GENRE.slim_optimize()

89.94470384367948

In [40]:
# Check growth on two defined minimal media

def _apply_medium(model, open_exchanges):
    """Restrict uptake to the given exchange reactions.

    Every reaction that has no products and whose ID contains 'EX_' is set to
    (-1000, 1000) when its ID is listed in `open_exchanges` and to
    (0, 1000) otherwise.
    """
    for rxn in model.reactions:
        if len(rxn.products) == 0 and 'EX_' in rxn.id:
            if rxn.id in open_exchanges:
                rxn.bounds = (-1000.0,1000.0)
            else:
                rxn.bounds = (0.0,1000.0)


# Karlsson et al. (1999). Microbiology.
mdm = ['cpd00001_e', # water
       'cpd00065_e', # L-Tryptophan
       'cpd00060_e', # L-Methionine
       'cpd00322_e', # L-Isoleucine
       'cpd00129_e', # L-Proline
       'cpd00156_e', # L-Valine
       'cpd00107_e', # L-Leucine
       'cpd00084_e', # L-Cysteine
       'cpd00149_e', # Cobalt
       'cpd00099_e', # Chloride
       'cpd10515_e', # Iron
       'cpd00030_e', # Manganese
       'cpd00254_e', # Magnesium
       'cpd00063_e', # Calcium
       'cpd00205_e', # Potassium
       'cpd00009_e', # Phosphate
       'cpd00971_e', # Sodium
       'cpd00242_e', # Carbonate
       'cpd00104_e', # Biotin
       'cpd00644_e', # Pantothenate
       'cpd00263_e', # Pyridoxine
       'cpd00027_e'] # D-Glucose (Carbohydrate C-source)

# Theriot et al. (2013). Nature Communications.
ncmm = ['cpd00001_e', # water
        'cpd00104_e', # Biotin
        'cpd00644_e', # Pantothenate
        'cpd00263_e', # Pyridoxine
        'cpd00149_e', # Cobalt
        'cpd00099_e', # Chloride
        'cpd10515_e', # Iron
        'cpd00030_e', # Manganese
        'cpd00254_e', # Magnesium
        'cpd00063_e', # Calcium
        'cpd00205_e', # Potassium
        'cpd00009_e', # Phosphate
        'cpd00971_e', # Sodium
        'cpd00242_e', # Carbonate
        'cpd00322_e', # L-Isoleucine
        'cpd00129_e', # L-Proline
        'cpd00156_e', # L-Valine
        'cpd00107_e', # L-Leucine
        'cpd00084_e', # L-Cysteine
        'cpd00065_e', # L-Tryptophan
        'cpd00060_e', # L-Methionine
        'cpd00119_e', # L-Histidine
        'cpd00033_e', # Glycine
        'cpd00051_e', # L-Arginine
        'cpd00161_e'] # L-Threonine

# Exchange reaction IDs are the compound IDs prefixed with 'EX_'
ncmm_exchanges = ['EX_' + cpd_id for cpd_id in ncmm]
mdm_exchanges = ['EX_' + cpd_id for cpd_id in mdm]

_apply_medium(draft_GENRE, ncmm_exchanges)
print('NCMM: ' + str(draft_GENRE.slim_optimize()))

# MDM is applied last, so the model is left constrained to MDM
_apply_medium(draft_GENRE, mdm_exchanges)
print('MDM: ' + str(draft_GENRE.slim_optimize()))

NCMM: 11.149854054237371
MDM: 24.508304332569587


In [21]:
# Label the derived model; the ID embeds the current gene count
draft_GENRE.name = 'Clostridium difficile R20291'
gene_count = len(draft_GENRE.genes)
draft_GENRE.id = 'iCdR{}'.format(gene_count)

In [22]:
# Display the summary table of the derived model
draft_GENRE

0,1
Name,iCdR758
Memory address,0x07fd4a1347d50
Number of metabolites,1140
Number of reactions,1135
Number of groups,0
Objective expression,0.0 + 1.0*biomass - 1.0*biomass_reverse_01e59
Compartments,"cytosol, extracellular"


In [41]:
# Add annotation information

def _strip_suffix(identifier):
    """Return `identifier` without its last '_'-delimited field.

    'cpd00001_c' -> 'cpd00001', 'EX_cpd00001_e' -> 'EX_cpd00001'. Interior
    underscores are kept (they used to be dropped, giving 'EXcpd00001').
    An identifier with no underscore is returned unchanged.
    """
    return str(identifier).rsplit('_', 1)[0]


# SBO designations
# Metabolites
for cpd in draft_GENRE.metabolites:
    cpd.annotation['sbo'] = 'SBO:0000247'
# Reactions
for rxn in draft_GENRE.reactions:
    substrates = list(rxn.metabolites)
    compartments = set([x.compartment for x in substrates])
    if 'EX_' in rxn.id:
        rxn.annotation['sbo'] = 'SBO:0000627' # exchange
    elif len(compartments) > 1:
        rxn.annotation['sbo'] = 'SBO:0000185' # transport
    else:
        rxn.annotation['sbo'] = 'SBO:0000176' # metabolic
# Biomass (overrides the generic designation assigned in the loop above)
for biomass_rxn_id in ('dna_rxn', 'rna_rxn', 'protein_rxn', 'teichoicacid_rxn',
                       'peptidoglycan_rxn', 'cellwall_rxn', 'lipid_rxn',
                       'cofactor_rxn', 'biomass'):
    draft_GENRE.reactions.get_by_id(biomass_rxn_id).annotation['sbo'] = 'SBO:0000629'
draft_GENRE.reactions.SK_cpd11416_c.annotation['sbo'] = 'SBO:0000632'
# Genes
for gene in draft_GENRE.genes:
    gene.annotation['sbo'] = 'SBO:0000243'

# RefSeq annotations
# Table columns: gene ID as used in the model, then the value stored as locus tag
patric_refseq = {}
with open('/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/r20291_refseq_genes.tsv') as ref:
    for line in ref:
        fields = line.split()
        if len(fields) < 2:
            continue  # blank or truncated line
        patric_refseq[fields[0]] = fields[1]
for gene in draft_GENRE.genes:
    if '.peg.' in gene.id:
        gene.annotation['refseq'] = 'NC_013316.1'
        # Genes missing from the table simply get no locus tag
        locus_tag = patric_refseq.get(gene.id)
        if locus_tag is not None:
            gene.annotation['refseq.locustag'] = locus_tag

# Metabolites
for cpd in draft_GENRE.metabolites:
    if 'cpd' in cpd.id:
        cpd.annotation['seed.compound'] = _strip_suffix(cpd.id)
draft_GENRE.metabolites.C21400_c.annotation['kegg.compound'] = 'C21400'
draft_GENRE.metabolites.C21399_c.annotation['kegg.compound'] = 'C21399'
draft_GENRE.metabolites.C21090_c.annotation['kegg.compound'] = 'C21090'
draft_GENRE.metabolites.C21399_e.annotation['kegg.compound'] = 'C21399'

# Reactions
for rxn in draft_GENRE.reactions:
    # Match 'rxn' as a prefix only: pseudo-reactions such as 'dna_rxn' or
    # 'teichoicacid_rxn' merely contain 'rxn' and used to receive bogus
    # seed.reaction values ('dna', 'teichoicacid').
    if rxn.id.startswith('rxn') or 'EX_' in rxn.id:
        rxn.annotation['seed.reaction'] = _strip_suffix(rxn.id)
    elif 'R' in rxn.id:
        rxn.annotation['kegg.reaction'] = _strip_suffix(rxn.id)
draft_GENRE.reactions.SK_cpd11416_c.annotation['seed.reaction'] = 'SK_cpd11416'
# NOTE(review): 'eggnog.reaction', 'cog.reaction' and a K-number stored under
# 'kegg.reaction' look like non-standard annotation keys - confirm the intended
# namespaces before publishing the model.
draft_GENRE.reactions.ENOG4108HXH_c.annotation['eggnog.reaction'] = 'ENOG4108HXH'
draft_GENRE.reactions.COG3601_c.annotation['cog.reaction'] = 'COG3601'
draft_GENRE.reactions.K20025_c.annotation['kegg.reaction'] = 'K20025'

# Genes
for gene in draft_GENRE.genes:
    if '.peg.' in gene.id:
        gene.annotation['patric'] = 'fig|' + gene.id

In [None]:
# Correct residual gene names
# The model object in this notebook is draft_GENRE; the name `iCdR758` used
# here before is never defined, so the cell could not run.
residual_gene_names = [
    ('645463.3.peg.1176', 'Na(+)-dependent branched-chain amino acid transporter'),
    ('645463.3.peg.735', 'ABC transporter, substrate-binding protein'),
    ('645463.3.peg.2463', 'PTS system, glucitol/sorbitol-specific IIB component'),
    ('645463.3.peg.929', 'N-acetylgalactosamine-6-phosphate deacetylase'),
    ('645463.3.peg.2324', 'ribulose-5-phosphate 4-epimerase'),
    ('645463.3.peg.46', 'Ribulose-5-phosphate 4-epimerase'),
    ('645463.3.peg.3015', 'PTS system, IIB component'),
    ('645463.3.peg.3014', 'PTS system, IIC component'),
    ('645463.3.peg.3016', 'PTS system, IIA component'),
    ('645463.3.peg.865', 'ABC transporter, substrate-binding protein'),
    ('645463.3.peg.862', 'ABC transporter, substrate-binding protein'),
    ('645463.3.peg.664', 'sodium-solute symporter, putative'),
    ('645463.3.peg.1783', 'ABC transporter, ATP-binding protein'),
    ('645463.3.peg.1421', 'DNA-3-methyladenine glycosylase II'),
    ('645463.3.peg.2254', '2-oxoglutarate/2-oxoacid ferredoxin oxidoreductase, delta subunit'),
    ('645463.3.peg.2253', '2-oxoglutarate/2-oxoacid ferredoxin oxidoreductase, alpha subunit'),
    ('645463.3.peg.168', '4-hydroxyphenylacetate decarboxylase, small subunit'),
    ('645463.3.peg.167', '4-hydroxyphenylacetate decarboxylase, large subunit'),
    ('645463.3.peg.2417', 'Indolepyruvate oxidoreductase subunit'),
    ('645463.3.peg.2389', 'Glycine reductase component B beta subunit'),
    ('645463.3.peg.392', '2-hydroxyglutaryl-CoA dehydratase, A-component'),
    ('645463.3.peg.393', '2-hydroxyglutaryl-CoA dehydratase, B-component'),
    ('645463.3.peg.390', 'Hydroxyproline dehydratase putative'),
    ('645463.3.peg.391', 'CoA-substrate-specific enzyme activase; 2-hydroxyglutaryl-CoA dehydratase activator, A-component'),
    ('645463.3.peg.1466', 'NADH-dependent reduced ferredoxin:NADP+ oxidoreductase subunit A'),
    ('645463.3.peg.169', '4-hydroxyphenylacetate decarboxylase activating enzyme'),
    ('645463.3.peg.2250', 'Acetyl-CoA synthetase (ADP-forming) alpha and beta chains, putative'),
    ('645463.3.peg.2379', 'Succinate-semialdehyde dehydrogenase, CoA-dependent'),
    ('645463.3.peg.976', 'Electron bifurcating butyryl-CoA dehydrogenase (NAD+, ferredoxin)'),
    ('645463.3.peg.2703', 'UDP-N-acetylmuramoyl-tripeptide--D-alanyl-D-alanine ligase'),
]

# Apply each corrected name; IDs absent from the model are collected and
# reported rather than aborting the remaining corrections.
genes_not_found = []
for gene_id, corrected_name in residual_gene_names:
    try:
        draft_GENRE.genes.get_by_id(gene_id).name = corrected_name
    except KeyError:
        genes_not_found.append(gene_id)

if genes_not_found:
    print('Gene IDs not found in model: ' + ', '.join(genes_not_found))

In [42]:
# Write the finished reconstruction in both SBML and JSON form
reconstruction_stem = '/home/mjenior/Desktop/repos/Jenior_Cdifficile_2019/data/reconstructions/iCdR758'
cobra.io.write_sbml_model(draft_GENRE, reconstruction_stem + '.sbml')
cobra.io.save_json_model(draft_GENRE, reconstruction_stem + '.json')