In [5]:
from riptide import *

In [6]:
# Load the base iCdJ794 reconstruction from its SBML file.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
iCdJ794 = cobra.io.read_sbml_model('/home/mjenior/Desktop/repos/Jenior_iCdJ794_2019/data/iCdJ794.sbml')

### Base Model Statistics

In [7]:
iCdJ794

0,1
Name,iCdJ794
Memory address,0x07f199805b1d0
Number of metabolites,1134
Number of reactions,1129
Objective expression,0.0 + 1.0*biomass - 1.0*biomass_reverse_01e59
Compartments,"cytosol, extracellular"


In [4]:
# Summarize the base model: gene count plus a three-way split of its reactions

print('Genes: ' + str(len(iCdJ794.genes)))

# A reaction with an empty product side is counted as an exchange
exch = sum(1 for reaction in iCdJ794.reactions if not reaction.products)
print('Exchanges: ' + str(exch))

# A reaction whose metabolites span more than one compartment is counted as a transporter
trans = sum(
    1 for reaction in iCdJ794.reactions
    if len({met.compartment for met in reaction.reactants + reaction.products}) > 1
)
print('Transporters: ' + str(trans))

# Everything that is neither an exchange nor a transporter
metab = len(iCdJ794.reactions) - exch - trans
print('Metabolic reactions: ' + str(metab))

Genes: 794
Total Reactions: 1129

Exchanges: 99
Transporters: 104
Metabolic reactions: 926


In [19]:
import copy
from cobra.flux_analysis.variability import find_essential_genes

def essential_genes(genre, media=None, optimum_fraction=None):
    """Print the number of essential genes in a model and return them.

    Parameters
    ----------
    genre : cobra.Model
        Model to analyze. A deep copy is made first, so the caller's model
        is never modified.
    media : iterable of str, optional
        Extracellular compound IDs (e.g. 'cpd00027_e'). Each is prefixed with
        'EX_' to form an exchange reaction ID. Reactions with no products are
        treated as exchanges: those named in the medium get a lower bound of
        -1000.0, all others get a lower bound of 0.0. When None, bounds are
        left untouched.
    optimum_fraction : float, optional
        Fraction of the optimal objective value used as the viability cutoff
        for a knockout. It is converted to an absolute objective flux because
        that is what `find_essential_genes` expects for `threshold`. When
        None, the default cutoff of `find_essential_genes` is used.

    Returns
    -------
    The collection of essential genes returned by `find_essential_genes`.
    """
    model = copy.deepcopy(genre)

    # Change media conditions if necessary
    if media is not None:
        open_exchanges = {'EX_' + cpd for cpd in media}
        for rxn in model.reactions:
            # Exchanges are recognized by having an empty product side
            if len(rxn.products) == 0:
                rxn.lower_bound = -1000.0 if rxn.id in open_exchanges else 0.0

    # `threshold` is an absolute objective flux, so scale the fraction by the
    # optimum of the (possibly media-constrained) model instead of passing the
    # raw fraction through.
    threshold = None
    if optimum_fraction is not None:
        threshold = model.slim_optimize() * optimum_fraction

    # Find essential genes
    essential = find_essential_genes(model, threshold=threshold)

    print('Essential genes: ' + str(len(essential)))

    return essential
                

In [20]:
# Define media conditions
# Each list holds extracellular compound IDs. essential_genes() prefixes every
# entry with 'EX_' to form the ID of an exchange reaction that is left open
# for uptake; all other exchanges are closed for uptake.
# NOTE(review): 'mdm' presumably stands for a minimal defined medium -- confirm.
mdm = ['cpd00001_e', # water
       'cpd00065_e', # L-Tryptophan
       'cpd00060_e', # L-Methionine
       'cpd00322_e', # L-Isoleucine
       'cpd00129_e', # L-Proline
       'cpd00156_e', # L-Valine
       'cpd00107_e', # L-Leucine
       'cpd00084_e', # L-Cysteine
       'cpd00149_e', # Cobalt
       'cpd00099_e', # Chloride
       'cpd10515_e', # Iron
       'cpd00030_e', # Manganese
       'cpd00254_e', # Magnesium
       'cpd00063_e', # Calcium
       'cpd00205_e', # Potassium
       'cpd00009_e', # Phosphate
       'cpd00971_e', # Sodium
       'cpd00242_e', # Carbonate
       'cpd00104_e', # Biotin
       'cpd00305_e', # Thiamine
       'cpd00263_e', # Pyridoxine
       'cpd00027_e'] # D-Glucose (Carbohydrate C-source)

# Theriot et al. (2013). Nature Communications.
# Unlike mdm, this list contains no D-glucose or thiamine entry; it adds
# pantothenate and four further amino acids (His, Gly, Arg, Thr).
ncmm = ['cpd00001_e', # water
        'cpd00104_e', # Biotin
        'cpd00644_e', # Pantothenate
        'cpd00263_e', # Pyridoxine
        'cpd00149_e', # Cobalt
        'cpd00099_e', # Chloride
        'cpd10515_e', # Iron
        'cpd00030_e', # Manganese
        'cpd00254_e', # Magnesium
        'cpd00063_e', # Calcium
        'cpd00205_e', # Potassium
        'cpd00009_e', # Phosphate
        'cpd00971_e', # Sodium
        'cpd00242_e', # Carbonate
        'cpd00322_e', # L-Isoleucine
        'cpd00129_e', # L-Proline
        'cpd00156_e', # L-Valine
        'cpd00107_e', # L-Leucine
        'cpd00084_e', # L-Cysteine
        'cpd00065_e', # L-Tryptophan
        'cpd00060_e', # L-Methionine
        'cpd00119_e', # L-Histidine
        'cpd00033_e', # Glycine
        'cpd00051_e', # L-Arginine
        'cpd00161_e'] # L-Threonine

In [21]:
# Essentiality in minimal media
# Each call works on a copy of the base model, prints the number of essential
# genes, and returns the gene collection.
mdm_essential = essential_genes(iCdJ794, media=mdm)
ncmm_essential = essential_genes(iCdJ794,  media=ncmm)

Essential genes: 92
Essential genes: 80


### RIPTiDe *in vivo* Contextualization

In [5]:
# Read in formatted transcription files
def read_transcription(infile):
    """Parse a whitespace-delimited transcript abundance table into a dict.

    The first line of the file is treated as a header and discarded. For each
    remaining non-blank line, the gene ID is the part of the first field that
    precedes the first '|', and the abundance is the third field converted to
    float. If a gene ID occurs more than once, the last occurrence wins.

    Parameters
    ----------
    infile : str
        Path to the abundance table.

    Returns
    -------
    dict
        Mapping of gene ID (str) to abundance (float).

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than three fields.
    ValueError
        If the third field of a data line is not a valid float.
    """
    abund_dict = {}
    with open(infile, 'r') as abunds:
        # Discard the header row (a completely empty file yields an empty dict)
        next(abunds, None)
        for line in abunds:
            fields = line.split()
            # Blank or whitespace-only lines (e.g. a trailing newline) carry no record
            if not fields:
                continue
            gene = fields[0].split('|')[0]
            abund_dict[gene] = float(fields[2])

    return abund_dict


In [6]:
# Read in in vivo C. difficile transcription
# One gene -> abundance dictionary per condition, each parsed by read_transcription().
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
cefoperazone = read_transcription('/home/mjenior/Desktop/repos/Jenior_iCdJ794_2019/data/transcript/cefoperazone_630.mapped.norm.tsv')
clindamycin = read_transcription('/home/mjenior/Desktop/repos/Jenior_iCdJ794_2019/data/transcript/clindamycin_630.mapped.norm.tsv')
streptomycin = read_transcription('/home/mjenior/Desktop/repos/Jenior_iCdJ794_2019/data/transcript/streptomycin_630.mapped.norm.tsv')
gnotobiotic = read_transcription('/home/mjenior/Desktop/repos/Jenior_iCdJ794_2019/data/transcript/gnotobiotic_630.mapped.norm.tsv')

In [7]:
# Contextualize the base model with the cefoperazone transcript abundances.
# The result's `.model` attribute is the object written to SBML later on.
cef_riptide = riptide(iCdJ794, cefoperazone)


Initializing model and parsing transcriptome...
Pruning zero flux subnetworks...
Sampling context-specific flux distributions (longest step)...

Reactions pruned to 291 from 1129 (74.22% change)
Metabolites pruned to 289 from 1134 (74.51% change)
Flux through the objective DECREASED to ~76.48 from ~89.77 (14.8% change)
Solution space volume DECREASED to ~1785.89 from ~8460.51 (78.89% change)

RIPTiDe completed in 3 minutes and 16 seconds



In [8]:
# Contextualize the base model with the clindamycin transcript abundances.
# The result's `.model` attribute is the object written to SBML later on.
clinda_riptide = riptide(iCdJ794, clindamycin)


Initializing model and parsing transcriptome...
Pruning zero flux subnetworks...
Sampling context-specific flux distributions (longest step)...

Reactions pruned to 283 from 1129 (74.93% change)
Metabolites pruned to 280 from 1134 (75.31% change)
Flux through the objective DECREASED to ~75.09 from ~89.77 (16.35% change)
Solution space volume DECREASED to ~1147.78 from ~8460.51 (86.43% change)

RIPTiDe completed in 3 minutes and 0 seconds



In [9]:
# Contextualize the base model with the streptomycin transcript abundances.
# The result's `.model` attribute is the object written to SBML later on.
strep_riptide = riptide(iCdJ794, streptomycin)


Initializing model and parsing transcriptome...
Pruning zero flux subnetworks...
Sampling context-specific flux distributions (longest step)...

Reactions pruned to 288 from 1129 (74.49% change)
Metabolites pruned to 285 from 1134 (74.87% change)
Flux through the objective DECREASED to ~80.25 from ~89.77 (10.6% change)
Solution space volume DECREASED to ~1334.62 from ~8460.51 (84.23% change)

RIPTiDe completed in 3 minutes and 8 seconds



In [10]:
# Contextualize the base model with the gnotobiotic transcript abundances.
# The result's `.model` attribute is the object written to SBML later on.
gnoto_riptide = riptide(iCdJ794, gnotobiotic)


Initializing model and parsing transcriptome...
Pruning zero flux subnetworks...
Sampling context-specific flux distributions (longest step)...

Reactions pruned to 281 from 1129 (75.11% change)
Metabolites pruned to 281 from 1134 (75.22% change)
Flux through the objective DECREASED to ~73.13 from ~89.77 (18.54% change)
Solution space volume DECREASED to ~890.11 from ~8460.51 (89.48% change)

RIPTiDe completed in 2 minutes and 57 seconds



In [26]:
# Save contextualized models
# Writes the `.model` attribute of each RIPTiDe result to its own SBML file so
# the analyses below can reload the models without repeating the RIPTiDe runs.
# NOTE(review): absolute, machine-specific output directory.
cobra.io.write_sbml_model(cef_riptide.model, '/home/mjenior/Desktop/reconstructions/cef_riptide.sbml')
cobra.io.write_sbml_model(clinda_riptide.model, '/home/mjenior/Desktop/reconstructions/clinda_riptide.sbml')
cobra.io.write_sbml_model(strep_riptide.model, '/home/mjenior/Desktop/reconstructions/strep_riptide.sbml')
cobra.io.write_sbml_model(gnoto_riptide.model, '/home/mjenior/Desktop/reconstructions/gnoto_riptide.sbml')

### Context-specific Gene Essentiality

In [27]:
# Read in models if necessary
# Note that this rebinds `iCdJ794` to a freshly loaded copy of the base model;
# the four `*_iCdJ794` names are the contextualized models saved as SBML above.
iCdJ794 = cobra.io.read_sbml_model('/home/mjenior/Desktop/repos/Jenior_iCdJ794_2019/data/iCdJ794.sbml')
cef_iCdJ794 = cobra.io.read_sbml_model('/home/mjenior/Desktop/reconstructions/cef_riptide.sbml')
clinda_iCdJ794 = cobra.io.read_sbml_model('/home/mjenior/Desktop/reconstructions/clinda_riptide.sbml')
strep_iCdJ794 = cobra.io.read_sbml_model('/home/mjenior/Desktop/reconstructions/strep_riptide.sbml')
gnoto_iCdJ794 = cobra.io.read_sbml_model('/home/mjenior/Desktop/reconstructions/gnoto_riptide.sbml')

In [35]:
# Unconstrained base model
# media=None, so the exchange bounds stay exactly as loaded from the SBML file.
base_essential = essential_genes(iCdJ794)
# Keep only the gene IDs so results can be compared as plain strings
base_essential_ids = [x.id for x in base_essential]

Essential genes: 43


In [36]:
# Essential genes of the cefoperazone-contextualized model (exchange bounds as loaded)
cef_essential = essential_genes(cef_iCdJ794)
# Gene IDs only, for the four-way set comparison
cef_essential_ids = [x.id for x in cef_essential]

Essential genes: 67


In [37]:
# Essential genes of the clindamycin-contextualized model (exchange bounds as loaded)
clinda_essential = essential_genes(clinda_iCdJ794)
# Gene IDs only, for the four-way set comparison
clinda_essential_ids = [x.id for x in clinda_essential]

Essential genes: 64


In [38]:
# Essential genes of the streptomycin-contextualized model (exchange bounds as loaded)
strep_essential = essential_genes(strep_iCdJ794)
# Gene IDs only, for the four-way set comparison
strep_essential_ids = [x.id for x in strep_essential]

Essential genes: 64


In [39]:
# Essential genes of the gnotobiotic-contextualized model (exchange bounds as loaded)
gnoto_essential = essential_genes(gnoto_iCdJ794)
# Gene IDs only, for the four-way set comparison
gnoto_essential_ids = [x.id for x in gnoto_essential]

Essential genes: 67


In [40]:
# Compares lists to create diagrams for 4 groups
def venn_comparison(list1, list2, list3, list4):
    """Partition four collections into the 15 exclusive regions of a 4-way Venn diagram.

    For every non-empty combination of the four groups, the region holds the
    elements found in all groups of the combination and in none of the other
    groups. A one-line report is printed per region with its size and a
    percentage: the region size relative to each participating group's total
    size, averaged over the participating groups and rounded to one decimal.
    A group with no elements contributes 0.0 to that average instead of
    triggering a division by zero.

    Parameters
    ----------
    list1, list2, list3, list4 : iterable of hashable
        Groups to compare; duplicates are collapsed because each is
        converted to a set.

    Returns
    -------
    list of set
        The 15 regions in this fixed order:
        [0-3]   list1 only, list2 only, list3 only, list4 only
        [4-9]   1+2, 1+3, 1+4, 2+3, 2+4, 3+4
        [10-13] 1+2+3, 1+2+4, 1+3+4, 2+3+4
        [14]    shared by all four
    """
    from itertools import combinations  # local import keeps this cell self-contained

    # Confirm correct data types
    groups = [set(list1), set(list2), set(list3), set(list4)]
    totals = [float(len(group)) for group in groups]
    indices = range(len(groups))

    def exclusive_region(members):
        """Elements present in every member group and absent from all other groups."""
        region = set.intersection(*(groups[i] for i in members))
        for i in indices:
            if i not in members:
                region = region.difference(groups[i])
        return region

    def percent_of_members(count, members):
        """Region size as a percent of each member group's total, averaged over members."""
        shares = [(count / totals[i]) * 100.0 if totals[i] else 0.0 for i in members]
        if len(shares) == 1:
            return round(shares[0], 1)
        return round(numpy.mean(shares), 1)

    def label(members):
        """Report label for a combination of group indices."""
        if len(members) == 1:
            return 'List ' + str(members[0] + 1) + ' only'
        if len(members) == len(groups):
            return 'Shared'
        return ' + '.join('List ' + str(i + 1) for i in members)

    # combinations() yields the regions in exactly the documented return order
    regions = []
    for size in range(1, len(groups) + 1):
        tier = list(combinations(indices, size))
        for position, members in enumerate(tier):
            region = exclusive_region(members)
            regions.append(region)
            count = float(len(region))
            # A blank line separates the tiers; nothing follows the final "Shared" line
            closes_tier = position == len(tier) - 1 and size < len(groups)
            print(label(members) + ': ' + str(percent_of_members(count, members))
                  + '% (' + str(int(count)) + ')' + ('\n' if closes_tier else ''))

    # Return new lists
    return regions


In [41]:
# Context specific gene essentiality
# 1. cefoperazone
# 2. clindamycin
# 3. streptomycin
# 4. gnotobiotic
# Returned list layout: [0-3] exclusive to list 1-4; [4-9] pairs 1+2, 1+3, 1+4,
# 2+3, 2+4, 3+4; [10-13] triples 1+2+3, 1+2+4, 1+3+4, 2+3+4; [14] shared by all.
essentiality = venn_comparison(cef_essential_ids, clinda_essential_ids, strep_essential_ids, gnoto_essential_ids)

List 1 only: 4.5% (3)
List 2 only: 1.6% (1)
List 3 only: 1.6% (1)
List 4 only: 9.0% (6)

List 1 + List 2: 0.0% (0)
List 1 + List 3: 0.0% (0)
List 1 + List 4: 3.0% (2)
List 2 + List 3: 1.6% (1)
List 2 + List 4: 1.5% (1)
List 3 + List 4: 0.0% (0)

List 1 + List 2 + List 3: 6.2% (4)
List 1 + List 2 + List 4: 0.0% (0)
List 1 + List 3 + List 4: 1.5% (1)
List 2 + List 3 + List 4: 0.0% (0)

Shared: 87.1% (57)


In [42]:
# Gnotobiotic only
essentiality[3]

{'272563.8.peg.1421',
 '272563.8.peg.2321',
 '272563.8.peg.3321',
 '272563.8.peg.570',
 'CD630_24170',
 'CD630_24180'}

In [31]:
# Conventional only
# Index 10 is the List 1 + List 2 + List 3 region: genes essential in the
# cefoperazone, clindamycin and streptomycin models but not in the gnotobiotic one.
essentiality[10]

{'272563.8.peg.1282', '272563.8.peg.3390', '272563.8.peg.3438', 'CD630_06820'}

In [32]:
# Cefoperazone only
essentiality[0]

{'272563.8.peg.2278', '272563.8.peg.871', '272563.8.peg.872'}

In [44]:
# Gnotobiotic only
essentiality[3]

{'272563.8.peg.1421',
 '272563.8.peg.2321',
 '272563.8.peg.3321',
 '272563.8.peg.570',
 'CD630_24170',
 'CD630_24180'}

### Context-specific Substrate Importance

In [50]:
def test_growth_substrates(genre, extracellular='extracellular', optimum_fraction=0.85):
    
    obj_val = genre.slim_optimize() * optimum_fraction
    
    for test in range(1,100):
        important = set()
        with genre as g:
            for rxn in g.reactions:
                if len(rxn.reactants) == 0 or len(rxn.products) == 0: 
                    rxn.upper_bound = 0.01
                    if g.slim_optimize() < obj_val: 
                        for cpd in rxn.metabolites:
                            if cpd.compartment == extracellular:
                                important |= set([cpd.id + ' --- ' + cpd.name])                    
                    rxn.upper_bound = 1000
    
    print('Important growth substrates: ' + str(len(important)))
    return important
    

In [51]:
# Screen the cefoperazone-contextualized model with the default 85% objective cutoff
cef_substrates = test_growth_substrates(cef_iCdJ794)

Important growth substrates: 6


In [56]:
cef_substrates

{'cpd00029_e --- Acetate',
 'cpd00041_e --- L-Aspartate',
 'cpd00122_e --- N-Acetyl-D-glucosamine',
 'cpd00307_e --- Cytosine',
 'cpd00492_e --- N-Acetyl-D-mannosamine',
 'cpd19585_e --- 2-Methylbutyrate'}

In [52]:
# Screen the clindamycin-contextualized model with the default 85% objective cutoff
clinda_substrates = test_growth_substrates(clinda_iCdJ794)

Important growth substrates: 6


In [57]:
clinda_substrates

{'cpd00029_e --- Acetate',
 'cpd00041_e --- L-Aspartate',
 'cpd00122_e --- N-Acetyl-D-glucosamine',
 'cpd00307_e --- Cytosine',
 'cpd01711_e --- Isobutyrate',
 'cpd19585_e --- 2-Methylbutyrate'}

In [53]:
# Screen the streptomycin-contextualized model with the default 85% objective cutoff
strep_substrates = test_growth_substrates(strep_iCdJ794)

Important growth substrates: 7


In [58]:
strep_substrates

{'cpd00029_e --- Acetate',
 'cpd00041_e --- L-Aspartate',
 'cpd00122_e --- N-Acetyl-D-glucosamine',
 'cpd00141_e --- Propionate',
 'cpd00307_e --- Cytosine',
 'cpd00492_e --- N-Acetyl-D-mannosamine',
 'cpd19585_e --- 2-Methylbutyrate'}

In [54]:
# Screen the gnotobiotic-contextualized model with the default 85% objective cutoff
gnoto_substrates = test_growth_substrates(gnoto_iCdJ794)

Important growth substrates: 5


In [59]:
gnoto_substrates

{'cpd00013_e --- Ammonia',
 'cpd00029_e --- Acetate',
 'cpd00041_e --- L-Aspartate',
 'cpd00122_e --- N-Acetyl-D-glucosamine',
 'cpd19585_e --- 2-Methylbutyrate'}

In [55]:
# Context specific growth substrates
# 1. cefoperazone
# 2. clindamycin
# 3. streptomycin
# 4. gnotobiotic
# Returned list layout: [0-3] exclusive to list 1-4; [4-9] pairs 1+2, 1+3, 1+4,
# 2+3, 2+4, 3+4; [10-13] triples 1+2+3, 1+2+4, 1+3+4, 2+3+4; [14] shared by all.
substrates = venn_comparison(cef_substrates, clinda_substrates, strep_substrates, gnoto_substrates)

List 1 only: 0.0% (0)
List 2 only: 16.7% (1)
List 3 only: 14.3% (1)
List 4 only: 20.0% (1)

List 1 + List 2: 0.0% (0)
List 1 + List 3: 15.5% (1)
List 1 + List 4: 0.0% (0)
List 2 + List 3: 0.0% (0)
List 2 + List 4: 0.0% (0)
List 3 + List 4: 0.0% (0)

List 1 + List 2 + List 3: 15.9% (1)
List 1 + List 2 + List 4: 0.0% (0)
List 1 + List 3 + List 4: 0.0% (0)
List 2 + List 3 + List 4: 0.0% (0)

Shared: 67.6% (4)


In [53]:
# Cefoperazone only
substrates[0]

set()

In [52]:
# Gnotobiotic only
substrates[3]

{'cpd00122_e --- N-Acetyl-D-glucosamine'}

In [51]:
# Conventional only
# Index 10 is the List 1 + List 2 + List 3 region: substrates flagged in the
# cefoperazone, clindamycin and streptomycin models but not in the gnotobiotic one.
substrates[10]

{'cpd00307_e --- Cytosine'}

In [50]:
# Core
# The last element is the region shared by all four lists.
substrates[-1]

{'cpd00041_e --- L-Aspartate'}