In [1]:
from riptide import *

In [25]:
iCdJ794 = cobra.io.read_sbml_model('/home/matthew/Desktop/repos/Jenior_iCdJ794_2019/data/iCdJ794.sbml')

In [28]:
# Read in formatted transcription files
def read_transcription(infile):
    abund_dict = {}
    with open(infile, 'r') as abunds:
        header = abunds.readline()
        for line in abunds:
            line = line.split()
            gene = line[0].split('|')[0]
            abund = float(line[2])
            abund_dict[gene] = abund
    
    return abund_dict

# Read in in vivo C. difficile transcription
cefoperazone = read_transcription('/home/matthew/Desktop/repos/Jenior_iCdJ794_2019/data/transcript/cefoperazone_630.mapped.norm.tsv')
clindamycin = read_transcription('/home/matthew/Desktop/repos/Jenior_iCdJ794_2019/data/transcript/clindamycin_630.mapped.norm.tsv')
streptomycin = read_transcription('/home/matthew/Desktop/repos/Jenior_iCdJ794_2019/data/transcript/streptomycin_630.mapped.norm.tsv')

In [29]:
# Run RIPTiDe contextualization
cef_iCdJ794_riptide = riptide.contextualize(model=iCdJ794, transcription=cefoperazone)
clinda_iCdJ794_riptide = riptide.contextualize(model=iCdJ794, transcription=clindamycin)
strep_iCdJ794_riptide = riptide.contextualize(model=iCdJ794, transcription=streptomycin)


Initializing model and integrating transcriptomic data...
Pruning zero flux subnetworks...
Exploring context-specific flux distributions...

Reactions pruned to 288 from 1129 (74.49% change)
Metabolites pruned to 284 from 1134 (74.96% change)
Flux through the objective DECREASED to ~69.94 from ~89.77 (22.09% change)

RIPTiDe completed in 14 seconds


Initializing model and integrating transcriptomic data...
Pruning zero flux subnetworks...
Exploring context-specific flux distributions...

Reactions pruned to 286 from 1129 (74.67% change)
Metabolites pruned to 284 from 1134 (74.96% change)
Flux through the objective DECREASED to ~73.18 from ~89.77 (18.48% change)

RIPTiDe completed in 15 seconds


Initializing model and integrating transcriptomic data...
Pruning zero flux subnetworks...
Exploring context-specific flux distributions...

Reactions pruned to 291 from 1129 (74.22% change)
Metabolites pruned to 288 from 1134 (74.6% change)
Flux through the objective DECREASED to ~74.9 from 

In [43]:
# Virulence algorithm rough draft

from cobra.flux_analysis.variability import *

def virulence_contribution(model, substrates, optimum_fraction=0.85, cytosol='cytosol'):
    # model = COBRA model (REQUIRED)
    # substrates = list or set of metabolite IDs (REQUIRED)
    # optimum_fraction = fraction of optimum to constrain previous objective
    # cytosol = label for intracellular compartment
    
    # Save exchange reactions IDs
    exchanges = set()
    for rxn in model.reactions:
        if len(list(rxn.reactants)) == 0 or len(list(rxn.products)) == 0:
            exchanges |= set([rxn.id])
        
    # Assess essentiality of base model (1% objective flux)
    base_genes = find_essential_genes(model)
    base_rxns = find_essential_reactions(model)
    
    # Handle both intra- and extracellular metabolites
    substrates = [x.split('_')[0] for x in substrates]
    intracellular_cpd = set([x + '_c' for x in substrates])
    extracellular_cpd = set([x + '_e' for x in substrates])
    
    # Create substrate coefficient dictionary
    substrate_coeff_dict = {}
    metabolites = set([x.id for x in model.metabolites])
    for cpd in intracellular_cpd: 
        # check if metabolite is not in the model
        if cpd not in metabolites:
            continue
        else:
            substrate_coeff_dict[model.metabolites.get_by_id(cpd)] = -1.0
    
    # Create virulence metabolite
    virulence_cpd_c = cobra.Metabolite(
    'virulence',
    name='Virulence factors',
    compartment=cytosol)
    substrate_coeff_dict[virulence_cpd_c] = 1.0
        
    with model as m:
        
        # Fix previous objective as a constraint
        prev_obj_val = m.slim_optimize()
        prev_obj_constraint = m.problem.Constraint(m.objective.expression, 
                                                   lb=prev_obj_val*optimum_fraction, ub=prev_obj_val)
        m.add_cons_vars([prev_obj_constraint])
    
        # Add virulence reaction and sink
        virulence_rxn_c = cobra.Reaction('virulence_rxn_c') 
        virulence_rxn_c.name = 'Virulence factor generation'
        virulence_rxn_c.lower_bound = 0.
        virulence_rxn_c.upper_bound = 1000.
        virulence_rxn_c.add_metabolites(substrate_coeff_dict)
        m.add_reactions([virulence_rxn_c])
        m.add_boundary(virulence_cpd_c, type='sink', reaction_id='virulence_DM', ub=1000.)
        
        # Set new objective
        m.objective = m.reactions.virulence_rxn_c

        # Check essentiality, screen against those essential to biomass
        virulence_genes = find_essential_genes(m)
        virulence_genes = virulence_genes.difference(base_genes)
        virulence_genes = set([x.id for x in virulence_genes])
        virulence_rxns = find_essential_reactions(m)
        virulence_rxns = virulence_rxns.difference(base_rxns)
        virulence_substrates = set()
        for rxn in virulence_rxns:
            for cpd in rxn.reactants:
                test = cpd.id.split('_')[0]
                if test not in substrates:
                    virulence_substrates |= set([cpd.id])
        virulence_substrates = virulence_substrates.difference(set(['virulence']))
        # Usually includes several co-factors that aren't super informative
        
    # Look an additonal degree away from the identified nodes, leverage to architecture of the network
    adj_genes, adj_cpds = track_adjacent(model, virulence_rxns, virulence_substrates, virulence_genes, intracellular_cpd)
    
    # Report some stats
    print('Genes directly associated with virulence: ' + str(len(virulence_genes)))
    print('Metabolites directly associated with virulence: ' + str(len(virulence_substrates)) + '\n')
    
    print('Genes indirectly associated with virulence: ' + str(len(adj_genes)))
    print('Metabolites indirectly associated with virulence: ' + str(len(adj_cpds)))
    
    return virulence_genes, virulence_substrates, adj_genes, adj_cpds


def track_adjacent(model, reactions, metabolites, genes, ignore):
    
    optimum_fraction=0.85
    
    # Essentiality check for near-optimal virulence production 
    with model as m:
        obj_threshold = m.slim_optimize()
        high_flux_genes = find_essential_genes(m, threshold=obj_threshold * optimum_fraction)
        high_flux_reactions = find_essential_reactions(m, threshold=obj_threshold * optimum_fraction)
        high_flux_substrates = set()
        for rxn in high_flux_reactions:
            for cpd in rxn.reactants:
                if cpd.id not in metabolites:
                    high_flux_substrates |= set([cpd.id])
    
    # Parse model topology for adjacency
    adjacent_substrates = set()
    adjacent_genes = set()
    for cpd1 in metabolites:
        for rxn in model.metabolites.get_by_id(cpd1).reactions:
            if rxn.id not in reactions:
                adjacent_genes |= rxn.genes
                for cpd2 in rxn.reactants:
                    if cpd2.id not in metabolites:
                        adjacent_substrates |= set([cpd2.id])
                
    # Compare to high flux essentiality
    adjacent_substrates = high_flux_substrates.intersection(adjacent_substrates)
    adjacent_substrates = adjacent_substrates.difference(metabolites).difference(ignore)
    adjacent_genes = high_flux_genes.intersection(adjacent_genes)
    adjacent_genes = adjacent_genes.difference(genes)
    
    return adjacent_genes, adjacent_substrates
    


In [31]:
# Karlsson et al. (1999). Microbiology
# Antunes et al. (2012). Nucleic Acids Res.
proline = ['cpd00129','cpd00567'] 
glucose = ['cpd00026','cpd00027','cpd00079','cpd00089','cpd00190','cpd00499']
fructose = ['cpd00072','cpd00082','cpd00802','cpd00503']
mannitol = ['cpd00314','cpd00491','cpd34018']
sucrose = ['cpd00076','cpd19187']

toxin_metabolites = proline + glucose + fructose + mannitol + sucrose

In [44]:
base_genes, base_substrates, adj_genes, adj_cpds = virulence_contribution(iCdJ794, toxin_metabolites)

Genes directly associated with virulence: 8
Metabolites directly associated with virulence: 12

Genes indirectly associated with virulence: 39
Metabolites indirectly associated with virulence: 107


In [45]:
cef_genes, cef_substrates, cef_adj_genes, cef_adj_cpds = virulence_contribution(cef_iCdJ794_riptide.model, toxin_metabolites)

Genes directly associated with virulence: 21
Metabolites directly associated with virulence: 50

Genes indirectly associated with virulence: 71
Metabolites indirectly associated with virulence: 138


In [46]:
clinda_genes, clinda_substrates, clinda_adj_genes, clinda_adj_cpds = virulence_contribution(clinda_iCdJ794_riptide.model, toxin_metabolites)

Genes directly associated with virulence: 18
Metabolites directly associated with virulence: 48

Genes indirectly associated with virulence: 67
Metabolites indirectly associated with virulence: 136


In [47]:
strep_genes, strep_substrates, strep_adj_genes, strep_adj_cpds = virulence_contribution(strep_iCdJ794_riptide.model, toxin_metabolites)

Genes directly associated with virulence: 22
Metabolites directly associated with virulence: 44

Genes indirectly associated with virulence: 69
Metabolites indirectly associated with virulence: 139


In [48]:
toxin_substrates = cef_substrates.intersection(clinda_substrates).intersection(strep_substrates)
for x in toxin_substrates: print(iCdJ794.metabolites.get_by_id(x).name)

H2O
2-Demethylmenaquinone 8
Malonyl-CoA
2-Methylbutyrate
CoA
Phosphate
ATP
Pyruvate
Ubiquinone-8
Glycolate
Palmitate
2-Methylbutyryl-CoA
L-Aspartate
Acetyl-CoA
NADPH
L-Isoleucine
ocdca
3MOP
Propionate
2-Oxoglutarate
ADP
Propionate
H+
L-Aspartate
Na+
2-Methylbutyrate
Propionyl-CoA
Formate
NADH
NAD
H+


In [49]:
# 1st degree
cef_only = cef_substrates.difference(clinda_substrates).difference(strep_substrates)
cef_only = set([iCdJ794.metabolites.get_by_id(x).name for x in cef_only])
clinda_only = clinda_substrates.difference(cef_substrates).difference(strep_substrates)
clinda_only = set([iCdJ794.metabolites.get_by_id(x).name for x in clinda_only])
strep_only = strep_substrates.difference(clinda_substrates).difference(cef_substrates)
strep_only = set([iCdJ794.metabolites.get_by_id(x).name for x in strep_only])

print('Cefoperazone:')
print(cef_only)
print('Clindamycin:')
print(clinda_only)
print('Streptomycin:')
print(strep_only)

Cefoperazone:
set()
Clindamycin:
set()
Streptomycin:
{'N-Acetyl-D-mannosamine', 'Neu5Ac', '5-Aminopentanoate'}


In [50]:
# 2nd degree
cef_only = cef_adj_cpds.difference(clinda_adj_cpds).difference(strep_adj_cpds)
cef_only = set([iCdJ794.metabolites.get_by_id(x).name for x in cef_only])
clinda_only = clinda_adj_cpds.difference(cef_adj_cpds).difference(strep_adj_cpds)
clinda_only = set([iCdJ794.metabolites.get_by_id(x).name for x in clinda_only])
strep_only = strep_adj_cpds.difference(clinda_adj_cpds).difference(cef_adj_cpds)
strep_only = set([iCdJ794.metabolites.get_by_id(x).name for x in strep_only])

print('Cefoperazone:')
print(cef_only)
print('Clindamycin:')
print(clinda_only)
print('Streptomycin:')
print(strep_only)

Cefoperazone:
set()
Clindamycin:
set()
Streptomycin:
{'Glycine', 'N-Acetyl-D-glucosamine 6-phosphate', 'UDP-galactose', 'trdrd', 'Phosphoenolpyruvate'}
