In [1]:
import os

import cobra
import numpy as np
import pandas as pd

ModuleNotFoundError: No module named 'cobra'

In [18]:
# Helper functions for estimating delta G ranges.
# NOTE(review): presumably R [J/(mol*K)] * 1e-3 * T [298.15 K], i.e. RT in kJ/mol --
# confirm the units match the deltaG_prime_m column this term is added to.
RT = 8.31446261815324 * 10**-3 * 298.15

def parse_reaction_to_df(reaction):
    """Tabulate a reaction's stoichiometry as a two-column DataFrame.

    Parameters
    ----------
    reaction : object
        Anything whose ``metabolites`` attribute maps metabolite objects
        (each exposing a string ``id``) to stoichiometric coefficients.

    Returns
    -------
    pandas.DataFrame
        One row per metabolite with columns
        ``met_id`` -- ``'bigg.metabolite:'`` followed by the metabolite id
        minus its last two characters, and
        ``s`` -- the stoichiometric coefficient.
    """
    prefix = 'bigg.metabolite:'
    met_ids = []
    coefficients = []
    for metabolite, coefficient in reaction.metabolites.items():
        # Drop the final two characters of the id.
        # NOTE(review): presumably a one-letter compartment suffix such as
        # '_c'; longer suffixes would be cut incorrectly -- confirm.
        met_ids.append(prefix + metabolite.id[:-2])
        coefficients.append(coefficient)

    return pd.DataFrame({'met_id': met_ids, 's': coefficients})


def get_concentrations(x, metConc):
    """Return the concentration to use for metabolite ``x``.

    A handful of metabolites have hard-wired values; every other
    metabolite gets ``metConc`` back unchanged.

    Parameters
    ----------
    x : str
        Metabolite identifier in the ``'bigg.metabolite:<id>'`` form.
    metConc : number
        Fallback concentration for metabolites without a fixed value.

    Returns
    -------
    number
        The fixed value for nad / nadh / nadp / nadph, ``1`` for h and h2o,
        otherwise ``metConc``.
        NOTE(review): units are presumably molar -- confirm.
    """
    # Checked in order with plain equality, first match wins.
    fixed_values = (
        ('bigg.metabolite:nad', 2.6e-3),
        ('bigg.metabolite:nadh', 8.3e-5),
        ('bigg.metabolite:nadp', 2.1e-6),
        ('bigg.metabolite:nadph', 1.2e-4),
        # protons and water contribute a factor of 1 to the product
        ('bigg.metabolite:h', 1),
        ('bigg.metabolite:h2o', 1),
    )
    for known_id, value in fixed_values:
        if x == known_id:
            return value
    return metConc
    
    
def rdf2metConcs(mdf, ub, lb):
    """Attach the concentrations that extremise delta G to a reaction table.

    Adds two columns to ``mdf`` (in place) and returns it:
    ``conc_maxG`` holds the per-metabolite concentration used for the upper
    delta G estimate, ``conc_minG`` the one used for the lower estimate.
    Metabolites with fixed values in ``get_concentrations`` ignore the bounds.

    Parameters
    ----------
    mdf : pandas.DataFrame
        Table with ``met_id`` and ``s`` columns.
    ub, lb : number
        Upper and lower concentration bound (note the argument order).

    Returns
    -------
    pandas.DataFrame
        The same ``mdf`` object with the two columns added.
    """
    conc_for_max = []
    conc_for_min = []

    for met_id, coefficient in zip(mdf['met_id'], mdf['s']):
        # A negative coefficient gives a negative exponent in the
        # concentration product, so the upper bound minimises the term and
        # the lower bound maximises it; otherwise it is the other way round.
        if coefficient < 0:
            bound_for_min, bound_for_max = ub, lb
        else:
            bound_for_min, bound_for_max = lb, ub
        conc_for_min.append(get_concentrations(met_id, bound_for_min))
        conc_for_max.append(get_concentrations(met_id, bound_for_max))

    mdf['conc_maxG'] = conc_for_max
    mdf['conc_minG'] = conc_for_min
    return mdf

def dg2dir(maxDg, minDg):
    """Classify a reaction's direction from its delta G range.

    Parameters
    ----------
    maxDg, minDg : number
        Upper and lower end of the estimated delta G range.

    Returns
    -------
    str
        ``'reversible'`` when the range straddles zero
        (``maxDg > 0`` and ``minDg < 0``),
        ``'irreversible (backward)'`` when delta G is never negative
        (``maxDg > 0`` and ``minDg >= 0``),
        ``'irreversible (forward)'`` otherwise.

    Notes
    -----
    Comparisons with NaN are all false, so a NaN in either argument ends up
    in the final ``'irreversible (forward)'`` branch.
    """
    if maxDg > 0 and minDg < 0:
        return 'reversible'
    # `>=` so that a range starting exactly at zero is not reported as
    # forward: delta G never drops below zero in that case.
    if maxDg > 0 and minDg >= 0:
        return 'irreversible (backward)'
    return 'irreversible (forward)'
    
def append_gene_set_with_free_energy_range(model, dfff, lb, ub):
    """Add coenzyme labels, delta G ranges and direction calls to ``dfff``.

    For every row the reaction named in ``row.reaction`` is looked up in
    ``model``, its stoichiometry is tabulated, and a second "mutant" table is
    derived by exchanging NADP(H) for NAD(H) (or the reverse when the
    reaction does not contain nadph). For both tables the upper and lower
    delta G are computed as
    ``row.deltaG_prime_m + RT * ln(prod(conc ** s))``.

    NOTE(review): if ``deltaG_prime_m`` is referenced to a non-1 M standard
    state (e.g. 1 mM), combining it with absolute concentrations would need
    a reference-state correction -- confirm what the CSV column holds.

    Parameters
    ----------
    model : object
        Model whose ``reactions`` can be iterated and expose ``id``.
    dfff : pandas.DataFrame
        Needs ``reaction`` and ``deltaG_prime_m`` columns. Modified in place.
    lb, ub : number
        Lower and upper concentration bound for non-fixed metabolites.

    Returns
    -------
    pandas.DataFrame
        ``dfff`` with eight columns added: the wild-type / mutant coenzyme
        labels, max / min delta G for each, and the direction for each.
    """
    nadp_to_nad = {'bigg.metabolite:nadph': 'bigg.metabolite:nadh', 'bigg.metabolite:nadp': 'bigg.metabolite:nad'}
    nad_to_nadp = {'bigg.metabolite:nadh': 'bigg.metabolite:nadph', 'bigg.metabolite:nad': 'bigg.metabolite:nadp'}

    def _delta_g(base, table, conc_column):
        # base value plus RT * ln of the concentration product
        return base + RT * np.log(np.prod(table[conc_column] ** table['s']))

    # Insertion order here is the order in which columns are appended.
    collected = {
        'wild type coenzyme': [],
        'mutant coenzyme': [],
        'max_deltaG (WT)': [],
        'min_deltaG (WT)': [],
        'max_deltaG (mutant)': [],
        'min_deltaG (mutant)': [],
    }

    for _, entry in dfff.iterrows():
        matching = [rxn for rxn in model.reactions if entry.reaction == rxn.id]
        wt_table = parse_reaction_to_df(matching[0])
        mut_table = wt_table.copy()

        # Presence of nadph decides which coenzyme pair is the wild type.
        if 'bigg.metabolite:nadph' in wt_table['met_id'].tolist():
            wt_label, mut_label, substitution = 'NADP(H)', 'NAD(H)', nadp_to_nad
        else:
            wt_label, mut_label, substitution = 'NAD(H)', 'NADP(H)', nad_to_nadp
        collected['wild type coenzyme'].append(wt_label)
        collected['mutant coenzyme'].append(mut_label)
        mut_table['met_id'] = mut_table.met_id.replace(substitution)

        wt_table = rdf2metConcs(wt_table, ub, lb)
        mut_table = rdf2metConcs(mut_table, ub, lb)

        base = entry.deltaG_prime_m
        collected['max_deltaG (WT)'].append(_delta_g(base, wt_table, 'conc_maxG'))
        collected['min_deltaG (WT)'].append(_delta_g(base, wt_table, 'conc_minG'))
        collected['max_deltaG (mutant)'].append(_delta_g(base, mut_table, 'conc_maxG'))
        collected['min_deltaG (mutant)'].append(_delta_g(base, mut_table, 'conc_minG'))

    for column, values in collected.items():
        dfff[column] = values

    dfff['direction (wild type)'] = [
        dg2dir(upper, lower)
        for upper, lower in zip(collected['max_deltaG (WT)'], collected['min_deltaG (WT)'])
    ]
    dfff['direction (mutant)'] = [
        dg2dir(upper, lower)
        for upper, lower in zip(collected['max_deltaG (mutant)'], collected['min_deltaG (mutant)'])
    ]
    return dfff


def swap_reaction(model, reaction_index, thermo):
    """Replace a reaction by a copy with its nicotinamide coenzyme swapped.

    The reaction at ``reaction_index`` is copied, every occurrence of
    nadph/nadp is exchanged for nadh/nad (and vice versa) with the same
    coefficient, the copy is renamed ``<id>[mutant]``, its bounds are set
    from ``thermo``, and it takes the original reaction's place in ``model``.

    Parameters
    ----------
    model : cobra model
        Modified in place. Must contain the metabolites ``nad_c``,
        ``nadh_c``, ``nadp_c`` and ``nadph_c`` that are needed as partners.
    reaction_index : int
        Position of the reaction in ``model.reactions``.
    thermo : str
        ``'irreversible (forward)'`` -> bounds (0, 1000),
        ``'irreversible (backward)'`` -> bounds (-1000, 0),
        ``'reversible'`` -> bounds (-1000, 1000).
        Any other value leaves the copied bounds untouched.

    Returns
    -------
    The same ``model`` object.
    """
    # coenzyme id -> id of the coenzyme that takes its place
    partner_id = {
        'nadph_c': 'nadh_c',
        'nadp_c': 'nad_c',
        'nadh_c': 'nadph_c',
        'nad_c': 'nadp_c',
    }
    bounds_for = {
        'irreversible (forward)': (0, 1000),
        'irreversible (backward)': (-1000, 0),
        'reversible': (-1000, 1000),
    }

    original = model.reactions[reaction_index]
    rxn = original.copy()

    # Coenzyme entries to take out of the copy, and their replacements
    # (metabolite objects taken from the model) with identical coefficients.
    removed = {met: coeff for met, coeff in rxn.metabolites.items() if met.id in partner_id}
    added = {
        model.metabolites.get_by_id(partner_id[met.id]): coeff
        for met, coeff in removed.items()
    }

    rxn.subtract_metabolites(removed)
    rxn.add_metabolites(added)
    rxn.id = rxn.id + '[mutant]'
    if thermo in bounds_for:
        # set both bounds at once so no intermediate lower > upper state occurs
        rxn.bounds = bounds_for[thermo]

    # Use the `model` argument (not a global) and pass lists, which is what
    # remove_reactions / add_reactions expect.
    model.remove_reactions([original])
    model.add_reactions([rxn])
    return model

In [3]:
# Load the SBML model and the gene-set table used by the cells below.
# Later cells read the `reaction`, `deltaG_prime_m` and `gene` columns of gene_set.
model = cobra.io.read_sbml_model('assets/iJO1366.xml')
gene_set = pd.read_csv('assets/EC_1.X.1.X.Unique_NAD(P)_Coupled.GeneSet.Thermo.csv')

In [15]:
# Add coenzyme labels, delta G ranges and direction calls to gene_set.
# Positional arguments are (model, dfff, lb, ub): lb = 1e-6, ub = 1e-2.
# NOTE(review): bounds are presumably molar concentrations -- confirm units.
gene_set = append_gene_set_with_free_energy_range(model,gene_set,1e-6,1e-2)

In [39]:
# Build one mutant SBML model per gene, in two variants:
#   stoich        - coenzyme swapped, bounds taken from the wild-type direction
#   stoich_thermo - coenzyme swapped, bounds taken from the mutant direction
# Each entry: (direction column, output folder, message printed when done).
variants = [
    ('direction (wild type)', 'assets/models/stoich/', 'Finished building stoich only models'),
    ('direction (mutant)', 'assets/models/stoich_thermo/', 'Finished building stoich+thermo models'),
]

for direction_col, out_dir, done_msg in variants:
    # make sure the target folder exists before any model is written
    os.makedirs(out_dir, exist_ok=True)

    for gene,dfn in gene_set.groupby('gene'):
        # NOTE(review): keep the working copy bound to the module-level name
        # `m`; swap_reaction may reference that global -- confirm before renaming.
        m = model.copy()
        for idx,row in dfn.iterrows():
            # the index is looked up on every pass because each swap removes
            # one reaction from the model and appends another
            j = np.where([x.id == row.reaction for x in m.reactions])[0][0]
            # swap the coenzyme and set bounds from the selected direction column
            m = swap_reaction(m,j,row[direction_col])
        # model id: <original id>_<gene>_<mutant coenzyme without the "(H)" part>
        m.id = m.id + '_' + gene + '_' + dfn['mutant coenzyme'].tolist()[0].split('(')[0]
        fileName = out_dir + m.id + '.xml'
        cobra.io.write_sbml_model(m,fileName)

    print(done_msg)

  warn("need to pass in a list")


Finished building stoich only models
Finished building stoich+thermo models
