## Generate MetaCyc-dependent files

NOTE: RUN generate_flatfiles AGAIN AFTER MAKING CHANGES TO THIS

### protein_complexes.txt

To start Pathway Tools, go to the Pathway Tools directory ('/opt/pathway-tools') and run ./pathway-tools -lisp -python:

In [1]:
import cobra
import os
import pandas as pd
import numpy as np
import urllib
import pythoncyc as pc
import re
from Bio.SeqUtils import seq3
from Bio import Seq
from os.path import join

## Metabolites

# Name of the metabolites output table.
met_output_file = 'metabolites.txt'

# Define Models
# Every reference model is read from this (machine-specific) folder.
directory = '/home/jt/UCSD/bacillusme-master/bacillusme/building_data/'

# iJO1366.json is loaded as `eco`.
eco_directory = join(directory, 'iJO1366.json')
eco = cobra.io.load_json_model(eco_directory)

# iYO844.json is loaded as `m_model`, the model edited by the cells below.
# (`ijo_directory` is a historical name: it points at iYO844.json.)
ijo_directory = join(directory, 'iYO844.json')
m_model = cobra.io.load_json_model(ijo_directory)

# universal_model.json is loaded as `uni`.
uni_directory = join(directory, 'universal_model.json')
uni = cobra.io.load_json_model(uni_directory)


In [2]:
# Solve the model as loaded, before any of the edits made in the cells below;
# the returned Solution is displayed as the cell output.
m_model.optimize()

<Solution 0.12 at 0x7fa27ce5e450>

In [3]:
## Remove reactions
# Ids of the reactions to drop from m_model, each with the reason for removal.
remove_reactions = [
    'ETOHt3',  # There is no evidence of ethanol being actively transported
]

for rxn_id in remove_reactions:
    # get_by_id raises if the id is not in the model, so a typo is not silently ignored.
    m_model.reactions.get_by_id(rxn_id).remove_from_model()

In [4]:
## Copper transport fixes
# Create the Cu(+1) metabolites (cytosolic and extracellular) and register them in the model.
cu_c = cobra.Metabolite('cu_c')
cu_e = cobra.Metabolite('cu_e')
m_model.add_metabolites([cu_c,cu_e])

# Cut1 is for Cu+1 (BSU33500)
# The cu2 species are taken out of the existing reaction and replaced by cu_e -> cu_c.
r = m_model.reactions.Cut1
r.subtract_metabolites({cobra.Metabolite('cu2_c'):-1,cobra.Metabolite('cu2_e'):1})
r.add_metabolites({'cu_e':-1,'cu_c':1})
print(r.reaction)

# Cu2abc1 is for Cu+2(BSU33500)
# New ATP-driven reaction: the reaction is added to the model first, then given its metabolites by id.
r = cobra.Reaction('CU2abc1')
m_model.add_reaction(r)
r.add_metabolites({'atp_c':-1,'cu2_e':-1,'h2o_c':-1,
                   'adp_c':1,'cu2_c':1,'h_c':1,'pi_c':1})
r.gene_reaction_rule = 'BSU33500'
print(r.reaction)

# Cobalt(II) import through permease
# NOTE(review): the reaction name below says "via diffusion" while this comment and the
# gene rule describe a permease - confirm which wording is intended.
r = cobra.Reaction('COBALTt5')
m_model.add_reaction(r)
r.add_metabolites({'cobalt2_e':-1,'cobalt2_c':1})
r.name = 'Cobalt(II) transport via diffusion'
r.gene_reaction_rule = 'BSU33490'
print(r.reaction)

# Arginine is transported through permeases and ABC transporters, not proton symport
r = m_model.reactions.ARGPt6
r.remove_from_model()

# Ethanol diffuses through membrane
# No gene is associated with this reaction (empty gene rule).
r = cobra.Reaction('ETOHtex')
m_model.add_reaction(r)
r.add_metabolites({'etoh_e':-1,'etoh_c':1})
r.name = 'Ethanol transport via diffusion'
r.gene_reaction_rule = ''
print(r.reaction)


## Chloride-Potassium transport through BSU31100. Not necessary as cl is not used by the model.

## Thiamine (vitamin B1) transport fixes
# Disabled draft. NOTE(review): the stoichiometry below still uses the copper (cu2)
# metabolites, so it would need thiamine metabolites before being enabled.
# r = cobra.Reaction('THMabc')
# m_model.add_reaction(r)
# r.add_metabolites({'atp_c':-1,'cu2_e':-1,'h2o_c':-1,
#                    'adp_c':1,'cu2_c':1,'h_c':1,'pi_c':1})
# r.gene_reaction_rule = 'BSU33490'
# print(r.reaction)

atp_c + cu_e + h2o_c --> adp_c + cu_c + h_c + pi_c
atp_c + cu2_e + h2o_c --> adp_c + cu2_c + h_c + pi_c
cobalt2_e --> cobalt2_c
etoh_e --> etoh_c


In [5]:
## New transporters
# Reaction id -> reaction string understood by Reaction.build_reaction_from_string.
new_reactions_dict = {
    'CBLtex':'cbl1_e -> cbl1_c',
    'EX_cbl1_e':'cbl1_e <=>'
}

for rxn_id, rxn_string in new_reactions_dict.items():
    # The reaction is attached to the model first and only then built from its string.
    rxn = cobra.Reaction(rxn_id)
    m_model.add_reaction(rxn)
    rxn.build_reaction_from_string(rxn_string)

unknown metabolite 'cbl1_e' created
unknown metabolite 'cbl1_c' created


In [6]:
## Fix of gene reaction rules
# Reaction id -> replacement gene reaction rule, grouped by the source of the evidence.
# An empty string removes the gene association of the reaction.
new_rules_dict = {
    ## BsubCyc
    'ACTD2' : 'BSU08060 and BSU08070',
    'AIRC1' : 'BSU06420 or BSU06430',
    'ANS' : 'BSU00750 or BSU22680',
    'PRFGS_1' : 'BSU06480 or BSU06470',
    'RNDR1' : 'BSU17380 and BSU17390',
    'RNDR2' : 'BSU17380 and BSU17390',
    'RNDR3' : 'BSU17380 and BSU17390',
    'RNDR4' : 'BSU17380 and BSU17390',
    'LYSLG_BS' : '', # Former rule BG12900 of inexistent gene
    'PNTOt2': '', # Former rule BSU38240 is a putative Acetate/Na symporter
    'Kt3r':'BSU31610 and BSU31660 and BSU31600 and BSU31620 and BSU31630 and BSU31650 and BSU31640', # Rule is OR, but it should be AND. CPL8J2-158
    'NAt3_1':'BSU31600 and BSU31610 and BSU31620 and BSU31630 and BSU31640 and BSU31650 and BSU31660 or BSU09680 or BSU33420 or BSU11640 or BSU09850',

    ## From BLAST
    'HCO3E' : 'BSU30690',
    'PGL':'BSU13010',

    ## TransportDB
    'F6Pt6_2':'BSU12010 or BSU14400',
    'ACt2r' : 'BSU38240',

    ## New transporters from BsubCyc
    'CBLtex':'BSU33170',
    'ASPt2r':'BSU10220',
#     'PSER_Lt6':'BSU19999', # This one comes from DELTA-BLAST the human protein to bacillus (?)
    'MAN6Pt6':'BSU10520',
    'PYRt2':'BSU28900 and BSU28910',
    'GLCpts':'BSU13890 or (BSU38570 and BSU38580 and BSU38590)',
    'RIBFLVt2':'BSU23050'

}

# Keep a CSV record of the overrides (one row per reaction, single 'rule' column).
rules_frame = pd.DataFrame.from_dict({'rule':new_rules_dict})
rules_frame.to_csv('new_rules.csv')

# Apply every override; get_by_id raises if a reaction id is not in the model.
for rxn_id, new_rule in new_rules_dict.items():
    m_model.reactions.get_by_id(rxn_id).gene_reaction_rule = new_rule

In [7]:
# Display the rule overrides as a table (one row per reaction id, single 'rule' column).
pd.DataFrame.from_dict({'rule':new_rules_dict})

Unnamed: 0,rule
ACTD2,BSU08060 and BSU08070
ACt2r,BSU38240
AIRC1,BSU06420 or BSU06430
ANS,BSU00750 or BSU22680
ASPt2r,BSU10220
CBLtex,BSU33170
F6Pt6_2,BSU12010 or BSU14400
GLCpts,BSU13890 or (BSU38570 and BSU38580 and BSU38590)
HCO3E,BSU30690
Kt3r,BSU31610 and BSU31660 and BSU31600 and BSU3162...


In [8]:
def frameid_to_str(frameid):
    """Return *frameid* as a plain string without '|' delimiters.

    The id is converted with ``str``, every '|' is removed and '_MISC_' is
    rewritten as '_misc_' (GenBank and BsubCyc use different cases).
    """
    # str.replace leaves the text untouched when the substring is absent,
    # so no membership test is needed before the second replacement.
    return str(frameid).replace('|', '').replace('_MISC_', '_misc_')

def frames_to_strlist(frames):
    """Return the cleaned frame id (see ``frameid_to_str``) of each frame, in order.

    Each element of *frames* must expose a ``frameid`` attribute.
    """
    return [frameid_to_str(frame.frameid) for frame in frames]
        
def generate_prot_cplx_dict(DB):
    """Map every protein frame of *DB* to the genes reported for it.

    Returns a dict whose keys are cleaned protein frame ids and whose values
    are lists of cleaned gene frame ids, in the order returned by
    ``genes_of_protein``. Proteins for which that call returns nothing are
    left out of the dict.
    """
    protein_complexes_dict = {}
    for cplx in DB.proteins.instances:
        genes_of_cplx = pc.PGDB.genes_of_protein(DB, cplx)
        if not genes_of_cplx:
            continue
        gene_strings = protein_complexes_dict.setdefault(
            frameid_to_str(cplx.frameid), [])
        gene_strings.extend(frameid_to_str(gene) for gene in genes_of_cplx)
    return protein_complexes_dict

def generate_id_to_accession_dict(DB):
    """Map the cleaned frame id of every gene in *DB* to ``str(accession_1)``.

    The frame data of each gene is fetched with ``get_frame_data`` and its
    ``accession_1`` slot is stored as a string.
    """
    gene_id_dict = {}
    for gene in DB.genes.instances:
        gene_data = pc.PToolsFrame.PFrame.get_frame_data(gene)
        gene_id_dict[frameid_to_str(gene.frameid)] = str(gene_data.accession_1)
    return gene_id_dict

In [9]:
# Open the 'bsub' and 'ecoli' databases through PythonCyc.
# Pathway Tools must already be running (see the start-up instructions at the top).
bsub = pc.select_organism('bsub')
ecoli = pc.select_organism('ecoli')

In [10]:
# Handle on the `proteins` class of the bsub database.
PROTEINS = bsub.proteins

In [11]:
# Protein frame id -> list of gene frame ids, for each organism.
protein_complexes_dict = generate_prot_cplx_dict(bsub)
ecoli_protein_complexes_dict = generate_prot_cplx_dict(ecoli)

In [12]:
## MANUALLY ADDED COMPLEXES
# Hand-curated entries with the same shape as the generated ones:
# complex id -> list of gene ids.
protein_complexes_dict['secYEG'] = ['BSU01360','BSU01000','BSU33630']
protein_complexes_dict['SRP-CPLX'] = ['BSU15980','BSU_misc_RNA_2']

In [13]:
# Re-key the E. coli complexes by gene accession instead of gene frame id and
# build the reverse map accession -> complex id.
gene_id_dict = generate_id_to_accession_dict(ecoli)
ecoli_gene_to_cplx_dict = dict()
for cplx_id in ecoli_protein_complexes_dict.keys():
    old_values = ecoli_protein_complexes_dict[cplx_id]
    # Genes without an accession entry are dropped. This used to be done with a
    # bare `except:`, which also hid unrelated errors; only a missing key is
    # expected here, so it is tested for explicitly.
    new_values = [gene_id_dict[old_value]
                  for old_value in old_values
                  if old_value in gene_id_dict]
    for new_value in new_values:
        # The first complex seen for a gene is kept; later ones do not overwrite it.
        ecoli_gene_to_cplx_dict.setdefault(new_value, cplx_id)
    # Only values of existing keys are replaced, so iterating the dict stays safe.
    ecoli_protein_complexes_dict[cplx_id] = new_values

In [14]:
## Get stoichiometry from homology with ECOLI
ecoli_blast_df = pd.read_csv('blast_out_bsub_as_ref.txt', sep='\t')

# Two lookup tables built from the BLAST hits, one per direction.
# When a gene appears in several accepted rows, the last row wins.
bsub_to_ecoli_dict = {}
ecoli_to_bsub_dict = {}
for key, row in ecoli_blast_df.iterrows():
    # NOTE(review): the cut-off assumes 'ident' is a fraction (0-1); if the
    # column holds percentages, nearly every hit passes - confirm.
    if not row['ident'] > 0.2:
        continue
    bsub_id, ecoli_id = row['BSUB_gene'], row['ECOLI_gene']
    bsub_to_ecoli_dict[bsub_id] = ecoli_id
    ecoli_to_bsub_dict[ecoli_id] = bsub_id
# BLAST might have found false positive gene hits.
# However, stoichiometry is only used if proteins in complex are the same as well.


In [15]:
## Get stoichiometry information from ECOLI
ecoli_stoich_df = pd.read_csv(
    'ecoli_protein_complexes.txt',
    sep='\t',
    names=['Complex', 'Name', 'Stoichiometry', 'Source'],
)

# Complex id -> {gene id: copy number}. The 'Stoichiometry' column is parsed as
# 'gene(n) AND gene(n) ...'; an empty pair of parentheses counts as 1.
ecoli_stoich_dict = {}
for key, row in ecoli_stoich_df.iterrows():
    cplx_id = row['Complex']
    stoichiometry_dict = {}
    for bnums in row['Stoichiometry'].split(' AND '):
        bnum, num = bnums.rstrip(')').split('(')
        stoichiometry = 1. if num == '' else float(num)
        stoichiometry_dict[bnum] = stoichiometry
    ecoli_stoich_dict[cplx_id] = stoichiometry_dict

In [16]:
## Get possible complex homolog in ecoli
# B. subtilis complex id -> E. coli complex id. A pair is accepted only when
# both complexes have the same number of genes and every B. subtilis gene maps
# (through the BLAST table) onto a gene of the E. coli complex.
cplx_to_cplx_dict = dict()

for cplx_id in protein_complexes_dict.keys():
    gene_ids = protein_complexes_dict[cplx_id]

    for gene_id in gene_ids:
        # Missing homologs/complexes show up as KeyError and mean "no match via
        # this gene". Only that error is skipped; the former bare `except:`
        # also hid any unrelated failure.
        try:
            ecoli_gene = bsub_to_ecoli_dict[gene_id]
            ecoli_cplx = ecoli_gene_to_cplx_dict[ecoli_gene]
            ecoli_cplx_genes = ecoli_protein_complexes_dict[ecoli_cplx]
            if len(ecoli_cplx_genes) != len(gene_ids):
                continue
            converted_bsub_genes = [bsub_to_ecoli_dict[cplx_gene]
                                    for cplx_gene in gene_ids]
        except KeyError:
            continue

        shared_genes = set(ecoli_cplx_genes) & set(converted_bsub_genes)
        if len(shared_genes) == len(ecoli_cplx_genes):
            # If several genes lead to different matching complexes, the last one wins.
            cplx_to_cplx_dict[cplx_id] = ecoli_cplx

In [17]:
# Create stoichiometry
# B. subtilis complex id -> {B. subtilis gene id: copy number}, copied from the
# matched E. coli complex. E. coli complexes that cannot be translated are
# collected in complexes_not_included.
bsub_stoich_dict = dict()
complexes_not_included = []
for cplx in protein_complexes_dict.keys():
    if cplx not in cplx_to_cplx_dict:
        continue
    ecoli_cplx = cplx_to_cplx_dict[cplx]
    # Only missing table entries (KeyError) mean "not included"; any other
    # error now surfaces instead of being counted silently by a bare `except:`.
    try:
        ecoli_cplx_stoich_dict = ecoli_stoich_dict[ecoli_cplx]
        bsub_cplx_stoich_dict = dict()
        for gene_of_ecoli_cplx in ecoli_cplx_stoich_dict.keys():
            bsub_gene_id = ecoli_to_bsub_dict[gene_of_ecoli_cplx]
            bsub_cplx_stoich_dict[bsub_gene_id] = ecoli_cplx_stoich_dict[gene_of_ecoli_cplx]
    except KeyError:
        complexes_not_included.append(ecoli_cplx)
    else:
        bsub_stoich_dict[cplx] = bsub_cplx_stoich_dict
# Function-call form of print gives the same output on Python 2 and runs on Python 3.
print('A total of ' + str(len(complexes_not_included)) + ' ecoli complexes were not included')

A total of 1025 ecoli complexes were not included


In [18]:
# Add bsubcyc complex stoichiometry here and correct bsub_stoich_dict

#### gene_dictionary.csv

In [19]:
# Gene common name and every synonym -> cleaned gene frame id.
# A name shared by several genes ends up pointing at the last gene visited.
name_to_locus_dict = {}
GENES = bsub.genes
for gene in GENES.instances:
    gene_data = pc.PToolsFrame.PFrame.get_frame_data(gene)
    gene_id = frameid_to_str(gene.frameid)

    name = str(gene_data.common_name)
    name_to_locus_dict[name] = gene_id

    # Genes without synonyms contribute only their common name.
    for syn in gene_data.synonyms or []:
        syn_id = frameid_to_str(syn)
        name_to_locus_dict[syn_id] = gene_id

In [20]:
# Write the name -> locus table: index named 'name', single column 'locus_id'.
gene_dictionary = pd.DataFrame.from_dict({'locus_id':name_to_locus_dict})
gene_dictionary.index.name = 'name'
gene_dictionary.to_csv('gene_dictionary.csv')

#### bsub_enzyme_stoichiometry.txt

In [21]:
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser
import urllib2
import re
from tqdm import tqdm
import json
# NOTE(review): this reads 'gene_name_dictionary.csv' with index_col=1, whereas
# the cell above writes 'gene_dictionary.csv' - confirm that a separate file
# with a different column layout is intended.
gene_dictionary = pd.read_csv('gene_name_dictionary.csv',index_col=1)
# Just read, but run again if needed (takes 1 hour or more)
# The `with` block closes the file handle, which the former
# open(...).read() expression left open.
with open("bsub_enzyme_stoichiometry.txt", "r") as stoich_file:
    bsubcyc_complex_stoichiometry_dict = json.load(stoich_file)

In [22]:
# # Just read, but uncomment if needed (takes 1 hour or more)
# protein_html ='https://bsubcyc.org/BSUB/NEW-IMAGE?type=ENZYME&object={}'
# bsubcyc_complex_stoichiometry_dict = dict()
# for protein_id in tqdm(protein_complexes_dict.keys()):
# #protein_id = 'CPLX8J2-59'
#     # Get HTML
#     try:
#         html_page = urllib2.urlopen(protein_html.format(protein_id))
#     except:
#         # No HTML
#         continue
#     soup = BeautifulSoup(html_page)

#     # Parse HTML
#     try:
#         raw_string = soup.findAll('a',attrs={'href':'/BSUB/NEW-IMAGE?type=POLYPEPTIDE&object={}'.format(protein_id)})[0]
#     except:
#         # Different format
#         try:
#             raw_string = soup.findAll('a',attrs={'class':'ENZYME'})[0]
#         except:
#             # No HTML
#             continue
#     raw_string = str(raw_string)
#     stoich_raw_string = raw_string.split('"')[-1].split('=  ')[-1]

#     # Create dictionary
#     subcomplex_list = stoich_raw_string.split('][')
#     bsubcyc_complex_stoichiometry_dict[protein_id] = dict()
#     for subcplx in subcomplex_list:
#         subcplx_raw_string = subcplx.replace('[','').replace(']','').replace('(','').replace(')','')
#         if '<sub>' in subcplx_raw_string:
#             split_string = subcplx_raw_string.replace('<sub>',',').replace('</sub>',',').split(',')[:-1]
#             for idx,el in enumerate(split_string):
#                 if (idx+1)%2:
#                     gene = split_string[idx]
#                     stoich = split_string[idx+1]
#                     if gene:
#                         gene = gene.replace(gene[0],gene[0].lower())
#                         if gene in gene_dictionary.index:
#                             locus_id = gene_dictionary.loc[gene]['locus_id']
#                             bsubcyc_complex_stoichiometry_dict[protein_id][locus_id] = stoich
#                         else:
#                             print(gene, ' not found')

In [23]:
# import json
# bsubcyc_complex_stoichiometry_dict
# with open ('bsub_enzyme_stoichiometry.txt','w') as file:
#     file.write(json.dumps(bsubcyc_complex_stoichiometry_dict))

#### Create txt file

In [24]:
# Write protein_complexes.txt: one line per complex,
#   <complex id> \t default_name \t gene(stoich) AND gene(stoich) ... \t M_protein_recon
# CPLX_list and genes_all_cplxs are parallel lists (complex id / its gene list)
# that are consumed index-by-index when enzymes are matched to reactions.
filename = 'protein_complexes.txt'
CPLX_list = []
genes_all_cplxs = []

# `with` guarantees the file is closed even if a complex fails to format.
with open(filename, 'w') as file:
    for cplx in protein_complexes_dict:
        cplx_string = cplx
        genes_of_cplx = protein_complexes_dict[cplx]
        if not genes_of_cplx:
            # Nothing is recorded for a complex without genes. The gene list
            # used to be appended to genes_all_cplxs even in this case, which
            # shifted it out of step with CPLX_list.
            continue

        # Stoichiometry from BsubCyc for this complex, if any was collected.
        bsubcyc_stoich = bsubcyc_complex_stoichiometry_dict.get(cplx_string, {})

        gene_terms = []
        for gene_id in genes_of_cplx:
            # If gene info is in bsubcyc
            if gene_id in bsubcyc_stoich:
                stoich = bsubcyc_stoich[gene_id]
            # If not, use information from ecoli
            else:
                try:
                    stoich = str(int(bsub_stoich_dict[cplx_string][gene_id]))
                except (KeyError, TypeError, ValueError):
                    # No usable E. coli value either: leave the count empty.
                    stoich = ''
            gene_terms.append(gene_id + '(' + stoich + ')')

        string = (cplx_string + '\t' + 'default_name' + '\t' +
                  ' AND '.join(gene_terms) + '\t' + 'M_protein_recon' + '\n')
        file.write(string)

        CPLX_list.append(cplx_string)
        genes_all_cplxs.append(genes_of_cplx)

## protein_modification.txt

In [25]:
import pandas
# E. coli protein modification table, indexed by the 'Modified_enzyme' column.
ref_filename = 'ecoli_protein_modification.txt'
complex_mods = pandas.read_table(ref_filename).set_index('Modified_enzyme')

In [26]:
from six import iteritems
# Core enzyme id -> {'modifications': {modification id: count}}.
# Rows whose 'Modified_enzyme' starts with '#' are skipped. Several modified
# forms of the same core enzyme overwrite each other (last one wins).
new_mod_dict = {}
for key, value in iteritems(complex_mods.T.to_dict()):
    if key.startswith('#'):
        continue
    key = key.replace('_DASH_', '__')

    # 'Modifications' is parsed as 'mod(n) AND mod(n) ...'; '()' counts as 1.
    modifications = {}
    for mods in value['Modifications'].split(' AND '):
        mod, num_mods = mods.rstrip(')').split('(')
        num_mods = float(num_mods) if num_mods else 1.
        mod = mod.replace('_DASH_', '__')
        modifications[mod] = num_mods

    protein_name = value['Core_enzyme']
    new_mod_dict[protein_name] = {'modifications': modifications}
ecoli_mod_dict = new_mod_dict

In [27]:
# Write protein_modification.txt: cofactors of each B. subtilis complex are
# copied from the E. coli complex it was matched to.
filename = 'protein_modification.txt'

# Complex id -> {cofactor id: count}. A complex is left out when its E. coli
# counterpart has no entry in ecoli_mod_dict (KeyError); other errors are no
# longer swallowed by a bare `except:`.
cplx_cofactor_dict = {}
for cplx in protein_complexes_dict.keys():
    if cplx in cplx_to_cplx_dict:
        try:
            cplx_cofactor_dict[cplx] = ecoli_mod_dict[cplx_to_cplx_dict[cplx]]['modifications']
        except KeyError:
            pass

# `with` closes the file even if a row fails to format.
with open(filename, 'w') as file:
    file.write('Modified_enzyme' + '\t' + 'Core_enzyme' + '\t' + 'Modifications' + '\t' + 'Source' + '\n')

    for cplx in cplx_cofactor_dict.keys():
        cofactor_terms = []
        mod_string = ''
        for cofactor in cplx_cofactor_dict[cplx].keys():
            stoich = str(int(cplx_cofactor_dict[cplx][cofactor]))
            if stoich == '1':
                # A count of one is written implicitly: 'cofactor()' and '_mod_cofactor'.
                cofactor_terms.append(cofactor + '(' + ')')
                mod_string = mod_string + '_mod_' + cofactor
            else:
                cofactor_terms.append(cofactor + '(' + stoich + ')')
                mod_string = mod_string + '_mod_' + stoich + ':' + cofactor
        cofactor_string = ' AND '.join(cofactor_terms)

        string = cplx + mod_string +'\t' + cplx + '\t' + cofactor_string + '\t' + 'M_protein_recon' + '\n'

        file.write(string)

In [28]:
## Fe-S transfer
# Append hand-curated rows to protein_modification.txt. The `with` block closes
# the file even if one of the writes fails.
filename = 'protein_modification.txt'
with open(filename, 'a') as file:

    string = 'BSU32680-MONOMER_mod_1:2fe2s' +'\t' + 'BSU32680-MONOMER' + '\t' + \
                '2fe2s(1)' + '\t' + 'M_protein_recon' + '\n'
    file.write(string)

    string = 'BSU32680-MONOMER_mod_1:4fe4s' +'\t' + 'BSU32680-MONOMER' + '\t' + \
                '4fe4s(1)' + '\t' + 'M_protein_recon' + '\n'
    file.write(string)

    ## M-model complexes
    string = 'BSU15920-MONOMER_mod_pan4p' +'\t' + 'BSU15920-MONOMER' + '\t' + \
                'pan4p()' + '\t' + 'M_protein_recon' + '\n'
    file.write(string)

    #string = 'BSU28500-MONOMER_mod_Oxidized' +'\t' + 'BSU28500-MONOMER' + '\t' + \
    #            'Oxidized()' + '\t' + 'M_protein_recon' + '\n'
    #file.write(string)

### enzyme_reaction_association.txt

In [29]:
# Write enzyme_reaction_association.txt (reaction id \t enzyme OR enzyme ...)
# and append any complexes invented here to protein_complexes.txt.
filename = 'enzyme_reaction_association.txt'
prot_cplx_filename = 'protein_complexes.txt'

# A rule that is exactly this long is treated as a single locus tag (e.g. 'BSU33170').
standard_gene_length = 8

enz_rxn_assoc_list = []
artificial_cplxs = []
artificial_id = 0

enz_rxn_assoc_dict = dict()

# Both files are closed by the `with` block, even when a reaction fails.
with open(filename, 'w') as file, open(prot_cplx_filename, 'a') as prot_cplx_file:
    for reaction in m_model.reactions:
        # Exchange and demand reactions get no enzyme.
        if reaction.id[0:3] == 'EX_' or reaction.id[0:3] == 'DM_':
            continue
        # Skip empty rules, these reactions are assigned to CPLX_dummy
        if not reaction.gene_reaction_rule:
            continue

        # Parentheses are dropped and the rule is read as
        # '<genes joined by and> or <genes joined by and> ...'.
        rule_string = str(reaction.gene_reaction_rule)
        rule_string = rule_string.replace('(', '').replace(')', '')
        rule_list = rule_string.split(' or ')

        # Known complexes whose gene set equals one of the alternatives.
        reaction_cplx_list = []
        for rule in rule_list:
            rule_gene_set = set(rule.split(' and '))
            # Every known complex is tested. The former
            # range(0, len(genes_all_cplxs)-1) stopped one short, so the last
            # complex could never be matched.
            for known_cplx, ref_rule in zip(CPLX_list, genes_all_cplxs):
                if set(ref_rule) == rule_gene_set:
                    rule_cplx = known_cplx
                    reaction_cplx_list.append(rule_cplx)

        enz_rxn_assoc_list.append(reaction_cplx_list)

        enzyme_ids = []
        if reaction_cplx_list:
            for cplx in reaction_cplx_list:
                # Use the modified-complex id when cofactors are known. The
                # suffix follows the naming used in protein_modification.txt.
                cplx_id = cplx
                for cofactor in cplx_cofactor_dict.get(cplx, {}).keys():
                    stoich = int(cplx_cofactor_dict[cplx][cofactor])
                    if stoich == 1:
                        cplx_id = cplx_id + '_mod_' + cofactor
                    else:
                        cplx_id = cplx_id + '_mod_' + str(stoich) + ':' + cofactor
                enzyme_ids.append(cplx_id)
        else:
            # No known complex: invent one per alternative.
            # NOTE(review): a multi-gene alternative gets a fresh CPLX000-n each
            # time it is met, so reactions sharing a rule get separate
            # complexes - confirm that this duplication is intended.
            for rule in rule_list:
                if len(rule) == standard_gene_length:
                    artificial_cplx = rule + '-MONOMER'
                    stoichiometry_string = rule + '()'
                else:
                    artificial_id = artificial_id + 1
                    artificial_cplx = 'CPLX000-' + str(artificial_id)
                    stoichiometry_string = ' AND '.join(
                        gene + '()' for gene in rule.split(' and '))

                # Keep cplx_id pointing at an enzyme of THIS reaction. It used
                # to keep the value left by an earlier reaction (or be undefined),
                # so the entry recorded below was filed under the wrong key.
                cplx_id = artificial_cplx
                enzyme_ids.append(artificial_cplx)

                if artificial_cplx not in artificial_cplxs:
                    artificial_cplxs.append(artificial_cplx)
                    prot_cplx_file.write(artificial_cplx + '\t' + 'default_name' + '\t' +
                                         stoichiometry_string + '\t' + 'M_protein_recon' + '\n')

        string = str(reaction.id) + '\t' + ' OR '.join(enzyme_ids) + '\n'
        file.write(string)

        # Last enzyme written for the reaction -> reaction id (later reactions overwrite).
        enz_rxn_assoc_dict[cplx_id] = reaction.id

artificial_cplxs = list(set(artificial_cplxs))



In [30]:
# Debug display: the complex id most recently matched to a gene rule in the cell above.
rule_cplx

'BSU33170-MONOMER'

## TUs_from_bsubcyc.txt

In [31]:
## Get rho dependence
# NOTE: despite its name, this list collects the transcription units that
# contain a rho-INDEPENDENT terminator; ids are stored with '-' replaced by '_'.
rho_dependent_TUs = []
for terminator in bsub.rho_independent_terminators.instances:
    terminator_data = pc.PToolsFrame.PFrame.get_frame_data(terminator)
    terminator_id = frameid_to_str(terminator.frameid)

    for tu in terminator_data.component_of:
        tu_id = frameid_to_str(tu).replace('-','_')
        if tu_id in rho_dependent_TUs:
            continue
        rho_dependent_TUs.append(tu_id)

In [32]:
# Get sigma-promoterBOX dict
# Promoter box id -> id of the sigma factor that recognises it.
promoterBOX_to_sigma_dict = dict()
for sigma in bsub.sigma_factors.instances:
    sigma_data = pc.PToolsFrame.PFrame.get_frame_data(sigma)
    sigma_id = frameid_to_str(sigma_data.frameid)
    promoterBOXes = sigma_data.recognized_promoters
    if not promoterBOXes:
        continue
    for promoterBOX in promoterBOXes:
        promoterBOX_id = frameid_to_str(promoterBOX)
        # Values are plain id strings. When a box is recognised by a second
        # sigma factor the first one is kept; the former code called .append()
        # on the stored string, which raises AttributeError.
        promoterBOX_to_sigma_dict.setdefault(promoterBOX_id, sigma_id)

In [33]:
# Transcription unit id ('-' replaced by '_') -> sigma factor id.
TU_to_sigma_dict = dict()

# Get promoter - promoterBOX dict
# (declared here but never filled in this cell)
promoter_to_promoterBOX_dict = dict()

for promoter in bsub.promoters.instances:
    promoter_data = pc.PToolsFrame.PFrame.get_frame_data(promoter)
    promoter_id = frameid_to_str(promoter.frameid)
    promoterBOXes = promoter_data.promoter_boxes
    if not promoterBOXes:
        continue
    # Only the first promoter box of the promoter is used.
    promoterBOX = frameid_to_str(promoterBOXes[0])
    for TU in promoter_data.component_of or []:
        # The stray `tu_id` alias that was also assigned here has been dropped.
        TU_id = frameid_to_str(TU).replace('-','_')
        if TU_id in TU_to_sigma_dict:
            # The first promoter seen for a TU decides its sigma factor.
            continue
        # A missing box is the only expected failure, so .get replaces the bare `except:`.
        TU_to_sigma_dict[TU_id] = promoterBOX_to_sigma_dict.get(
            promoterBOX,
            'BSU25200-MONOMER')  # No sigma information, assume RpoD
TU_to_sigma_dict

{'CHROM_1_0': 'BSU25200-MONOMER',
 'CHROM_1_1': 'BSU00980-MONOMER',
 'CHROM_1_10': 'BSU15320-MONOMER',
 'CHROM_1_11': 'BSU04730-MONOMER',
 'CHROM_1_12': 'BSU25200-MONOMER',
 'CHROM_1_13': 'BSU25200-MONOMER',
 'CHROM_1_14': 'BSU25200-MONOMER',
 'CHROM_1_15': 'BSU15320-MONOMER',
 'CHROM_1_16': 'BSU25200-MONOMER',
 'CHROM_1_17': 'BSU25200-MONOMER',
 'CHROM_1_18': 'BSU34200-MONOMER',
 'CHROM_1_19': 'BSU25200-MONOMER',
 'CHROM_1_2': 'BSU25200-MONOMER',
 'CHROM_1_20': 'MONOMER8J2-6',
 'CHROM_1_21': 'BSU00980-MONOMER',
 'CHROM_1_22': 'BSU25200-MONOMER',
 'CHROM_1_23': 'BSU25200-MONOMER',
 'CHROM_1_24': 'MONOMER8J2-6',
 'CHROM_1_25': 'BSU25200-MONOMER',
 'CHROM_1_26': 'BSU25200-MONOMER',
 'CHROM_1_27': 'BSU25200-MONOMER',
 'CHROM_1_28': 'BSU25200-MONOMER',
 'CHROM_1_29': 'BSU25200-MONOMER',
 'CHROM_1_3': 'BSU15320-MONOMER',
 'CHROM_1_30': 'BSU25200-MONOMER',
 'CHROM_1_31': 'BSU16470-MONOMER',
 'CHROM_1_32': 'BSU25200-MONOMER',
 'CHROM_1_33': 'BSU25200-MONOMER',
 'CHROM_1_34': 'BSU25200-MONOMER

In [34]:
from Bio import SeqIO

# Write TUs_from_bsubcyc.txt: one tab-separated row per transcription unit (TU)
# with its span, TSS, strand, rho dependence and sigma factor.
# NOTE(review): the file is closed only at the end; an exception inside the
# loop leaves it open.
filename = 'TUs_from_bsubcyc.txt'
file = open(filename,'w')

# GenBank record, used below to look up the strand of a gene by locus tag.
gb_filename = 'NC_000964.gb'   
gb_file = SeqIO.read(gb_filename, 'gb')
full_seq = str(gb_file.seq)  # not used in this cell
element_types={'CDS', 'rRNA','tRNA', 'ncRNA'}

head_string = 'TU_id' + '\t' + 'start' + '\t' + 'stop' + '\t' + 'tss' + '\t' + 'strand' + '\t' + 'rho_dependent' + '\t' + 'sigma' + '\n'
file.write(head_string)
TUs = bsub.transcription_units
for tu_PFrame in TUs.instances:
    tu_data = pc.PToolsFrame.PFrame.get_frame_data(tu_PFrame)
    
    ## Start and stop
    # Collect both ends of every component whose frame id contains 'BSU'.
    positions = []
    for gene_fid in tu_data.components:
        if 'BSU' in gene_fid:
            gene_PFrame = pc.PToolsFrame.PFrame(gene_fid,bsub,getFrameData=False, isClass=False)
            gene_data = pc.PToolsFrame.PFrame.get_frame_data(gene_PFrame)
            positions.append(gene_data.left_end_position)
            positions.append(gene_data.right_end_position)
            # Remembered for the strand lookup below; the last matching component wins.
            # NOTE(review): if a TU has no 'BSU' component, gene_id keeps the
            # value left by the previous TU - confirm this is acceptable.
            gene_id = gene_fid.replace('BSU','BSU_')
    if positions:
        tu_start = min(positions)
        tu_stop = max(positions)
    else:
        # No positioned component: the TU is written with a 0-0 span.
        tu_start = 0
        tu_stop = 0
    
    # NOTE(review): the TSS is set to the stop coordinate regardless of strand - confirm.
    tu_tss = tu_stop

    ## ID
    tu_id = frameid_to_str(tu_data.frameid)
    tu_id = tu_id.replace('-','_')
    
    ## Sigma
    try:
        sigma = TU_to_sigma_dict[tu_id]
    except:
        sigma = 'BSU25200-MONOMER' # No sigma information, assume RpoD
    
    ## Rho
    # rho_dependent_TUs is filled from bsub.rho_independent_terminators, so
    # membership in it means the TU is NOT rho dependent.
    rho_dependence = 'False' if tu_id in rho_dependent_TUs else 'True'
    
    ## Strand    
    # Drops the first and last character (presumably the '|' delimiters of the frame id).
    gene_id = gene_id[1:len(gene_id)-1] ## Only use one gene. The others should have the same direction.
    # NOTE(review): when no feature matches gene_id, `strand` keeps the value of
    # the previous TU (or is undefined on the first one).
    for feature in gb_file.features:
        if feature.type not in element_types or 'pseudo' in feature.qualifiers:
            continue
        if feature.qualifiers["locus_tag"][0] == gene_id:
            strand = '+' if feature.location.strand == 1 else '-'
            
    ##
    # The written TU id carries the sigma factor as a suffix.
    tu_id = tu_id + '_from_' + sigma
    string = str(tu_id) + '\t' + str(tu_start) + '\t' + str(tu_stop) + '\t' + str(tu_tss) + '\t' + str(strand) + '\t' + rho_dependence + '\t' + sigma + '\n'
    
    file.write(string)
    
file.close()

In [35]:
# NOTE(review): this reads 'TUs_from_ecocyc.txt', not the 'TUs_from_bsubcyc.txt'
# written by the previous cell - confirm which file is meant.
df = pd.read_csv('TUs_from_ecocyc.txt',sep='\t')

# Number of distinct values in the 'sigma' column.
len(set(df['sigma'].values))

7

## trna_to_codon dictionary

In [36]:
# DNA codon -> one-letter amino acid ('*' marks a stop codon).
# The table is generated instead of spelled out: codons are enumerated with
# each position running over T, C, A, G (first base slowest), and the amino
# acid string lists the translations in exactly that order.
_CODON_BASES = 'TCAG'
_CODON_AMINO_ACIDS = (
    'FFLLSSSSYY**CC*W'    # first base T
    'LLLLPPPPHHQQRRRR'    # first base C
    'IIIMTTTTNNKKSSRR'    # first base A
    'VVVVAAAADDEEGGGG'    # first base G
)
DNA_to_codon_table = {
    first + second + third: _CODON_AMINO_ACIDS[16 * i + 4 * j + k]
    for i, first in enumerate(_CODON_BASES)
    for j, second in enumerate(_CODON_BASES)
    for k, third in enumerate(_CODON_BASES)
}

# RNA codon (DNA codon passed through Seq.transcribe) -> amino acid name as
# returned by seq3 for the one-letter code.
tRNA_to_codon_table = {
    Seq.transcribe(dna_codon): seq3(DNA_to_codon_table[dna_codon])
    for dna_codon in DNA_to_codon_table
}


tRNA_to_codon_table

{'AAA': 'Lys',
 'AAC': 'Asn',
 'AAG': 'Lys',
 'AAU': 'Asn',
 'ACA': 'Thr',
 'ACC': 'Thr',
 'ACG': 'Thr',
 'ACU': 'Thr',
 'AGA': 'Arg',
 'AGC': 'Ser',
 'AGG': 'Arg',
 'AGU': 'Ser',
 'AUA': 'Ile',
 'AUC': 'Ile',
 'AUG': 'Met',
 'AUU': 'Ile',
 'CAA': 'Gln',
 'CAC': 'His',
 'CAG': 'Gln',
 'CAU': 'His',
 'CCA': 'Pro',
 'CCC': 'Pro',
 'CCG': 'Pro',
 'CCU': 'Pro',
 'CGA': 'Arg',
 'CGC': 'Arg',
 'CGG': 'Arg',
 'CGU': 'Arg',
 'CUA': 'Leu',
 'CUC': 'Leu',
 'CUG': 'Leu',
 'CUU': 'Leu',
 'GAA': 'Glu',
 'GAC': 'Asp',
 'GAG': 'Glu',
 'GAU': 'Asp',
 'GCA': 'Ala',
 'GCC': 'Ala',
 'GCG': 'Ala',
 'GCU': 'Ala',
 'GGA': 'Gly',
 'GGC': 'Gly',
 'GGG': 'Gly',
 'GGU': 'Gly',
 'GUA': 'Val',
 'GUC': 'Val',
 'GUG': 'Val',
 'GUU': 'Val',
 'UAA': 'Ter',
 'UAC': 'Tyr',
 'UAG': 'Ter',
 'UAU': 'Tyr',
 'UCA': 'Ser',
 'UCC': 'Ser',
 'UCG': 'Ser',
 'UCU': 'Ser',
 'UGA': 'Ter',
 'UGC': 'Cys',
 'UGG': 'Trp',
 'UGU': 'Cys',
 'UUA': 'Leu',
 'UUC': 'Phe',
 'UUG': 'Leu',
 'UUU': 'Phe'}

In [37]:
def get_key(my_dict, val):
    """Return a list of every key in *my_dict* whose value equals *val*.

    Keys are returned in the dict's iteration order; the list is empty when
    no value matches.
    """
    return [key for key, value in my_dict.items() if val == value]



In [38]:
# tRNA id -> list of every codon that translates to the amino acid named at
# the end of the tRNA's common name.
trna_to_codon = dict()
for tRNA_PFrame in bsub.tRNAs.instances:
    tRNA_data = pc.PToolsFrame.PFrame.get_frame_data(tRNA_PFrame)

    # Clean the frame id: drop '|' and the '-tRNA' suffix, then write 'TRNA' as 'tRNA'.
    tRNA_id = (str(tRNA_data.frameid)
               .replace('|','')
               .replace('-tRNA','')
               .replace('TRNA','tRNA'))

    # Last three characters of the common name (e.g. 'tRNA-Leu' -> 'Leu').
    aa_id = tRNA_data.common_name
    aa_id = aa_id[len(aa_id)-3:len(aa_id)]

    trna_to_codon[tRNA_id] = get_key(tRNA_to_codon_table, aa_id)

trna_to_codon
    

{'BSU_tRNA_1': ['UUU', 'UUC'],
 'BSU_tRNA_10': ['AUG'],
 'BSU_tRNA_11': ['GAA', 'GAG'],
 'BSU_tRNA_12': ['GUU', 'GUC', 'GUG', 'GUA'],
 'BSU_tRNA_13': ['ACC', 'ACA', 'ACG', 'ACU'],
 'BSU_tRNA_14': ['AAG', 'AAA'],
 'BSU_tRNA_15': ['CUU', 'CUG', 'CUA', 'CUC', 'UUG', 'UUA'],
 'BSU_tRNA_16': ['GGU', 'GGG', 'GGA', 'GGC'],
 'BSU_tRNA_17': ['CUU', 'CUG', 'CUA', 'CUC', 'UUG', 'UUA'],
 'BSU_tRNA_18': ['AGG', 'AGA', 'CGA', 'CGG', 'CGC', 'CGU'],
 'BSU_tRNA_19': ['CCG', 'CCA', 'CCU', 'CCC'],
 'BSU_tRNA_2': ['GAU', 'GAC'],
 'BSU_tRNA_20': ['GCA', 'GCG', 'GCC', 'GCU'],
 'BSU_tRNA_21': ['AUG'],
 'BSU_tRNA_22': ['GAU', 'GAC'],
 'BSU_tRNA_23': ['AAC', 'AAU'],
 'BSU_tRNA_24': ['ACC', 'ACA', 'ACG', 'ACU'],
 'BSU_tRNA_25': ['GGU', 'GGG', 'GGA', 'GGC'],
 'BSU_tRNA_26': ['AGG', 'AGA', 'CGA', 'CGG', 'CGC', 'CGU'],
 'BSU_tRNA_27': ['CCG', 'CCA', 'CCU', 'CCC'],
 'BSU_tRNA_28': ['GCA', 'GCG', 'GCC', 'GCU'],
 'BSU_tRNA_29': ['AAC', 'AAU'],
 'BSU_tRNA_3': ['GAA', 'GAG'],
 'BSU_tRNA_30': ['AGC', 'AGU', 'UCU', 'UCG'

In [39]:
# Debug display: frame data of the last tRNA visited by the loop in the cell above.
tRNA_data

0,1
common_name,tRNA-Leu
creation_date,3465237858
creator,|keseler|
frameid,|BSU_TRNA_57-tRNA|
gene,[u'|BSU_TRNA_57|']
modified_form,[u'|charged-BSU_TRNA_57-tRNA|']
names,[u'an uncharged tRNA']
overview_node_shape,|TEE|
pgdb,
schema_p,True


## Cleaved methionine

In [40]:
## Methionine cleaved feature dictionary
# Frame ids of the amino-acid-site features whose first comment is exactly
# 'UniProt: Removed.'.
met_cleaved_features = []
for instance in bsub.amino_acid_sites.instances:
    # instance_data itself is not used below.
    # NOTE(review): presumably the call is kept because it loads the slots read
    # from `instance` afterwards - confirm before removing it.
    instance_data = pc.PToolsFrame.PFrame.get_frame_data(instance)
    try :
        if instance.comment[0] == 'UniProt: Removed.':
            met_cleaved_features.append(instance.frameid)
    except:
        # Any failure while reading the comment skips this feature.
        continue

In [41]:
# Display the collected feature frame ids.
met_cleaved_features

[u'|FTR8J2-35248|',
 u'|FTR8J2-32249|',
 u'|FTR8J2-29216|',
 u'|FTR8J2-26701|',
 u'|FTR8J2-25481|',
 u'|FTR8J2-23046|',
 u'|FTR8J2-21256|',
 u'|FTR8J2-34650|',
 u'|FTR8J2-31524|',
 u'|FTR8J2-28683|',
 u'|FTR8J2-26548|',
 u'|FTR8J2-25263|',
 u'|FTR8J2-22388|',
 u'|FTR8J2-21156|',
 u'|FTR8J2-36869|',
 u'|FTR8J2-34419|',
 u'|FTR8J2-30976|',
 u'|FTR8J2-28388|',
 u'|FTR8J2-26141|',
 u'|FTR8J2-24642|',
 u'|FTR8J2-22216|',
 u'|FTR8J2-20863|',
 u'|FTR8J2-36398|',
 u'|FTR8J2-33449|',
 u'|FTR8J2-30558|',
 u'|FTR8J2-27783|',
 u'|FTR8J2-25789|',
 u'|FTR8J2-24103|',
 u'|FTR8J2-21736|',
 u'|FTR8J2-15191|',
 u'|FTR8J2-35988|',
 u'|FTR8J2-32892|',
 u'|FTR8J2-29972|',
 u'|FTR8J2-27491|',
 u'|FTR8J2-25525|',
 u'|FTR8J2-23417|',
 u'|FTR8J2-21553|',
 u'|FTR8J2-35529|',
 u'|FTR8J2-35509|',
 u'|FTR8J2-32260|',
 u'|FTR8J2-29286|',
 u'|FTR8J2-26795|',
 u'|FTR8J2-25506|',
 u'|FTR8J2-23238|',
 u'|FTR8J2-21381|',
 u'|FTR8J2-5067|',
 u'|FTR8J2-34972|',
 u'|FTR8J2-31704|',
 u'|FTR8J2-28931|',
 u'|FTR8J2-26674|',
 

In [42]:
# Ids of the proteins that carry at least one of the collected
# methionine-cleavage features; '-MONOMER' and anything after it is dropped
# from the protein frame id.
met_cleaved_prots = []
# Built once instead of once per protein.
met_cleaved_feature_set = set(met_cleaved_features)
for protein in bsub.proteins.instances:
    protein_id = frameid_to_str(protein.frameid)
    gene_id = protein_id.split('-MONOMER')[0]
    try:
        features = protein.features
        if met_cleaved_feature_set.intersection(features):
            met_cleaved_prots.append(gene_id)
    except Exception:
        # Proteins whose features cannot be read are skipped (best effort).
        # `Exception` rather than a bare `except:` so that an interrupt or
        # interpreter exit is not swallowed.
        continue

In [43]:
# Print the protein ids as a quoted, comma-separated listing, five per line,
# ready to paste into a Python list.
i = 0
pieces = []
for i, prot in enumerate(met_cleaved_prots, 1):
    pieces.append("'" + prot + "'" + ',')
    if not i%5:
        pieces.append('\n')
# Joined once instead of growing the string on every iteration.
string = ''.join(pieces)
# Function-call form of print gives the same output on Python 2 and runs on Python 3.
print(string)

'BSU17410','BSU10790','BSU32150','BSU17460','BSU06480',
'BSU39920','BSU06180','BSU16100','BSU01390','BSU28230',
'BSU16150','BSU31390','BSU15990','BSU01440','BSU30540',
'BSU01290','BSU18000','BSU06850','BSU33540','BSU13900',
'BSU03520','BSU28310','BSU02890','BSU25020','BSU06150',
'BSU01050','BSU00730','BSU19530','BSU01410','BSU23860',
'BSU34790','BSU33940','BSU01100','BSU04730','BSU30190',
'BSU23040','BSU03130','BSU25410','BSU32710','BSU32890',
'BSU28440','BSU29120','BSU28500','BSU01340','BSU01020',
'BSU16250','BSU23470','BSU30650','BSU38550','BSU33910',
'BSU29660','BSU28870','BSU07000','BSU06030','BSU16170',
'BSU16690','BSU01310','BSU01150','BSU01700','BSU00510',
'BSU01200','BSU01040','BSU30120','BSU14580','BSU33400',
'BSU04190','BSU19550','BSU12290','BSU19240','BSU01120',
'BSU38140','BSU21870','BSU37660','BSU00110','BSU18030',
'BSU25480','BSU40030','BSU13180','BSU01780','BSU35000',
'BSU28430','BSU08820','BSU31350','BSU01250','BSU16500',
'BSU04400','BSU07830','BSU36830','BSU27320','BSU

## peptide_compartment_and_pathways

In [44]:
from Bio import SeqIO

# Parse the GenBank file NC_000964.gb; SeqIO.read expects exactly one record.
gb_file = SeqIO.read('NC_000964.gb', 'gb')
# Feature types considered when iterating gb_file.features in later cells.
element_types={'CDS'}

In [45]:
# Location frames of interest and the short code recorded for each one.
# The codes are appended in this order: PM, CW, EX.
include_locations = ['|CCI-PM-BAC-POS-GP|','|CCI-CW-BAC-POS-GP|','|CCI-EXTRACELLULAR-GP|']
location_codes = [
    ('|CCI-PM-BAC-POS-GP|', 'PM'),
    ('|CCI-CW-BAC-POS-GP|', 'CW'),
    ('|CCI-EXTRACELLULAR-GP|', 'EX'),
]

# Maps gene ID (protein ID with the '-MONOMER' suffix removed) to the list of
# short location codes found among the protein's annotated locations.
monomer_location_dict = {}
for protein in bsub.proteins.instances:
    protein_id = frameid_to_str(protein.frameid)
    protein_data = pc.PToolsFrame.PFrame.get_frame_data(protein)
    locations = protein_data.locations
    if not locations:
        continue
    matched = set(locations) & set(include_locations)
    if not matched:
        # None of the locations of interest is annotated for this protein.
        continue
    codes = []
    for frame_id, code in location_codes:
        if frame_id in matched:
            codes.append(code)
    monomer_location_dict[protein_id.split('-MONOMER')[0]] = codes
        

In [46]:
## Create FASTA file with AA sequences of every located monomer
## (all keys of monomer_location_dict: 'PM', 'CW' and/or 'EX')
FASTA_file = 'membrane_genes.faa'
# The with-block guarantees the file is closed even if a feature raises.
with open(FASTA_file, 'w') as fasta_out:
    for feature in gb_file.features:
        if feature.type not in element_types:
            continue
        qualifiers = feature.qualifiers
        # Prefer the old locus tag; fall back to the current one when absent.
        try:
            gene_id = qualifiers['old_locus_tag'][0]
        except (KeyError, IndexError):
            gene_id = qualifiers['locus_tag'][0]
        if gene_id not in monomer_location_dict:
            continue
        # Features without a translation have no sequence to write; skip them.
        try:
            seq = qualifiers['translation'][0]
        except (KeyError, IndexError):
            continue
        # The doubled '>' is kept exactly as before.
        # NOTE(review): the cell that parses 'output_protein_type.txt' reads
        # the gene ID from columns 1-9 of each line, presumably relying on
        # this extra leading character - confirm before changing the header.
        fasta_out.write('>>' + gene_id + '\n')
        fasta_out.write(seq + '\n')

In [47]:
## Create FASTA file with AA sequences of secreted monomers
## (entries of monomer_location_dict whose location codes include 'EX')
FASTA_file = 'secreted_genes.faa'
# The with-block guarantees the file is closed even if a feature raises.
with open(FASTA_file, 'w') as fasta_out:
    for feature in gb_file.features:
        if feature.type not in element_types:
            continue
        qualifiers = feature.qualifiers
        # Prefer the old locus tag; fall back to the current one when absent.
        try:
            gene_id = qualifiers['old_locus_tag'][0]
        except (KeyError, IndexError):
            gene_id = qualifiers['locus_tag'][0]
        # Unknown genes yield an empty tuple, so they are skipped as well.
        if 'EX' not in monomer_location_dict.get(gene_id, ()):
            continue
        # Features without a translation have no sequence to write; skip them.
        try:
            seq = qualifiers['translation'][0]
        except (KeyError, IndexError):
            continue
        # The doubled '>' is kept exactly as before.
        # NOTE(review): the cell that parses 'output_protein_type.txt' reads
        # the gene ID from columns 1-9 of each line, presumably relying on
        # this extra leading character - confirm before changing the header.
        fasta_out.write('>>' + gene_id + '\n')
        fasta_out.write(seq + '\n')

In [48]:
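## The FASTA file written above (FASTA_file) is processed externally with SignalP 5.0 http://www.cbs.dtu.dk/services/SignalP/
## Its result is expected in 'output_protein_type.txt', which the next cell reads.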

In [49]:
# Read 'output_protein_type.txt' in full and split it into lines.
# The with-block closes the handle as soon as the content has been read.
with open('output_protein_type.txt') as pathways_file:
    data = pathways_file.read()
parsed_data = data.split('\n')

In [50]:
# Map each gene ID found in parsed_data to a translocation pathway code:
# 't' when the line mentions 'Tat', otherwise 's'.
protein_translocation_pathway_dict = {}
for instance in parsed_data:
    # Characters 1-8 of the line hold the gene ID (character 0 is a prefix).
    gene_id = instance[1:9]
    if 'BSU' not in gene_id:
        continue
    if 'Tat' in instance:
        pathway = 't'
        print(instance)
    else:
        # Lines mentioning 'Sec' and all remaining lines both map to the
        # Sec pathway, which is the default since it is the major one.
        pathway = 's'
    protein_translocation_pathway_dict[gene_id] = pathway

## Note: ABC transport has not been included yet

_BSU14050	TAT(Tat/SPI)	0.007381	0.981474	0.003950	0.007195	CS pos: 31-32. GYA-RY. Pr: 0.7090
_BSU35410	TAT(Tat/SPI)	0.223413	0.388022	0.012648	0.375917	CS pos: 32-33. ANA-NT. Pr: 0.4810
_BSU38260	TAT(Tat/SPI)	0.007529	0.989730	0.001593	0.001148	CS pos: 45-46. TAA-KP. Pr: 0.6181


In [51]:
# Write one tab-separated row per subunit of every complex whose genes all
# have a known location in monomer_location_dict.
filename = 'peptide_compartment_and_pathways.txt'
located_genes = set(monomer_location_dict)  # built once, reused per complex

# The with-block guarantees the file is closed even if a row fails.
with open(filename, 'w') as out_file:
    out_file.write('Complex' + '\t' + 'Complex_compartment' + '\t' + 'Protein' + '\t' +
                   'Protein_compartment' + '\t' + 'translocase_pathway' + '\n')

    for protein_id in protein_complexes_dict.keys():
        genes_of_cplx = protein_complexes_dict[protein_id]
        # Keep the complex only if every one of its genes has a location.
        if len(set(genes_of_cplx) & located_genes) != len(genes_of_cplx):
            continue
        for gene in genes_of_cplx:
            # Missing or non-numeric stoichiometry defaults to 1.
            try:
                stoich = str(int(bsub_stoich_dict[protein_id][gene]))
            except (KeyError, TypeError, ValueError):
                stoich = '1'
            # Genes without a predicted pathway default to Sec ('s').
            transloc_pathway = protein_translocation_pathway_dict.get(gene, 's')

            gene_locations = monomer_location_dict[gene]
            if 'PM' in gene_locations:
                location = 'Inner_Membrane'
            elif 'CW' in gene_locations:
                location = 'Outer_Membrane'
            else:
                # Only 'EX' is annotated. Previously `location` was left
                # unset here, so the row silently reused the previous gene's
                # compartment (or raised NameError on the first row). No
                # membrane compartment applies, so the row is skipped.
                # NOTE(review): a 'Secreted' compartment was sketched in a
                # disabled branch of the original code - confirm whether
                # these proteins should be written with it instead.
                continue

            out_file.write(protein_id + '\t' + location + '\t' + gene + '(' + stoich + ')' + '\t' +
                           location + '\t' + transloc_pathway + '\n')

## Keffs

In [1]:
import cobra

In [2]:
# Define Models
# Despite its name, ijo_directory holds the path of a JSON model file
# (iYO844.json in the current working directory), not a directory.
ijo_directory = './iYO844.json'

# Load the model with cobra; later cells iterate over ijo.reactions.
ijo = cobra.io.load_json_model(ijo_directory)


In [3]:
# Lower-case keywords matched against a reaction's subsystem name to place
# it in one of the classes used for the keff assignment.
central_CE = [
    'carbohydrate',
    'energy',
]
central_AFN = [
    'amino_acid',
    'fatty_acid',
    'lipid',
    'nucleotide',
]
intermediate = [
    'cofactor',
    'coenzymes',
    'prosthetic_groups',
]

def check_if_contained(ref_list,string):
    """Return 1 if any entry of ref_list is a substring of string.lower(), else 0.

    Matching is case-insensitive with respect to `string` only; the entries
    of `ref_list` are compared as given.
    """
    return 1 if any(keyword in string.lower() for keyword in ref_list) else 0

In [5]:
import re

# Per-class gene lists (de-duplicated at the end) and a gene -> class lookup.
central_CE_list = []
central_AFN_list = []
intermediate_list = []
secondary_list = []

classification_dict = dict()

# Gene reaction rules are split on the boolean operators to get single genes.
delimiters = ' and ',' or '
regexPattern = '|'.join(map(re.escape, delimiters))

# Checked in order; the first class whose keywords match the subsystem wins.
subsystem_classes = [
    (central_CE, 'central_CE', central_CE_list),
    (central_AFN, 'central_AFN', central_AFN_list),
    (intermediate, 'intermediate', intermediate_list),
]

for rxn in ijo.reactions:
    rule = rxn.gene_reaction_rule
    if not rule:
        continue
    # Reactions matching none of the keyword lists count as 'secondary'.
    label, bucket = 'secondary', secondary_list
    for keywords, candidate_label, candidate_bucket in subsystem_classes:
        if check_if_contained(keywords, rxn.subsystem):
            label, bucket = candidate_label, candidate_bucket
            break
    for token in re.split(regexPattern, rule):
        # Drop the grouping parentheses left over from the rule.
        gene = token.replace('(', '').replace(')', '')
        bucket.append(gene)
        # A gene seen in several reactions keeps the class of the last one.
        classification_dict[gene] = label

central_CE_list = list(set(central_CE_list))
central_AFN_list = list(set(central_AFN_list))
intermediate_list = list(set(intermediate_list))
secondary_list = list(set(secondary_list))

In [55]:
# Write one keff row per (reaction, direction, enzyme) for every protein
# that has an entry in enz_rxn_assoc_dict.
filename = 'reaction_median_keffs.txt'

# keff assigned per gene class. The key was previously misspelled
# 'intermidiate', so proteins classified 'intermediate' never matched and
# silently received the default instead of 5.2.
subsystem_keffs_dict = {'central_CE':'79','central_AFN':'18','intermediate':'5.2','secondary':'2.5'}
# Used when a protein has no genes or none of its genes is classified.
default_keff = '65'

reaction_versions = ['REV','FWD']

# The with-block guarantees the file is closed even if a protein raises.
with open(filename, 'w') as keff_file:
    for protein in bsub.proteins.instances:
        protein_id = frameid_to_str(protein.frameid)
        # NOTE(review): the returned frame data is not used in this cell; the
        # call is kept in case it is relied on for its effect on `protein` -
        # confirm and drop it if not.
        protein_data = pc.PToolsFrame.PFrame.get_frame_data(protein)

        # Reset per protein: previously a protein without genes reused the
        # keff of the preceding protein (or was skipped via NameError).
        protein_keff = default_keff
        genes_of_cplx = pc.PGDB.genes_of_protein(bsub,protein)
        if genes_of_cplx:
            protein_type = []
            for gene in genes_of_cplx:
                try:
                    gene_id = frameid_to_str(gene)
                    protein_type.append(classification_dict[gene_id])
                except (KeyError, TypeError, AttributeError):
                    # Gene is not part of the classified model genes.
                    continue
            protein_type = list(set(protein_type))
            if protein_type:
                # NOTE(review): when the genes span several classes the class
                # used is whichever comes first out of the set, as before.
                protein_keff = subsystem_keffs_dict.get(protein_type[0], default_keff)

        try:
            if protein_id in cplx_cofactor_dict:
                # Append one '_mod_' suffix per cofactor; the stoichiometry
                # is only spelled out when it differs from 1.
                string = protein_id
                for cofactor in cplx_cofactor_dict[protein_id].keys():
                    stoich = int(cplx_cofactor_dict[protein_id][cofactor])
                    if stoich == 1:
                        string = string + '_mod_' + cofactor
                    else:
                        string = string + '_mod_' + str(stoich) + ':' + cofactor
                protein_id = string
            reaction_id = enz_rxn_assoc_dict[protein_id]
            rows = [reaction_id + '_' + ver + '_' + protein_id + '\t' + protein_id +
                    '\t' + protein_keff + '\n'
                    for ver in reaction_versions]
        except (KeyError, TypeError, ValueError):
            # Best effort: proteins without a reaction association (or with
            # malformed cofactor data) are skipped.
            continue
        # Written outside the try so that I/O errors are not swallowed.
        keff_file.writelines(rows)

## Save modified m_model

In [56]:
# Solve the modified m_model once more; the notebook displays the returned
# solution below so it can be checked before the model is saved.
m_model.optimize()

<Solution 0.12 at 0x7fa20c65d090>

In [57]:
# Write the modified m_model to 'iYO844_mod.json' in the working directory.
cobra.io.save_json_model(m_model,'iYO844_mod.json')