## Generate MetaCyc-dependent files

### protein_complexes.txt

In [91]:
import cobra
import os
import pandas as pd
import numpy as np
import urllib
import pythoncyc as pc
import re

## Metabolites

m_model_file = './iYO844.json'
met_output_file = 'metabolites.txt'

m_model = cobra.io.load_json_model(m_model_file)

## Fix of gene reaction rules
# (reaction id, replacement rule) pairs; they are applied top to bottom.
ribonucleotide_reductase_rule = 'BSU17380 and BSU17390'
gene_rule_fixes = [
    ('ACCOAC', '(BSU29200 and BSU29210) or BSU24350 or BSU24340 or BSU22440'),
    ('PDH', '(BSU14580 and BSU14590) or BSU14600 or BSU14610'),
    ('ACTD2', 'BSU08060 and BSU08070'),
    ('AIRC1', 'BSU06420 or BSU06430'),
    ('ANS', 'BSU00750 or BSU22680'),
    ('PRFGS_1', 'BSU06480 or BSU06470'),
    ('RNDR1', ribonucleotide_reductase_rule),
    ('RNDR2', ribonucleotide_reductase_rule),
    ('RNDR3', ribonucleotide_reductase_rule),
    ('RNDR4', ribonucleotide_reductase_rule),
]
for rxn_id, fixed_rule in gene_rule_fixes:
    m_model.reactions.get_by_id(rxn_id).gene_reaction_rule = fixed_rule


In [92]:
# Display the gene-reaction rule currently stored for reaction CYTB_B2.
m_model.reactions.CYTB_B2.gene_reaction_rule

u'(BSU30710 and BSU30720) or (BSU38760 and BSU38750)'

In [93]:
# Select the organism database with id 'bsub' through pythoncyc; the handle is
# passed to every pc.PGDB query below.
# NOTE(review): presumably the BsubCyc PGDB served by a running Pathway Tools
# instance - confirm.
bsub = pc.select_organism('bsub')

In [94]:
# Query the database for its protein complexes (filter='all') and its enzymes.
CPLXS = pc.PGDB.all_protein_complexes(bsub, filter='all')
ENZYMES = pc.PGDB.all_enzymes(bsub)

# Merge both results into one duplicate-free list (element order is whatever
# the set union yields).
CPLXS = list(set(CPLXS) | set(ENZYMES))

In [95]:
# Write one tab-separated line per entry of CPLXS to protein_complexes.txt:
#     <complex id> \t default_name \t <gene>(1) AND <gene>(1) ... \t M_protein_recon
# and record, index-aligned, the complex ids (CPLX_list) and their gene ids
# (genes_all_cplxs) for the enzyme/reaction matching done later.
filename = 'protein_complexes.txt'
CPLX_list = []
genes_all_cplxs = []


def _strip_delimiters(frame):
    """Return str(frame) without its first and last character.

    Frame ids are printed as e.g. '|CPLX8J2-77|'; this drops the two '|'.
    """
    return str(frame)[1:-1]


# 'with' guarantees the file is closed even if a database query raises.
with open(filename, 'w') as cplx_file:
    for cplx in CPLXS:
        cplx_string = _strip_delimiters(cplx)
        genes_of_cplx_comp = [
            _strip_delimiters(gene)
            for gene in pc.PGDB.genes_of_protein(bsub, cplx)
        ]

        # Every gene gets stoichiometry 1. Joining (instead of appending
        # ' AND ' and chopping 5 characters afterwards) keeps the line intact
        # when a complex has no genes: the old code then cut 'default_name\t'
        # down to 'default_' and lost a column.
        stoichiometry = ' AND '.join(
            gene_string + '(1)' for gene_string in genes_of_cplx_comp
        )

        cplx_file.write(
            '\t'.join(
                [cplx_string, 'default_name', stoichiometry, 'M_protein_recon']
            )
            + '\n'
        )

        CPLX_list.append(cplx_string)
        genes_all_cplxs.append(genes_of_cplx_comp)

### enzyme_reaction_association.txt

In [123]:
# Write enzyme_reaction_association.txt: one line per model reaction (exchange
# 'EX_' and demand 'DM_' reactions are skipped) of the form
#     <reaction id> \t <complex> OR <complex> ...
# or, for reactions without a gene rule,
#     <reaction id> \t EMPTY-RULE \t <reaction name>
#
# Each ' or ' alternative of a gene rule is matched against the gene sets of
# the complexes collected in CPLX_list / genes_all_cplxs. If no alternative of
# a reaction matches, artificial complexes are created and appended to
# protein_complexes.txt.
filename = 'enzyme_reaction_association.txt'
prot_cplx_filename = 'protein_complexes.txt'

# A rule alternative of exactly this length is treated as a single gene id
# (e.g. 'BSU07700').
standard_gene_length = 8

# Index the known complexes by gene set. This replaces the per-rule linear
# scan, which stopped at len(genes_all_cplxs) - 1 and therefore never
# considered the last complex. Complexes sharing a gene set keep their
# CPLX_list order.
cplxs_by_gene_set = {}
for known_cplx, known_genes in zip(CPLX_list, genes_all_cplxs):
    cplxs_by_gene_set.setdefault(frozenset(known_genes), []).append(known_cplx)

enz_rxn_assoc_list = []
artificial_cplxs = []
artificial_id = 0
# Artificial complex ids already appended to protein_complexes.txt by this
# run; prevents writing the same '<gene>-MONOMER' line once per reaction.
written_artificial_cplxs = set()

with open(filename, 'w') as assoc_file, \
        open(prot_cplx_filename, 'a') as prot_cplx_file:
    for reaction in m_model.reactions:
        if reaction.id.startswith(('EX_', 'DM_')):
            continue

        rule_string = str(reaction.gene_reaction_rule)
        if not rule_string:
            assoc_file.write(
                str(reaction.id) + '\t' + 'EMPTY-RULE' + '\t'
                + reaction.name + '\n'
            )
            continue

        # Parentheses are dropped, so the rule is read as a flat
        # "or of ands".
        rule_list = rule_string.replace('(', '').replace(')', '').split(' or ')

        reaction_cplx_list = []
        for rule in rule_list:
            rule_genes = frozenset(rule.split(' and '))
            reaction_cplx_list.extend(cplxs_by_gene_set.get(rule_genes, []))
        enz_rxn_assoc_list.append(reaction_cplx_list)

        if reaction_cplx_list:
            # NOTE(review): alternatives without a matching complex are
            # dropped as soon as at least one alternative matched (unchanged
            # from the original logic) - confirm this is intended.
            enzymes = reaction_cplx_list
        else:
            enzymes = []
            for rule in rule_list:
                if len(rule) == standard_gene_length:
                    artificial_cplx = rule + '-MONOMER'
                    cplx_gene_list = [rule]
                else:
                    artificial_id += 1
                    artificial_cplx = 'CPLX000-' + str(artificial_id)
                    cplx_gene_list = rule.split(' and ')
                    print(cplx_gene_list)

                # Join after the loop: the old code trimmed the separator
                # inside the gene loop, turning 'A(1) AND B(1)' into
                # 'A(1) B(1) '.
                stoichiometry_string = ' AND '.join(
                    gene + '(1)' for gene in cplx_gene_list
                )

                enzymes.append(artificial_cplx)
                artificial_cplxs.append(artificial_cplx)
                if artificial_cplx not in written_artificial_cplxs:
                    written_artificial_cplxs.add(artificial_cplx)
                    prot_cplx_file.write(
                        artificial_cplx + '\t' + 'default_name' + '\t'
                        + stoichiometry_string + '\t'
                        + 'M_protein_recon' + '\n'
                    )

        assoc_file.write(str(reaction.id) + '\t' + ' OR '.join(enzymes) + '\n')

artificial_cplxs = list(set(artificial_cplxs))

['BSU07700', 'BSU13900', 'BSU13910']
['BSU30450', 'BSU30440', 'BSU30430', 'BSU30420', 'BSU30410']
['BSU30450', 'BSU30440', 'BSU30430', 'BSU30420', 'BSU30410']
['BSU19370', 'BSU19360', 'BSU14610']
['BSU25900', 'BSU17410', 'BSU01530', 'BSU25710', 'BSU35620']
['BSU39270', 'BSU13900', 'BSU13910']
['BSU28750', 'BSU28740', 'BSU28730']
['BSU23980', 'BSU23970', 'BSU23960']
['BSU36880', 'BSU36870', 'BSU36860', 'BSU36850', 'BSU36840', 'BSU36830', 'BSU36820', 'BSU36810', 'BSU36800']
['BSU33160', 'BSU33170', 'BSU33180']
['BSU13900', 'BSU13910', 'BSU38390']
['BSU13900', 'BSU13910', 'BSU05810', 'BSU05820', 'BSU05830']
['BSU38570', 'BSU38590', 'BSU38580', 'BSU13900', 'BSU13910']
['BSU36710', 'BSU12160']
['BSU36710', 'BSU27220']
['BSU36710', 'BSU18570']
['BSU38280', 'BSU38270', 'BSU38260']
['BSU08460', 'BSU08440', 'BSU08450']
['BSU33310', 'BSU33290', 'BSU33300', 'BSU39610']
['BSU27070', 'BSU27060', 'BSU27050', 'BSU27040', 'BSU13900', 'BSU13910']
['BSU14400', 'BSU13900', 'BSU13910']
['BSU02350', 'BSU13

In [99]:
id = 'VALt2r'
# Look the reaction up once, then show its name, equation and gene rule.
inspected_rxn = m_model.reactions.get_by_id(id)
for detail in (
    inspected_rxn.name,
    inspected_rxn.reaction,
    inspected_rxn.gene_reaction_rule,
):
    print(detail)

L valine reversible transport via proton symport
h_e + val__L_e --> h_c + val__L_c
BSU26690 or (BSU26710 and BSU26700) or BSU29600


## TUs_from_bsubcyc.txt

In [36]:
filename = 'TUs_from_bsubcyc.txt'

# For every collected protein/complex, query its transcription units and
# print the protein followed by the result. Nothing is written to `filename`
# in this cell.
for protein in CPLXS:
    TU = pc.PGDB.transcription_units_of_protein(bsub, protein)
    print(protein)
    print(TU)


|CPLX8J2-77|
[]
|CPLX8J2-91|
[]
|CPLX-7603|
[]
|CPLX8J2-101|
[]
|CPLX8J2-119|
[]
|CPLX8J2-14|
[]
|CPLX8J2-164|
[]
|CPLX8J2-179|
[]
|CPLX8J2-194|
[]
|CPLX8J2-32|
[]
|CPLX8J2-48|
[]
|CPLX8J2-64|
[]
|CPLX8J2-8|
[]
|CPLX8J2-94|
[]
|CPLX-8351|
[]
|CPLX8J2-104|
[]
|CPLX8J2-122|
[]
|CPLX8J2-142|
[]
|CPLX8J2-145|
[u'|TU8J2-1100|', u'|TU8J2-1101|']
|CPLX8J2-167|
[]
|CPLX8J2-181|
[]
|CPLX8J2-20|
[]
|CPLX8J2-36|
[]
|CPLX8J2-50|
[]
|CPLX8J2-67|
[]
|CPLX8J2-82|
[]
|CPLX8J2-97|
[u'|TU8J2-1287|', u'|TU8J2-1239|']
|CPLX-8648|
[]
|CPLX8J2-107|
[]
|CPLX8J2-125|
[]
|CPLX8J2-148|
[]
|CPLX8J2-17|
[]
|CPLX8J2-185|
[]
|CPLX8J2-23|
[]
|CPLX8J2-39|
[]
|CPLX8J2-53|
[]
|CPLX8J2-70|
[]
|CPLX8J2-85|
[]
|CPLX-9083|
[]
|CPLX8J2-110|
[u'|TU8J2-1292|']
|CPLX8J2-128|
[]
|CPLX8J2-157|
[]
|CPLX8J2-172|
[u'|TU8J2-994|', u'|TU8J2-1262|', u'|TU8J2-1261|', u'|TU8J2-1260|']
|CPLX8J2-188|
[]
|CPLX8J2-26|
[]
|CPLX8J2-41|
[]
|CPLX8J2-56|
[]
|CPLX8J2-73|
[]
|CPLX8J2-88|
[]
|CPLX-5322|
[]
|CPLX-9285|
[]
|CPLX8J2-113|
[]
|CPLX8J2-1

In [26]:
# Display the current value of TU.
# NOTE(review): presumably the value left over from the last iteration of the
# transcription-unit loop; the cell execution counts are out of order, so
# confirm which run produced it.
TU

[u'|TU8J2-833|']