# Refine BiGG IDs 

In [136]:
from pprint import pprint
import pandas
import json, re

# define the reaction translation
# reactions = pandas.read_table(open('BiGG_reactions.txt', 'r'), sep = '\t')
# reactions_dict = {}
# for index, met in reactions.iterrows():
#     reactions_dict[met['bigg_id']] = {
#         'name':met['name'],
#         'reaction_string':met['reaction_string']
#     }
# with open('BiGG_reactions, parsed.json', 'w') as out:
#     json.dump(reactions_dict, out, indent = 2)

# define the metabolite translation: universal BiGG ID -> {'name': cleaned name}
# The context manager closes the input handle once pandas has parsed the table.
with open('BiGG_metabolites.txt', 'r') as metabolites_file:
    metabolites = pandas.read_table(metabolites_file, sep = '\t')

metabolites_dict = {}
for index, met in metabolites.iterrows():
    # Drop a trailing whitespace-separated token of 3+ capitals/digits from the name
    # (presumably an appended chemical formula -- confirm against the BiGG dump).
    # str() coerces non-string cells before the regex runs.
    name = re.sub(r'\s[A-Z0-9]{3,}$', '', str(met['name']))
    name = name.strip()
    metabolites_dict[met['universal_bigg_id']] = {
        'name':name
    }

# Persist the table so later cells can load it without re-parsing the text dump.
with open('BiGG_metabolites, parsed.json', 'w') as out:
    json.dump(metabolites_dict, out, indent = 2)
    
# pprint(metabolites_dict)
# pprint(reactions_dict)

# Refine the BiGG model

In [144]:
def split_reaction(reaction_string, bigg_metabolites):
    """Translate a BiGG reaction string from metabolite IDs to metabolite names.

    The string is split on '<->' into a reactant side and a product side, and
    each side is split on ' + '. For every entry the trailing one-character
    compartment suffix (e.g. '_c') is removed, an optional leading
    stoichiometric coefficient ('2', '2.0', '1/2', ...) is separated from the
    ID, and the ID is looked up in ``bigg_metabolites``.

    Parameters
    ----------
    reaction_string : str
        Reaction of the form '<reactants> <-> <products>'.
    bigg_metabolites : dict
        Maps a metabolite ID (without compartment suffix) to a dict that has
        a 'name' entry.

    Returns
    -------
    tuple
        ``(reaction_string, sabio_compounds)``: the reaction rewritten with
        metabolite names (spaces inside a name replaced by '_'), and the list
        of reactant then product names with spaces kept, each prefixed by its
        coefficient when one was present.

    Raises
    ------
    IndexError
        If ``reaction_string`` contains no '<->' separator.
    KeyError
        If a metabolite ID is missing from ``bigg_metabolites``.
    """
    # A coefficient is a leading run of digits, '.' and '/' that ends in a digit
    # and is separated from the metabolite ID by whitespace. IDs that merely
    # start with digits (e.g. '10fthf') contain no whitespace and do not match.
    coefficient_pattern = re.compile(r'([0-9./]*\d)\s+(\S.*)')

    def met_parsing(met):
        """Split 'coefficient id' into (id, 'coefficient ') or (id, '')."""
        met = met.strip()
        match = coefficient_pattern.fullmatch(met)
        if match is None:
            return met, ''
        # The coefficient is removed as a literal prefix; it is never used as a
        # regex pattern, so '.' in '2.0' cannot act as a wildcard.
        return match.group(2), '{} '.format(match.group(1))

    def reformat_met_name(met_name, sabio = False):
        """Collapse ' - ' to '-'; outside SABIO format also turn spaces into '_'."""
        met_name = met_name.replace(' - ', '-')
        if not sabio:
            met_name = met_name.replace(' ', '_')
        return met_name

    def translate_side(side):
        """Return (names, sabio_names) for one side of the reaction."""
        names = []
        sabio_names = []
        for met in side.split(' + '):
            # An empty side (e.g. an exchange reaction 'x_e <-> ') yields a blank
            # entry that has no metabolite to look up.
            if not re.search('[a-z]', met, flags = re.IGNORECASE):
                continue
            met = met.strip()
            met = re.sub(r'_\w$', '', met)   # remove the compartment suffix
            met, coefficient = met_parsing(met)
            name = bigg_metabolites[met]['name']
            names.append(coefficient + reformat_met_name(name))
            sabio_names.append(coefficient + reformat_met_name(name, True))
        return names, sabio_names

    # parse the reactants and products for the specified reaction string
    reaction_split = reaction_string.split('<->')
    reactants, sabio_reactants = translate_side(reaction_split[0])
    products, sabio_products = translate_side(reaction_split[1])

    reactant_string = ' + '.join(reactants)
    product_string = ' + '.join(products)
    reaction_string = ' <-> '.join([reactant_string, product_string])

    # construct the set of compounds in the SABIO format
    sabio_compounds = sabio_reactants + sabio_products

    return reaction_string, sabio_compounds

In [145]:
from pprint import pprint
import pandas
import json
%run ../dfbapy/dfba.py

# Load the parsed BiGG translation tables; the context managers close each
# handle as soon as its JSON has been read.
with open('BiGG_reactions, parsed.json') as reactions_file:
    bigg_reactions = json.load(reactions_file)
with open('BiGG_metabolites, parsed.json') as metabolites_file:
    bigg_metabolites = json.load(metabolites_file)

# substitute the reaction and metabolite names
with open('Ecoli core, BiGG, indented.json') as model_file:
    model = json.load(model_file)
model_contents = {}
for reaction in model['reactions']:
    # define the reaction identification
    reaction_id = reaction['id'] 
    reaction_name = bigg_reactions[reaction_id]['name']
    
    # substitute the reaction string
    og_reaction_string = bigg_reactions[reaction_id]['reaction_string']
    # NOTE(review): three values are unpacked here, but the split_reaction defined
    # earlier in this file returns only (reaction_string, sabio_compounds).
    # Presumably the `%run ../dfbapy/dfba.py` above supplies a three-value
    # version -- confirm, otherwise this line raises ValueError.
    reaction_string, sabio_reaction_string, compounds = split_reaction(og_reaction_string, bigg_metabolites)

    # Entries are keyed by reaction name, so two reactions that share a name
    # overwrite each other; the last one in the model wins.
    model_contents[reaction_name] = {
        'reaction': {
            'original': og_reaction_string,
            'substituted': reaction_string,
            'sabio': sabio_reaction_string
        },
        'chemicals': compounds,
        'annotations': reaction['annotation']
    }
    
# pprint(model_contents)
with open('processed_Ecoli_model.json', 'w') as out:
    json.dump(model_contents, out, indent = 3)