Integrate MetaNetX and BioCyc annotations into the MetaFlux SBML export.

In [15]:
species = 'lumbricus_rubellus' # either 'lumbricus_rubellus' or 'lumbricus_terrestris'

In [16]:
# use -python-local-only-non-strict when calling PathwayTools
import pythoncyc
from pythoncyc.PToolsFrame import PFrame
from pythoncyc.PTools import sendQueryToPTools, PToolsError
import json
pythoncyc.all_orgids()

['|GCF_004798725|', '|LRU|', '|LRT|', '|META|', '|ECOLI|']

In [17]:
import cobra
import os
cobra.Configuration.solver = 'glpk'

def get_biocyc_id(met: cobra.Metabolite):
    biocyc_id = met.annotation.get('biocyc')
    if biocyc_id:
        biocyc_id = biocyc_id[biocyc_id.index(':')+1:]
    return biocyc_id

In [18]:
with open("metanetx_annotations.json") as fp:
    metanetx_annotations = json.load(fp)

In [19]:
if species == 'lumbricus_terrestris':
    pgdb = pythoncyc.select_organism('lrt')
else:
    pgdb = pythoncyc.select_organism('lru')
metacyc = pythoncyc.select_organism('meta')

In [20]:
model = cobra.io.read_sbml_model(os.path.join(species, "MetaFlux/SBML_export.xml"))

In [21]:
import pandas as pd
bigg_metabolites = pd.read_csv("/scratch/vonkamp/gwdg_owncloud/cnapy-projects/a_woodii/Models_Acetogens/functions/bigg_models_metabolites.txt", sep='\t')

In [22]:
# validate MetaNetX mapping and create BiGG mapping
biocyc2bigg = dict()
native = 0
bigg_met = cobra.Metabolite("B")
for m,v in metanetx_annotations.items():
    try:
        biggID = [metacyc.get_slot_values(m, "DBLINKS").get("|BIGG|")[0]]
    except:
        biggID = None
    if biggID:
        biocyc2bigg[m] = biggID
        native += 1
        continue
    biggID = {a[43:].replace('M_', '').replace('-', '__') for a in v if a.startswith('http://bigg.ucsd.edu/universal/metabolites/')}
    valid = []
    for a in biggID:
        result = bigg_metabolites[bigg_metabolites.universal_bigg_id == a]["database_links"]
        if len(result) == 0:
            print(a, "appears obsolete")
        else:
            try:
                result = [s.split("META:")[1] for s in result.iloc[0].split(';') if "META:" in s]
                if m in result:
                    valid.append(a)
            except:
                print("Cannot resolve", m)
    biocyc2bigg[m] = valid
    if len(valid) > 1:
        print(m, valid)
print(native, len(biocyc2bigg))


Cannot resolve 5-PHOSPHONOOXY-L-LYSINE
CPD-170 ['stchs', 'stys']
Cannot resolve ARACHIDONYL-COA
Cannot resolve CPD66-28
Cannot resolve CPD-8609
Cannot resolve CPD-12173
Cannot resolve CPD-14276
Cannot resolve CPD-13534
Cannot resolve CPD-8343
LIPOIC-ACID ['lipoate', 'lipt']
Cannot resolve CPD-12647
Cannot resolve CPD-10505
Cannot resolve PENTANOYLCOA-CPD
Cannot resolve PENTANOYLCOA-CPD
Cannot resolve CPD-9720
Cannot resolve CPD-497
Cannot resolve CPD-9871
Cannot resolve CPD-17368
Cannot resolve PHYTOSPINGOSINE
Cannot resolve CPD-14283
Cannot resolve CPD-14018
glc appears obsolete
Cannot resolve PHTYOSPHINGOSINE-1-P
Cannot resolve CPD-14420
Cannot resolve CPD-4581
ALPHA-GLUCOSE ['Glc_aD', 'glc__aD']
Cannot resolve CH3-MALONATE-S-ALD
Cannot resolve CPD-547
Cannot resolve MEVALONATE
mev_R appears obsolete
Cannot resolve CPD-9406
Cannot resolve CPD-9873
Cannot resolve CPD-19741
Cannot resolve GLUTACONYL-COA
Cannot resolve GLUTACONYL-COA
Cannot resolve CPD-24318
Cannot resolve OLEOYL-COA
Ca

In [23]:
print(len(biocyc2bigg), sum(1 for v in biocyc2bigg.values() if v))

1256 495


In [24]:
for m in model.metabolites:
    biocyc_id = get_biocyc_id(m)
    if biocyc_id:
        a = biocyc2bigg.get(biocyc_id, None)
        if a:
            m.annotation['bigg.metabolite'] = a
            if len(a) > 1:
                print(m.id, a)

ALPHA_GLUCOSE_c ['Glc_aD', 'glc__aD']
AMMONIA_c ['nh4', 'nh3']
CPD_170_c ['stchs', 'stys']
LIPOIC_ACID_c ['lipoate', 'lipt']


In [26]:
for m in model.metabolites:
    biocyc_id = get_biocyc_id(m)
    if biocyc_id:
        try:
            pf = PFrame(biocyc_id, metacyc, getFrameData=True)
            if pf.synonyms is None:
                m.annotation['metacyc.synonyms'] = []
            else:
                m.annotation['metacyc.synonyms'] = pf.synonyms
        except:
            try:
                m.annotation['metacyc.synonyms'] = sendQueryToPTools(
                    f'(get-slot-values (nth 0 (with-organism (:org-id \'meta) (get-frame-labeled "{biocyc_id}"))) \'synonyms)')
            except:
                m.annotation['metacyc.synonyms'] = []
    else:
        m.annotation['metacyc.synonyms'] = []

In [27]:
# add BioCyc pathways to reaction annotation
for r in model.reactions:
    if not r.boundary:
        try:
            if '/' in r.name:
                idx = r.name.find('RXN')
                if idx == 0:
                    idx = r.name.find('-', r.name.find('-') + 1) # find second dash
                    biocyc_id = r.name[:idx]
                else:
                    biocyc_id = r.name[:idx + 3]
            else:
                biocyc_id = r.name
            ps = [p[1:-1] for p in pgdb.get_slot_values(biocyc_id, 'IN-PATHWAY')]
            if ps:
                r.annotation['biocyc.pathway'] = ps
                ps = [pgdb.get_slot_value(p, 'COMMON-NAME') for p in ps]
                r.annotation['biocyc.pathway-name'] = [(p if p else 'n/a') for p in ps] # replace None where no common name was defined
        except PToolsError as e:
            print(biocyc_id, e)

BIOMASS-EQUATION An internal error occurred in the running Pathway Tools application: :error Object "BIOMASS-EQUATION" is not coercible to a frame for KB LRUBASE
RXN66-1[CCO An internal error occurred in the running Pathway Tools application: :error Object "RXN66-1[CCO" is not coercible to a frame for KB LRUBASE
RXN-13451[CCO An internal error occurred in the running Pathway Tools application: :error Object "RXN-13451[CCO" is not coercible to a frame for KB LRUBASE
RXN-13451[CCO An internal error occurred in the running Pathway Tools application: :error Object "RXN-13451[CCO" is not coercible to a frame for KB LRUBASE
RXN-13451[CCO An internal error occurred in the running Pathway Tools application: :error Object "RXN-13451[CCO" is not coercible to a frame for KB LRUBASE
RXN-13451[CCO An internal error occurred in the running Pathway Tools application: :error Object "RXN-13451[CCO" is not coercible to a frame for KB LRUBASE
RXN-13451[CCO An internal error occurred in the running Pathwa

In [28]:
cobra.io.write_sbml_model(model, os.path.join(species, "MetaFlux/SBML_export_augmented.sbml"))