In [1]:
import json
import pandas as pd

# Load the raw BiGG universal model. A context manager closes the file handle
# as soon as parsing is done instead of leaving it to garbage collection.
# NOTE(review): machine-specific absolute path — the notebook only runs where
# this file exists; consider a configurable data directory.
with open("C:\\Users\\02700282\\OneDrive - Elanco\\universal_model.json", "r") as handle:
    bigg = json.load(handle)
# Peek at the start of the structure without dumping the whole model.
str(bigg)[:200]

"{'metabolites': [{'id': '4crsol_c', 'name': 'P-Cresol', 'compartment': '', 'notes': {'original_bigg_ids': ['4crsol', '4crsol_c', '_4crsol_c']}, 'annotation': [['KEGG Compound', 'http://identifiers.org"

Here we see that the annotations are nested lists, but those need to become dicts. So let's do the conversion.

In [2]:
from collections import defaultdict

def group_annotation(bigg_annotation : list) -> dict:
    """Group BiGG annotations into a dictionary keyed by provider.

    Parameters
    ----------
    bigg_annotation : list
        Annotation entries; the second element of each entry is expected to
        be an identifiers.org URL such as
        ``http://identifiers.org/kegg.compound/C01468``. An already grouped
        ``dict`` is also accepted and returned as a shallow copy, so applying
        the conversion twice does not wipe the annotation.

    Returns
    -------
    dict
        Maps each provider (e.g. ``"kegg.compound"``) to the list of its
        identifiers (e.g. ``["C01468"]``) in input order.

    Notes
    -----
    Entries that do not have the form
    ``...://identifiers.org/<provider>/<id>`` are skipped (best effort).
    """
    # Re-running the conversion on grouped data would otherwise iterate over
    # the dict keys, fail to parse every one of them and return {}.
    if isinstance(bigg_annotation, dict):
        return dict(bigg_annotation)
    annotations = defaultdict(list)
    for ann in bigg_annotation:
        try:
            uri = ann[1].split("://identifiers.org/")[1]
            # Exactly one "/" is expected; anything else raises ValueError.
            provider, ide = uri.split("/")
        except (AttributeError, LookupError, TypeError, ValueError):
            # Malformed entry: too short, not a string, or not a
            # provider/id pair.
            continue
        annotations[provider].append(ide)
    return dict(annotations)

# Swap the nested-list annotation of every record for its grouped dict form.
for section in ("metabolites", "reactions", "genes"):
    records = bigg[section]
    for entry in records:
        entry["annotation"] = group_annotation(entry["annotation"])

In [3]:
# Inspect the first metabolite record to check the converted annotation.
bigg["metabolites"][0]

{'id': '4crsol_c',
 'name': 'P-Cresol',
 'compartment': '',
 'notes': {'original_bigg_ids': ['4crsol', '4crsol_c', '_4crsol_c']},
 'annotation': {'kegg.compound': ['C01468'],
  'chebi': ['CHEBI:11981',
   'CHEBI:17847',
   'CHEBI:1816',
   'CHEBI:20352',
   'CHEBI:44726'],
  'hmdb': ['HMDB01858', 'HMDB13762'],
  'inchikey': ['IWDCLRJOBJJRNH-UHFFFAOYSA-N'],
  'biocyc': ['META:CPD-108'],
  'metanetx.chemical': ['MNXM828'],
  'seed.compound': ['cpd01042']}}

This looks correct now. Let's also fix the compartments.

In [4]:
# Show the compartment table as it comes from the input file.
bigg["compartments"]

{}

Nothing is defined there, so let's derive the compartments that are actually used from the suffix of each metabolite ID.

In [5]:
# The text after the last underscore of a metabolite id is taken as its
# compartment id and written back onto the record.
for metabolite in bigg["metabolites"]:
    metabolite["compartment"] = metabolite["id"].rsplit("_", 1)[-1]

# Distinct compartment ids that occur in the model.
compartments = {metabolite["compartment"] for metabolite in bigg["metabolites"]}
compartments

{'c',
 'cm',
 'cx',
 'e',
 'f',
 'g',
 'h',
 'i',
 'im',
 'l',
 'm',
 'mm',
 'n',
 'p',
 'r',
 's',
 'u',
 'um',
 'v',
 'w',
 'x',
 'y'}

Now we try to name them using the default compartment list in COBRApy.

In [6]:
from cobra.medium.annotations import compartment_shortlist
# Name every compartment id using the first entry cobrapy's shortlist gives
# for it; ids missing from the shortlist are labelled "unknown".
# Iterating in sorted order keeps the key order — and therefore the JSON file
# written from `bigg` — identical between kernel restarts; plain set iteration
# order changes with string hash randomisation.
bigg["compartments"] = {
    c: compartment_shortlist.get(c, ["unknown"])[0] for c in sorted(compartments)
}
bigg["compartments"]

{'u': 'thylakoid',
 'g': 'golgi',
 'v': 'vacuole',
 'cm': 'unknown',
 'c': 'cytoplasm',
 'im': 'mitochondrial intermembrane space',
 'f': 'flagellum',
 'mm': 'mitochondrial membrane',
 'x': 'peroxisome',
 'm': 'mitochondrion',
 'p': 'periplasm',
 'n': 'nucleus',
 'h': 'chloroplast',
 's': 'eyespot',
 'um': 'unknown',
 'y': 'unknown',
 'w': 'cell wall',
 'i': 'unknown',
 'e': 'extracellular',
 'l': 'lysosome',
 'r': 'unknown',
 'cx': 'unknown'}

Now we save the model to JSON and check whether we can read it well.

In [7]:
# Write through a context manager so the file is flushed and closed before it
# is read back; a bare open() leaves that to garbage collection.
with open("universal_model_cobrapy.json", "w") as handle:
    json.dump(bigg, handle)

In [8]:
from cobra.io import load_json_model
# Read the written file back through cobrapy to confirm it parses into a model.
model = load_json_model("universal_model_cobrapy.json")

In [9]:
# Display the summary of the loaded model.
model

0,1
Name,bigg_universal
Memory address,2bba8e3e6d0
Number of metabolites,15638
Number of reactions,28301
Number of genes,0
Number of groups,0
Objective expression,0
Compartments,"cytoplasm, extracellular, periplasm, mitochondrion, peroxisome, unknown, nucleus, vacuole, golgi, thylakoid, lysosome, chloroplast, eyespot, flagellum, mitochondrial intermembrane space, unknown, unknown, unknown, unknown, mitochondrial membrane, cell wall, unknown"


In [10]:
# Annotation of the first metabolite as seen through the loaded model; it
# should still be the provider -> identifiers dict.
model.metabolites[0].annotation

{'kegg.compound': ['C01468'],
 'chebi': ['CHEBI:11981',
  'CHEBI:17847',
  'CHEBI:1816',
  'CHEBI:20352',
  'CHEBI:44726'],
 'hmdb': ['HMDB01858', 'HMDB13762'],
 'inchikey': ['IWDCLRJOBJJRNH-UHFFFAOYSA-N'],
 'biocyc': ['META:CPD-108'],
 'metanetx.chemical': ['MNXM828'],
 'seed.compound': ['cpd01042']}

In [11]:
# Use the reaction with id BIOMASS_reaction as the model objective.
# NOTE(review): assumes the universal model contains a reaction with that id — confirm.
model.objective = model.reactions.BIOMASS_reaction

Looks like everything is good :)

In [12]:
# Summarise every metabolite in one table indexed by metabolite id:
# name, formula, compartment and a comma-separated list of the ids of the
# reactions it takes part in.
# The rows are collected in plain lists and the frame is built once;
# enlarging a DataFrame cell by cell with `.loc` copies data on every new
# row and becomes very slow for a model of this size.
metabolite_ids = []
metabolite_rows = []
for metabolite in model.metabolites:
    metabolite_ids.append(metabolite.id)
    metabolite_rows.append(
        (
            metabolite.name,
            metabolite.formula,
            metabolite.compartment,
            ",".join(reaction.id for reaction in metabolite.reactions),
        )
    )
metabolitedf = pd.DataFrame(
    metabolite_rows,
    index=metabolite_ids,
    columns=["Name", "Formula", "Compartment", "reactions"],
)
metabolitedf.head(5)
# Optional export:
#metabolitedf.to_excel('Metabolite_Info_Universal_model.xlsx')

Unnamed: 0,Name,Formula,Compartment,reactions
4crsol_c,P-Cresol,,c,"CRESt2ipp,DM_4crsol_c,TYRL"
aacald_c,Aminoacetaldehyde,,c,"FDMOtau,TAUDO,DM_aacald_c,AALDCDLsi,ALDD31"
amob_c,S-Adenosyl-4-methylthio-2-oxobutanoate,,c,"AMAOTr,DM_amob_c"
10fthf_c,10-Formyltetrahydrofolate,,c,"10FTHFGLULL,GARFT,BIOMASS_Gm_GS15_core_79p20M,..."
2fe2s_c,[2Fe-2S] iron-sulfur cluster,,c,"BIOMASS_UnmeasuredSOLUTES,S2FE2ST,BIOMASS_KT24..."


In [13]:
# Show the first five rows of the metabolite table.
metabolitedf.head(5)

Unnamed: 0,Name,Formula,Compartment,reactions
4crsol_c,P-Cresol,,c,"CRESt2ipp,DM_4crsol_c,TYRL"
aacald_c,Aminoacetaldehyde,,c,"FDMOtau,TAUDO,DM_aacald_c,AALDCDLsi,ALDD31"
amob_c,S-Adenosyl-4-methylthio-2-oxobutanoate,,c,"AMAOTr,DM_amob_c"
10fthf_c,10-Formyltetrahydrofolate,,c,"10FTHFGLULL,GARFT,BIOMASS_Gm_GS15_core_79p20M,..."
2fe2s_c,[2Fe-2S] iron-sulfur cluster,,c,"BIOMASS_UnmeasuredSOLUTES,S2FE2ST,BIOMASS_KT24..."


In [14]:
# Select every row whose Name equals the literal below. The comparison is
# exact, so the leading space and the double spaces in the name must be kept.
t = metabolitedf[metabolitedf['Name'] == ' R  Acetoin C4H8O2']

In [15]:
# Preview the rows selected into `t`.
t.head()

Unnamed: 0,Name,Formula,Compartment,reactions
actn__R_c,R Acetoin C4H8O2,,c,"ACTD2,ACLDC,ACTD_1,ACTNabc,ACALDCD,PYRDC2,ARSR..."
actn__R_e,R Acetoin C4H8O2,,e,"ACTNdiff,ACTNt2r,ACTNabc1,ACTNabc,ACTNtex,EX_a..."
actn__R_p,R Acetoin C4H8O2,,p,"ACTNtex,ACTNtpp"


In [16]:
# Select every row whose Name equals the literal below. The comparison is
# exact, so the leading space and the double spaces in the name must be kept.
t1 = metabolitedf[metabolitedf['Name'] == ' R R  2 3 Butanediol C4H10O2']

In [19]:
# Show the rows selected into `t1`.
t1

Unnamed: 0,Name,Formula,Compartment,reactions
btd_RR_c,R R 2 3 Butanediol C4H10O2,,c,"BTDt_RR,BTDt6_RR,BTDDtpp,BTDD_RR"
btd_RR_e,R R 2 3 Butanediol C4H10O2,,e,"EX_btd_RR_e,BTDt_RR,BTDt6_RR,BTDDtex"
btd_RR_p,R R 2 3 Butanediol C4H10O2,,p,"BTDDtpp,BTDDtex"


In [17]:
# For each selected metabolite row, print every reaction id it lists followed
# by that reaction's equation written with metabolite names.
for reaction_csv in t['reactions']:
    for reaction_id in reaction_csv.split(','):
        print(reaction_id)
        reaction = model.reactions.get_by_id(reaction_id)
        print(reaction.build_reaction_string(use_metabolite_names=True))

ACTD2
 R  Acetoin C4H8O2 + Coenzyme A + Nicotinamide adenine dinucleotide --> Acetaldehyde + Acetyl-CoA + H+ + Nicotinamide adenine dinucleotide - reduced
ACLDC
(S)-2-Acetolactate + H+ -->  R  Acetoin C4H8O2 + CO2 CO2
ACTD_1
Diacetyl C4H6O2 + Nicotinamide adenine dinucleotide - reduced -->  R  Acetoin C4H8O2 + Nicotinamide adenine dinucleotide
ACTNabc
 R  Acetoin C4H8O2 + ATP C10H12N5O13P3 + H2O H2O -->  R  Acetoin C4H8O2 + ADP C10H12N5O10P2 + H+ + Phosphate
ACALDCD
2.0 Acetaldehyde -->  R  Acetoin C4H8O2
PYRDC2
Acetaldehyde + H+ + Pyruvate -->  R  Acetoin C4H8O2 + CO2 CO2
ARSR
 R  Acetoin C4H8O2 --> S-acetoin
ACTD
 R  Acetoin C4H8O2 + Nicotinamide adenine dinucleotide --> Diacetyl C4H6O2 + H+ + Nicotinamide adenine dinucleotide - reduced
ACTNdiff
 R  Acetoin C4H8O2 -->  R  Acetoin C4H8O2
ACTNtpp
 R  Acetoin C4H8O2 + H+ -->  R  Acetoin C4H8O2 + H+
ACTNabc1
 R  Acetoin C4H8O2 + ATP C10H12N5O13P3 + H2O H2O -->  R  Acetoin C4H8O2 + ADP C10H12N5O10P2 + H+ + Phosphate
ACTDa
 R  Acetoin C4H8

In [18]:
# Reaction ids are kept as quoted strings; unquoted they would be looked up as
# Python variables.
# NOTE(review): `li` is not used in the visible part of the notebook — purpose to be confirmed.
li = ['ACTNt2r','ACALDCD','EX_btd_RR_e']

NameError: name 'ACTNt2r' is not defined