In [1]:
import setpypath

from dataclasses import dataclass, asdict
from typing import Iterable, List, Mapping, Optional

import cobra
import json
import pymongo


In [2]:
import cobra.test
# Load cobra's "ecoli" test model and print its description as a quick sanity check.
# NOTE(review): despite the `bigg_core` name, the inline comment says this is iJO1366 — confirm the name is intentional.
bigg_core = cobra.test.create_test_model("ecoli")  # iJO1366
print(bigg_core.description)

Scaling...
 A: min|aij| =  1.000e+00  max|aij| =  1.000e+00  ratio =  1.000e+00
Problem data seem to be well scaled
Escherichia coli str. K-12 substr. MG1655


In [3]:
# Walk the model's metabolite list and report every entry whose id begins
# with "atp", together with its position in the list.
for i, m in enumerate(bigg_core.metabolites):
    if not m.id.startswith("atp"):
        continue
    print(i, m.id)

306 atp_c


In [4]:
# Fetch ATP by its metabolite id instead of by list position: the index 306
# was copied from the scan output above and would silently select a different
# metabolite if the model's metabolite ordering ever changed.
atp = bigg_core.metabolites.get_by_id("atp_c")
print(atp.annotation)

{'bigg.metabolite': 'atp', 'biocyc': ['META:ATP', 'META:CPD0-1634'], 'chebi': ['CHEBI:10789', 'CHEBI:10841', 'CHEBI:13236', 'CHEBI:15422', 'CHEBI:22249', 'CHEBI:2359', 'CHEBI:237958', 'CHEBI:30616', 'CHEBI:40938', 'CHEBI:57299'], 'kegg.compound': 'C00002', 'kegg.drug': 'D08646', 'metanetx.chemical': 'MNXM3', 'seed.compound': 'cpd00002'}


In [5]:
# Display the annotation dict of one sample reaction, picked by its position (1000) in the model's reaction list.
bigg_core.reactions[1000].annotation

{'sbo': 'SBO:0000375',
 'bigg.reaction': 'DHPPDA2',
 'biocyc': 'META:RIBOFLAVINSYNDEAM-RXN',
 'ec-code': '3.5.4.26',
 'kegg.reaction': 'R03459',
 'metanetx.reaction': 'MNXR97435',
 'rhea': ['21868', '21869', '21870', '21871']}

In [44]:
# Print the summary of each metabolite that takes part in the reaction at position 1000 of the model's reaction list.
for m in bigg_core.reactions[1000].metabolites:
    print(m.summary())

25drapp_c
Formula: C9H14N5O8P

Producing Reactions
-------------------
Percent      Flux Reaction                                                Definition
100.00% 0.0004381  GTPCII2 gtp_c + 3.0 h2o_c --> 25drapp_c + for_c + 2.0 h_c + ppi_c

Consuming Reactions
-------------------
Percent       Flux Reaction                                  Definition
100.00% -0.0004381  DHPPDA2 25drapp_c + h2o_c + h_c --> 5apru_c + nh4_c
h2o_c
=====
Formula: H2O

Producing Reactions
-------------------
Percent      Flux   Reaction                                                                      Definition
  3.82%     4.858     ACONTa                                                      cit_c <=> acon_C_c + h2o_c
 43.88%     55.82   ATPS4rpp                              adp_c + 4.0 h_p + pi_c <=> atp_c + h2o_c + 3.0 h_c
  0.00% 0.0004381     CPPPGO                   cpppg3_c + 2.0 h_c + o2_c --> 2.0 co2_c + 2.0 h2o_c + pppg9_c
 27.63%     35.15 CYTBO3_4pp                          4.0 h_c + 0.5 o2_c

h_c
===
Formula: H

Producing Reactions
-------------------
Percent      Flux                       Reaction                                                                      Definition
  0.03%   0.07646                         ACCOAC                      accoa_c + atp_c + hco3_c --> adp_c + h_c + malcoa_c + pi_c
  0.10%    0.2906                           ACGS                                    accoa_c + glu__L_c --> acglu_c + coa_c + h_c
  0.00% 0.0006572                           ADCL                                               4adcho_c --> 4abz_c + h_c + pyr_c
  0.00% 0.0006611                          ADNK1                                           adn_c + atp_c --> adp_c + amp_c + h_c
  0.08%    0.2435                           ADSK                                          aps_c + atp_c --> adp_c + h_c + paps_c
  0.20%    0.5875                           ADSS                   asp__L_c + gtp_c + imp_c --> dcamp_c + gdp_c + 2.0 h_c + pi_c
  0.15%    0.4401                    

5apru_c
Formula: C9H13N4O9P

Producing Reactions
-------------------
Percent      Flux Reaction                                  Definition
100.00% 0.0004381  DHPPDA2 25drapp_c + h2o_c + h_c --> 5apru_c + nh4_c

Consuming Reactions
-------------------
Percent       Flux Reaction                                    Definition
100.00% -0.0004381   APRAUR 5apru_c + h_c + nadph_c --> 5aprbu_c + nadp_c
nh4_c
=====
Formula: H4N

Producing Reactions
-------------------
Percent      Flux Reaction                                                 Definition
  1.36%    0.1512    CYSTL            cyst__L_c + h2o_c --> hcys__L_c + nh4_c + pyr_c
  0.00% 0.0004381  DHPPDA2                25drapp_c + h2o_c + h_c --> 5apru_c + nh4_c
  0.48%   0.05364    GLYCL gly_c + nad_c + thf_c --> co2_c + mlthf_c + nadh_c + nh4_c
  0.02%  0.001753     HMBS                h2o_c + 4.0 ppbng_c --> hmbil_c + 4.0 nh4_c
 95.56%     10.61   NH4tpp                                            nh4_p <=> nh4_c
  2.57%    0.2854 

In [6]:
@dataclass
class Descriptor:
    """Naming and cross-reference information attached to a Molecule or Reaction."""

    # Primary name; extract_descriptor copies it from the source object's `name`.
    name: str
    # Alternative names. Nothing in the visible notebook code populates this field.
    aka: Optional[List[str]] = None
    # Cross-reference strings of the form "[<annotation key>]:[<annotation value>]",
    # built by extract_descriptor from the source object's annotation dict.
    crossref: Optional[List[str]] = None


@dataclass
class Molecule:
    """Record describing one metabolite, as produced by bigg_to_molecule."""

    # Identifier; bigg_to_molecule uses the metabolite's own id (e.g. "atp_c").
    id: str
    # Name and cross-references for this molecule.
    descriptor: Descriptor
    # Chemical formula string taken from the source metabolite.
    formula: Optional[str] = None
    # Charge taken from the source metabolite.
    charge: Optional[int] = None
    # Filled from the source metabolite's `formula_weight`.
    molecular_weight: Optional[float] = None
    # Filled from the source metabolite's `compartment`.
    localization: Optional[str] = None
    # Not set by any code visible in this notebook; stays None after import.
    formation_delta_g: Optional[float] = None


@dataclass
class Reaction:
    """Record describing one reaction, as produced by bigg_to_reaction."""

    # Identifier; bigg_to_reaction uses the source reaction's own id.
    id: str
    # Name and cross-references for this reaction.
    descriptor: Descriptor
    # Maps metabolite id -> coefficient exactly as stored on the source reaction.
    # NOTE(review): presumably negative coefficients mean consumption — confirm.
    stoichiometry: Mapping[str, float]
    # Filled from the source reaction's `gene_reaction_rule` string.
    catalyst: str
    # Filled from the source reaction's `reversibility`; defaults to True when omitted.
    reversible: bool = True
    # Not set by any code visible in this notebook; stays None after import.
    standard_delta_g: Optional[float] = None

        
def extract_descriptor(bigg_obj):
    """Build a Descriptor from an object exposing `name` and `annotation`.

    Each annotation entry becomes one or more cross-reference strings of the
    form "[<key>]:[<value>]". An annotation value may be a single identifier
    or a list of identifiers (e.g. the 'chebi' and 'rhea' entries); a list is
    expanded into one cross-reference per identifier instead of being
    stringified as a whole.

    Args:
        bigg_obj: object with a `name` attribute and an `annotation` mapping.

    Returns:
        A Descriptor whose `crossref` is left at its default (None) when the
        annotation mapping is empty.
    """
    fields = {"name": bigg_obj.name}
    if bigg_obj.annotation:
        crossref = []
        for source, identifiers in bigg_obj.annotation.items():
            # Normalise the scalar case so both shapes share one code path.
            if not isinstance(identifiers, (list, tuple)):
                identifiers = [identifiers]
            crossref.extend(f"[{source}]:[{identifier}]" for identifier in identifiers)
        fields["crossref"] = crossref

    return Descriptor(**fields)


def bigg_to_molecule(bigg_molecule):
    """Translate a model metabolite into a Molecule record.

    Copies the metabolite's id, formula, charge, formula weight and
    compartment, and attaches a descriptor built by extract_descriptor.
    `formation_delta_g` is not supplied and keeps its default.
    """
    fields = {
        "id": bigg_molecule.id,
        "descriptor": extract_descriptor(bigg_molecule),
        "formula": bigg_molecule.formula,
        "charge": bigg_molecule.charge,
        "molecular_weight": bigg_molecule.formula_weight,
        "localization": bigg_molecule.compartment,
    }
    return Molecule(**fields)


def bigg_to_reaction(bigg_reaction):
    """Translate a model reaction into a Reaction record.

    The stoichiometry is re-keyed from metabolite objects to metabolite ids;
    the catalyst is the reaction's gene-reaction rule and reversibility is
    copied as-is. `standard_delta_g` is not supplied and keeps its default.
    """
    reaction_id = bigg_reaction.id
    descriptor = extract_descriptor(bigg_reaction)

    coefficients = {}
    for metabolite, count in bigg_reaction.metabolites.items():
        coefficients[metabolite.id] = count

    return Reaction(
        id=reaction_id,
        descriptor=descriptor,
        stoichiometry=coefficients,
        catalyst=bigg_reaction.gene_reaction_rule,
        reversible=bigg_reaction.reversibility,
    )


In [7]:
# Convert every metabolite of the model and index the resulting records by id.
molecules = {
    molecule.id: molecule
    for molecule in map(bigg_to_molecule, bigg_core.metabolites)
}
print(f"Imported {len(molecules)} molecules")

Imported 1805 molecules


  warn("The element %s does not appear in the periodic table" % e)
  warn("The element %s does not appear in the periodic table" % e)


In [8]:
reactions = {}
exchanges = set()
biomass = {}

# Sort every model reaction into exactly one of the three containers above.
for r in bigg_core.reactions:
    participants = r.metabolites
    if len(participants) == 1:
        # Single-metabolite reaction: only the metabolite's id is recorded.
        (sole,) = participants
        exchanges.add(sole.id)
        continue
    if r.id.startswith("BIOMASS"):
        # Kept as a plain {metabolite id: coefficient} mapping, not converted.
        biomass[r.id] = {met.id: coeff for met, coeff in participants.items()}
        continue
    reaction = bigg_to_reaction(r)
    reactions[reaction.id] = reaction

print(
    f"Imported {len(reactions)} reactions,"
    f" {len(exchanges)} exchanges,"
    f" {len(biomass)} biomass objectives"
)

Imported 2251 reactions, 330 exchanges, 2 biomass objectives


In [9]:
# Show the ids of the reactions that were set aside as biomass objectives.
biomass.keys()

dict_keys(['BIOMASS_Ec_iJO1366_WT_53p95M', 'BIOMASS_Ec_iJO1366_core_53p95M'])

In [36]:
# Handle to the "kb" database on the MongoDB server at 127.0.0.1:27017;
# put_molecules / put_reactions write through this module-level name.
kb = pymongo.MongoClient("mongodb://127.0.0.1:27017")["kb"]

In [37]:
def _put_in_batches(collection, items, noun, batch_size):
    """Insert dataclass instances into a collection in batches of `batch_size`.

    Args:
        collection: object exposing `insert_many(documents, ordered=...)`
            whose result has an `inserted_ids` sequence.
        items: any iterable of dataclass instances. It is consumed once and
            never indexed, so lists, dict views and generators all work.
        noun: plural label used in the progress messages.
        batch_size: maximum number of documents sent per `insert_many` call.

    Raises:
        ValueError: if `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        # A non-positive size can never make progress through the input.
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    def write(batch, already_written):
        print(f"{already_written} {noun} written, writing {len(batch)}...")
        # ordered=False: documents are inserted without stopping at the first failure.
        result = collection.insert_many(batch, ordered=False)
        return len(result.inserted_ids)

    written = 0
    batch = []
    for item in items:
        batch.append(asdict(item))
        if len(batch) >= batch_size:
            written += write(batch, written)
            batch = []
    if batch:
        # Final, possibly short, batch.
        written += write(batch, written)
    print(f"{written} {noun} total")


def put_molecules(molecules, batch_size=100):
    """Write Molecule records to the `molecules` collection of the global `kb`.

    Args:
        molecules: iterable of dataclass instances (converted with `asdict`).
        batch_size: number of documents per insert call; must be at least 1.

    Progress is printed before each batch and a total at the end.
    """
    _put_in_batches(kb.molecules, molecules, "molecules", batch_size)


def put_reactions(reactions, batch_size=100):
    """Write Reaction records to the `reactions` collection of the global `kb`.

    Args:
        reactions: iterable of dataclass instances (converted with `asdict`).
        batch_size: number of documents per insert call; must be at least 1.

    Progress is printed before each batch and a total at the end.
    """
    _put_in_batches(kb.reactions, reactions, "reactions", batch_size)


In [38]:
# Write every converted reaction to the database, passing the dict's values as a list.
# NOTE(review): no matching put_molecules(...) call appears in this part of the notebook — confirm molecules are written elsewhere.
put_reactions(list(reactions.values()))

0 reactions written, writing 100...
100 reactions written, writing 100...
200 reactions written, writing 100...
300 reactions written, writing 100...
400 reactions written, writing 100...
500 reactions written, writing 100...
600 reactions written, writing 100...
700 reactions written, writing 100...
800 reactions written, writing 100...
900 reactions written, writing 100...
1000 reactions written, writing 100...
1100 reactions written, writing 100...
1200 reactions written, writing 100...
1300 reactions written, writing 100...
1400 reactions written, writing 100...
1500 reactions written, writing 100...
1600 reactions written, writing 100...
1700 reactions written, writing 100...
1800 reactions written, writing 100...
1900 reactions written, writing 100...
2000 reactions written, writing 100...
2100 reactions written, writing 100...
2200 reactions written, writing 51...
2251 reactions total
