In [1]:
import os
import pandas as pd
import numpy as np
import cobra
from tqdm import tqdm

from dGbyG.utils.ChemFunc import build_equation, to_mol
from dGbyG.api import Compound, Reaction



In [2]:
# Default condition values shared by every compartment.
default_T = 298.15
default_I = 0.25
default_pH = 7.0
default_pMg = 14.0


def _compartment_condition(pH, e_potential):
    """Return a fresh condition dict for one compartment.

    Only `pH` and `e_potential` vary between compartments; `T`, `I` and `pMg`
    are always taken from the defaults above. A new dict is built on every
    call so no two compartments share the same object.
    """
    return {'pH': pH, 'e_potential': e_potential,
            'T': default_T, 'I': default_I, 'pMg': default_pMg}


# Per-compartment conditions, keyed by the compartment letter used by the models.
# NOTE(review): e_potential is written as <number> * 1e-3, presumably mV converted
# to V -- confirm the unit expected by dGbyG.
conditions = {
    'c': _compartment_condition(7.20, 0),
    'e': _compartment_condition(7.40, 30 * 1e-3),
    'n': _compartment_condition(7.20, 0),
    'r': _compartment_condition(7.20, 0),
    'g': _compartment_condition(6.35, 0),
    'l': _compartment_condition(5.50, 19 * 1e-3),
    'm': _compartment_condition(8.00, -155 * 1e-3),
    'i': _compartment_condition(8.00, -155 * 1e-3),
    'x': _compartment_condition(7.00, 12 * 1e-3),
}

### 1. Predicting standard Gibbs energy for Recon3D

In [3]:
# Load the Recon3D model from its MATLAB file and derive the stoichiometric matrix.
recon3d = cobra.io.load_matlab_model('../data/Recon3D/Recon3D_301.mat')
S = cobra.util.array.create_stoichiometric_matrix(recon3d) # shape = [met, rxn]

# patch: overwrite individual identifier annotations on specific metabolites
# (metabolite id, annotation key, replacement value list)
for patched_id, annotation_key, annotation_value in (
    ('aqcobal[e]', 'pubchem.compound', ['4238']),
    ('aqcobal[c]', 'pubchem.compound', ['4238']),
    ('yvite[e]', 'kegg.compound', ['C02483']),
):
    recon3d.metabolites.get_by_id(patched_id).annotation[annotation_key] = annotation_value

Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-29


No defined compartments in model Recon3D. Compartments will be deduced heuristically using regular expressions.
Using regular expression found the following compartments:c, e, g, i, l, m, n, r, x


In [4]:
# Compartment-free metabolite ids: the last three characters of a Recon3D id
# are the compartment tag (e.g. "[e]" in 'aqcobal[e]').
met_ids = {met.id[:-3] for met in recon3d.metabolites}

# Every annotation key (identifier type) that occurs on any metabolite.
ID_types = set()
for met in recon3d.metabolites:
    for id_type, id_values in met.annotation.items():
        # downstream cells only ever read element [0], so require exactly one value
        assert len(id_values) == 1
        ID_types.add(id_type)
ID_types

{'SMILES', 'chebi', 'hmdb', 'inchi', 'kegg.compound', 'pubchem.compound'}

In [5]:
# Build one identifier record per compartment-free Recon3D metabolite.
# Records are accumulated in plain dicts and turned into a DataFrame once,
# instead of writing into the frame cell by cell with scalar `.loc` access.
met_records = {}
for met in recon3d.metabolites:
    base_id = met.id[:-3]  # strip the 3-character compartment tag, e.g. "[e]"
    record = met_records.setdefault(base_id, {'recon3d': base_id})
    for key, value in met.annotation.items():
        if key not in record:
            record[key] = value[0]
        else:
            # the same metabolite in another compartment must carry the same identifier
            assert record[key] == str(value[0])

# Rows follow `met_ids`; columns are the identifier types in the order they are
# later tried when resolving a molecule. Identifiers a metabolite lacks are NaN.
mets_df = (
    pd.DataFrame.from_dict(met_records, orient='index')
    .rename(columns={'kegg.compound': 'kegg'})
    .reindex(index=list(met_ids),
             columns=['inchi', 'kegg', 'recon3d', 'SMILES', 'chebi', 'hmdb'])
)

In [6]:
# Map every compartment-specific metabolite id to the mol object returned by
# `to_mol` (None when no identifier could be resolved).
mols_dict = {}
for met in recon3d.metabolites:
    base_id = met.id[:-3]

    mol = None
    # Try the identifier columns of mets_df left to right; stop at the first
    # identifier for which `to_mol` returns a truthy result.
    for cid_type, cid in mets_df.loc[base_id, :].items():
        if pd.isna(cid):
            continue
        mol = to_mol(cid=cid, cid_type=cid_type)
        if mol:
            break

    # Attach a Compound carrying the conditions of the metabolite's compartment.
    compound = Compound(mol,)
    compound.condition = conditions[met.compartment]
    met.compound = compound
    mols_dict[met.id] = mol

# number of metabolites for which a mol object was obtained
sum(resolved is not None for resolved in mols_dict.values())

7387

In [7]:
# Collect `transformed_standard_dGf_prime` of every Recon3D metabolite's compound.
dGf = np.array([
    met.compound.transformed_standard_dGf_prime
    for met in tqdm(recon3d.metabolites)
])

# per-column count of entries that are not NaN
sum(~np.isnan(dGf))

100%|██████████| 8399/8399 [06:20<00:00, 22.06it/s] 


array([7387, 7387])

In [8]:
# Tabulate the metabolite results, one row per Recon3D metabolite id.
dGf_df = pd.DataFrame(
    data=dGf,
    index=[met.id for met in recon3d.metabolites],
    columns=['transformed standard dGf', 'SD'],
)
#dGf_df.to_csv('../data/Recon3D/Recon3D_standard_dGf_dGbyG.csv')

In [9]:
# Collect `transformed_standard_dGr_prime` for every Recon3D reaction.
dGr = []
for rxn in tqdm(recon3d.reactions):
    # Reaction is built from {compound: stoichiometric coefficient}
    stoichiometry = {met.compound: coeff for met, coeff in rxn.metabolites.items()}
    dGr.append(Reaction(stoichiometry).transformed_standard_dGr_prime)
dGr = np.array(dGr)

# per-column count of entries that are not NaN
sum(~np.isnan(dGr))

100%|██████████| 13543/13543 [01:11<00:00, 188.35it/s]


array([9626, 9626])

In [10]:
# Tabulate the reaction results, one row per Recon3D reaction id.
dGr_df = pd.DataFrame(
    data=dGr,
    index=[rxn.id for rxn in recon3d.reactions],
    columns=['transformed standard dGr', 'SD'],
)
#dGr_df.to_csv('../data/Recon3D/Recon3D_standard_dGr_dGbyG.csv')

In [18]:
# Coverage per compartment-free metabolite: keep the first non-None mol seen
# among the compartment copies of each metabolite.
metNoComp = {}
for met in recon3d.metabolites:
    base_id = met.id[:-3]  # drop the 3-character compartment tag
    if metNoComp.get(base_id) is None:
        metNoComp[base_id] = met.compound.mol

# (metabolites with a mol, unique metabolites, covered fraction)
n_with_mol = sum(mol is not None for mol in metNoComp.values())
n_with_mol, len(metNoComp), n_with_mol / len(metNoComp)

(3451, 4140, 0.8335748792270531)

### 2. Predicting standard Gibbs energy for Human1

In [19]:
# Location of the Human-GEM checkout and the files read from it.
Human_GEM_path = "../../Human-GEM"
human1_model_file = os.path.join(Human_GEM_path, "model/Human-GEM.xml")
human1_metabolites_file = os.path.join(Human_GEM_path, "model/metabolites.tsv")
human1_reactions_file = os.path.join(Human_GEM_path, "model/reactions.tsv")

# Read model and patch it
human1 = cobra.io.read_sbml_model(human1_model_file)
human1.metabolites.get_by_id('MAM01935e').annotation['kegg.compound'] = 'C02483'

# Tab-separated annotation tables that accompany the model.
metabolites_df = pd.read_csv(human1_metabolites_file, sep='\t')
reactions_df = pd.read_csv(human1_reactions_file, sep='\t')

# Indexed views keyed by reaction / metabolite id.
# NOTE: `mets_df` is rebound here; from this point on it no longer refers to
# the Recon3D identifier table built earlier in the notebook.
rxns_df = reactions_df.set_index('rxns')
mets_df = metabolites_df.set_index('mets')

In [20]:
# Every annotation key that occurs on any Human1 metabolite.
IDs = {key for met in human1.metabolites for key in met.annotation}
IDs

{'bigg.metabolite',
 'chebi',
 'hmdb',
 'inchi',
 'kegg.compound',
 'lipidmaps',
 'metanetx.chemical',
 'pubchem.compound',
 'sbo',
 'vmhmetabolite'}

In [21]:
# Map every Human1 metabolite id to the mol object returned by `to_mol`
# (None when no identifier could be resolved).
mols_dict = {}

# Identifier types whose annotation may be a list; only the first entry is queried.
first_entry_types = ('chebi', 'hmdb', 'metanetx', 'lipidmaps')

for met in human1.metabolites:
    annotation = met.annotation
    recon3d_entry = mets_df.loc[met.id, 'metRecon3DID']

    # Candidate identifiers keyed by the `to_mol` id type, in the order they are tried.
    ids = {
        'inchi': annotation.get('inchi'),
        'kegg': annotation.get('kegg.compound'),
        'recon3d': recon3d_entry if pd.notna(recon3d_entry) else None,
        'chebi': annotation.get('chebi'),
        'hmdb': annotation.get('hmdb'),
        'metanetx': annotation.get('metanetx.chemical'),
        'lipidmaps': annotation.get('lipidmaps'),
    }

    mol = None
    for cid_type in tuple(ids):
        cid = ids[cid_type]
        if not cid:
            continue
        if cid_type in first_entry_types and type(cid) is list:
            # The unwrapped value replaces the stored one, so the attribute set
            # on `met` below holds the first entry only when this id type was
            # actually queried; otherwise the raw annotation is kept.
            cid = cid[0]
            ids[cid_type] = cid
        mol = to_mol(cid, cid_type)
        if mol is not None:
            break

    mols_dict[met.id] = mol

    # Keep the identifiers and the resolved mol on the metabolite itself.
    met.met = met.id
    met.kegg = ids['kegg']
    met.chebi = ids['chebi']
    met.hmdb = ids['hmdb']
    met.recon3d = ids['recon3d']
    met.metanetx = ids['metanetx']
    met.lipidmaps = ids['lipidmaps']
    met.inchi = ids['inchi']
    met.mol = mol
    # Attach a Compound carrying the conditions of the metabolite's compartment.
    met.compound = Compound(mol,)
    met.compound.condition = conditions[met.compartment]

# number of metabolites for which a mol object was obtained
sum(resolved is not None for resolved in mols_dict.values())

7495

In [38]:
# Collect `transformed_standard_dGf_prime` of every Human1 metabolite's compound.
dGf = np.array([
    met.compound.transformed_standard_dGf_prime
    for met in tqdm(human1.metabolites)
])

# per-column count of entries that are not NaN
sum(~np.isnan(dGf))

100%|██████████| 8456/8456 [10:52<00:00, 12.96it/s] 


array([7495, 7495])

In [39]:
# Tabulate the metabolite results, one row per Human1 metabolite id.
dGf_df = pd.DataFrame(
    data=dGf,
    index=[met.id for met in human1.metabolites],
    columns=['transformed standard dGf', 'SD'],
)
#dGf_df.to_csv('../data/Human1/Human1_standard_dGf_dGbyG.csv')

In [40]:
# Collect `transformed_standard_dGr_prime` for every Human1 reaction.
dGr = []
for rxn in tqdm(human1.reactions):
    # Reaction is built from {compound: stoichiometric coefficient}
    stoichiometry = {met.compound: coeff for met, coeff in rxn.metabolites.items()}
    dGr.append(Reaction(stoichiometry).transformed_standard_dGr_prime)
dGr = np.array(dGr)

# per-column count of entries that are not NaN
sum(~np.isnan(dGr))

100%|██████████| 12995/12995 [01:24<00:00, 152.97it/s]


array([9697, 9697])

In [22]:
# Tabulate the reaction results, one row per Human1 reaction id.
# The rows are labelled with the reaction ids, as is done for the Recon3D
# reaction table and for the Human1 metabolite table. Passing the index also
# makes pandas raise if `dGr` does not have one row per Human1 reaction, e.g.
# when this cell is run while `dGr` still holds the Recon3D results.
dGr_df = pd.DataFrame(
    dGr,
    columns=['transformed standard dGr', 'SD'],
    index=[rxn.id for rxn in human1.reactions],
)
#dGr_df.to_csv('../data/Human1/Human1_standard_dGr_dGbyG.csv')

In [63]:
# Coverage per compartment-free metabolite: keep the first non-None mol seen
# among the compartment copies of each metabolite.
metNoComp = {}
for met in human1.metabolites:
    base_id = met.id[:-1]  # drop the trailing compartment letter, e.g. 'MAM01935e'
    if metNoComp.get(base_id) is None:
        metNoComp[base_id] = met.compound.mol

# (metabolites with a mol, unique metabolites, covered fraction)
n_with_mol = sum(mol is not None for mol in metNoComp.values())
n_with_mol, len(metNoComp), n_with_mol / len(metNoComp)

(3514, 4156, 0.8455245428296438)