# Examples highlighting eQuilibrator use on arbitrary compounds

In [2]:
# Libraries used by every cell in this notebook: `ea` provides get_compound / gen_compound,
# and Q_ builds the quantities passed as conditions to the predictor.
import equilibrator_a as ea
from equilibrator_cache import Q_
from component_contribution.predict import GibbsEnergyPredictor
# A single predictor instance, reused by all ∆G calculations in the cells below.
GP = GibbsEnergyPredictor()

Downloading package metadata...
Fragments already downloaded
0.2.10 equilibrator/cache
Downloading package metadata...
Fragments already downloaded
Downloading package metadata...
Fragments already downloaded


# Generating compound objects
To generate the compound objects there are two functions:

1. get_compound(mol_string)
1. gen_compound(mol_string)

**get_compound** checks the ccache first for a match and uses precomputed values to estimate ∆G. 

**gen_compound** does not look in the ccache: it always generates a new compound object using cxcalc. It is what **get_compound** falls back to when there is no match.




In [3]:
# Two routes to a compound object for the same SMILES string:
#   * get_compound looks for a matching compound in the ccache and only falls
#     back to a cxcalc calculation when nothing matches;
#   * gen_compound always creates a new compound using cxcalc.
mol_string = 'CCO'
cpd_get = ea.get_compound(mol_string)
cpd_gen = ea.gen_compound(mol_string)

# The id reveals which route produced the object: a ccache compound has a
# positive id, whereas a calculated compound has an id of -1.
print(f'cpd_get id: {cpd_get.id}', f'cpd_gen id: {cpd_gen.id}', sep='\n')

cpd_get id: 287
cpd_gen id: -1


In [5]:
# Intended as an example of a compound that is NOT in the ccache, in which case get_compound
# falls back to cxcalc and both objects carry an id of -1.
# NOTE(review): the recorded output of this cell shows cpd_get with a positive id (51439), i.e. this
# SMILES WAS found in the ccache when the cell was run -- choose another compound to show the fallback.
mol_string = 'C(O)CCCCCCCO'
cpd_get = ea.get_compound(mol_string)
# gen_compound creates a new compound using cxcalc
cpd_gen = ea.gen_compound(mol_string)

# An id of -1 means the compound was calculated; a positive id means it came from the ccache
print(f'cpd_get id: {cpd_get.id}')
print(f'cpd_gen id: {cpd_gen.id}')

cpd_get id: 51439
cpd_gen id: -1


# Calculating ∆Go and ∆G'o for compounds already in the compound_cache
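For a compound that is already in the compound cache, the values obtained from the cached compound (get_compound) and from a freshly generated compound (gen_compound) should agree, provided the compound has no magnesium dissociation data and is not in the training data.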

In [7]:
# ∆Go for a compound that is in the ccache but NOT in the training data,
# predicted once from the ccache compound and once from a freshly generated one.
mol_smiles = 'C(=O)CCCC(O)=O'
cpd_get = ea.get_compound(mol_smiles)
cpd_gen = ea.gen_compound(mol_smiles)
print(f'cpd_get id: {cpd_get.id}', f'cpd_gen id: {cpd_gen.id}', sep='\n')

print('\nccache compound and generated compound give the same result for ∆Go')
# Same predictor call for both objects, ccache compound first.
for cpd in (cpd_get, cpd_gen):
    print(GP.standard_dgf(cpd))

cpd_get id: 1280
cpd_gen id: -1

ccache compound and generated compound give the same result for ∆Go
(-453.0 +/- 1.3) kilojoule / mole
(-453.0 +/- 1.3) kilojoule / mole


In [9]:
# Calculating ∆G'o for a compound in the ccache without magnesium data and not in the training data
mol_smiles = 'C(=O)CCCC(O)=O'
# Build both compound objects from mol_smiles in this cell, so the result does not
# depend on whichever cpd_get / cpd_gen an earlier cell happened to leave in the kernel.
cpd_get = ea.get_compound(mol_smiles)
cpd_gen = ea.gen_compound(mol_smiles)

# Conditions forwarded as keyword arguments to standard_dgf_prime.
cond = {
    'p_h': Q_(7),
    'ionic_strength': Q_('0.1M'),
    'temperature': Q_('298.15K'),
    'p_mg': Q_(0)}

print(f'cpd_get id: {cpd_get.id}')
print(f'cpd_gen id: {cpd_gen.id}')

print('\nccache compound and generated compound give the same result for ∆G\'o')
print(GP.standard_dgf_prime(cpd_get, **cond))
print(GP.standard_dgf_prime(cpd_gen, **cond))

cpd_get id: 1280
cpd_gen id: -1

ccache compound and generated compound give the same result for ∆G'o
(-169.8 +/- 1.3) kilojoule / mole
(-169.8 +/- 1.3) kilojoule / mole


# Calculating ∆Go and ∆G'o for compounds not in the compound_cache

In [13]:
# ∆G'o for a compound obtained through get_compound; an id of -1 in the printed
# output means no ccache match was found and the compound was calculated.
mol_string = 'OC(=O)CCOCCO'
cpd_get = ea.get_compound(mol_string)

# Conditions forwarded as keyword arguments to standard_dgf_prime.
cond = {
    'p_h': Q_(7),
    'ionic_strength': Q_('0.1M'),
    'temperature': Q_('298.15K'),
    'p_mg': Q_(0)}

print(f'SMILES: {mol_string}\ncpd_get id: {cpd_get.id}')
dgf_prime = GP.standard_dgf_prime(cpd_get, **cond)
print(dgf_prime)



SMILES: OC(=O)CCOCCO
cpd_get id: -1
(-245.6 +/- 1.9) kilojoule / mole


# compound_cache vs generated -- Magnesium

In [35]:
# Conditions forwarded as keyword arguments to standard_dgf_prime.
cond = {
    'p_h': Q_(7),
    'ionic_strength': Q_('0.1M'),
    'temperature': Q_('298.15K'),
    'p_mg': Q_(0)}

# Same molecule obtained from the ccache lookup and from a fresh cxcalc generation.
mol_smiles = 'C1=NC2=C(C(=N1)N)N=CN2[C@H]3[C@@H]([C@@H]([C@H](O3)COP(=O)(O)OS(=O)(=O)O)OP(=O)(O)O)O'
cpd_get = ea.get_compound(mol_smiles)
cpd_gen = ea.gen_compound(mol_smiles)
print(f'cpd_get id: {cpd_get.id}', f'cpd_gen id: {cpd_gen.id}', sep='\n')

print('\nccache compound and generated compound give different results for ∆G\'o')
# Same predictor call for both objects, ccache compound first.
for cpd in (cpd_get, cpd_gen):
    print(GP.standard_dgf_prime(cpd, **cond))

cpd_get id: 50
cpd_gen id: -1

ccache compound and generated compound give different results for ∆G'o
(-1945 +/- 5) kilojoule / mole
(-1944 +/- 5) kilojoule / mole


In [33]:
# Check that cpd_get and cpd_gen expose the same public attribute names.
# A symmetric difference is used so that a name present on only ONE of the two
# objects (whichever it is) is reported; an empty set means the names match.
public_get = [name for name in dir(cpd_get) if not name.startswith('_')]
public_gen = [name for name in dir(cpd_gen) if not name.startswith('_')]
print(set(public_get).symmetric_difference(public_gen))
# Last expression of the cell: display the public attribute names of cpd_get.
public_get

set()


['ORDER_OF_REGISTRIES',
 'atom_bag',
 'created_on',
 'dissociation_constants',
 'formula',
 'get_accession',
 'get_common_name',
 'group_vector',
 'id',
 'identifiers',
 'inchi',
 'inchi_key',
 'magnesium_dissociation_constants',
 'mass',
 'metadata',
 'microspecies',
 'net_charge',
 'smiles',
 'transform',
 'updated_on']

In [36]:
# Display the identifiers (registry namespace + accession) recorded for cpd_get.
cpd_get.identifiers

[CompoundIdentifier(registry=Registry(namespace=bigg.metabolite), accession=paps),
 CompoundIdentifier(registry=Registry(namespace=bigg.metabolite), accession=M_paps),
 CompoundIdentifier(registry=Registry(namespace=chebi), accession=CHEBI:17980),
 CompoundIdentifier(registry=Registry(namespace=chebi), accession=CHEBI:11679),
 CompoundIdentifier(registry=Registry(namespace=chebi), accession=CHEBI:11680),
 CompoundIdentifier(registry=Registry(namespace=chebi), accession=CHEBI:1353),
 CompoundIdentifier(registry=Registry(namespace=chebi), accession=CHEBI:19857),
 CompoundIdentifier(registry=Registry(namespace=chebi), accession=CHEBI:58339),
 CompoundIdentifier(registry=Registry(namespace=hmdb), accession=HMDB01134),
 CompoundIdentifier(registry=Registry(namespace=hmdb), accession=HMDB62646),
 CompoundIdentifier(registry=Registry(namespace=kegg), accession=C00053),
 CompoundIdentifier(registry=Registry(namespace=metacyc.compound), accession=PAPS),
 CompoundIdentifier(registry=Registry(nam