In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import rpreactor

rpreactor.__version__

'0.0.12+61.g12dad48.dirty'

In [3]:
import logging

logging.basicConfig(level=logging.INFO)

# Advanced library usage

## Parallelization

You can use several cores to parallelize the tasks. However, note that current implementation is not ideal as it shows little gain in performance:

In [4]:
path_sqlite = "rr02_hs.sqlite3"
o = rpreactor.RuleBurner(db_path=path_sqlite, with_hs=True)

rules_d16 = [x[0] for x in o.db.execute("select id from rules where diameter=16 and direction<=0;")]
random_cid = o.chemicals[42]

tasks = [(rule, random_cid) for rule in rules_d16]
len(tasks)

INFO:rpreactor.rule.burner:Connected to a database with 229862 rules, 12490 molecules, and 666144 results (at 'rr02_hs.sqlite3').


21383

In [5]:
%%timeit
results = [x for x in o.compute(tasks, max_workers=1)]

1min 33s ± 3.6 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
%%timeit
results = [x for x in o.compute(tasks, max_workers=2)]

1min 22s ± 1.03 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Direct database access

It can be useful to get a direct access to the database to select exactly the rules or chemicals you want to use. It is especially true if you use a database created from RetroRules.

In [7]:
o = rpreactor.RuleBurner(db_path="TMP_retrorules.sqlite3", with_hs=True)

INFO:rpreactor.rule.burner:Connected to a database with 229862 rules, 12490 molecules, and 666144 results (at 'TMP_retrorules.sqlite3').


For instance, you may want to use all rules at diameter 16 that will work in retrosynthesis direction for a specific metabolite:

In [8]:
%%time
rules_d16 = [x[0] for x in o.db.execute("select id from rules where diameter=16 and direction<=0;")]
len(rules_d16), rules_d16[:10]

CPU times: user 359 ms, sys: 160 ms, total: 519 ms
Wall time: 2.3 s


(21383,
 ['RR-02-af0959d1f4e9074d-16-F',
  'RR-02-75375bb3424a861f-16-F',
  'RR-02-3e613992ccfd1c0f-16-F',
  'RR-02-210fceec930d03aa-16-F',
  'RR-02-b4f23bc9461cf0f0-16-F',
  'RR-02-fc330d4fc04b3185-16-F',
  'RR-02-c8945a455022671f-16-F',
  'RR-02-1c7541df150fe56f-16-F',
  'RR-02-d58ff68a8484ba3f-16-F',
  'RR-02-c37e2f958d671813-16-F'])

In [9]:
%%time
random_cid = o.chemicals[42]

tasks = [(rule, random_cid) for rule in rules_d16]

results = [x for x in o.compute(tasks)]
len(results)

CPU times: user 28.9 s, sys: 8.67 s, total: 37.5 s
Wall time: 1min 24s


10

Another common query is to get all products that are at one reaction-step from a specific chemical, without using any promiscuity hypothesis:

In [10]:
%%time
chemical_id = 'MNXM1651'
rules = [x[0] for x in o.db.execute("select distinct rid from results where pgroup<0 and sid=?;", (chemical_id,))]

tasks = [(rule, chemical_id) for rule in rules]
results = [x for x in o.compute(tasks)]

# Distinct products at one reaction step from the chemical id, without promiscuity
distinct_products = set()
for x in results:
    for y in x['product_inchikeys']:
        for z in y:
            distinct_products.add(z)
distinct_products

CPU times: user 9.67 ms, sys: 2.72 ms, total: 12.4 ms
Wall time: 44.7 ms


{'ISWSIDIOOBJBQZ-UHFFFAOYSA-N',
 'JUIXSAKXHQVSIM-UHFFFAOYSA-N',
 'XLYOFNOQVPJJNP-UHFFFAOYSA-N',
 'YDRSQRPHLBEPTP-UHFFFAOYSA-N'}

Note that if you do not use the promiscuity hypothesis, you actually do not need to use the compute method and can retrieve results directly from the database using an SQL query:

In [11]:
%%time
chemical_id = 'MNXM1651'

ans = []
for row in o.db.execute("select distinct inchikey from results join molecules on pid=id where pgroup<0 and sid=?", (chemical_id,)):
    ans.append(row[0])
ans

CPU times: user 799 µs, sys: 227 µs, total: 1.03 ms
Wall time: 2.12 ms


['XLYOFNOQVPJJNP-UHFFFAOYSA-N',
 'ISWSIDIOOBJBQZ-UHFFFAOYSA-N',
 'JUIXSAKXHQVSIM-UHFFFAOYSA-N',
 'YDRSQRPHLBEPTP-UHFFFAOYSA-N']



Remember that identifiers are assumed by rpreactor to refer to distinct compounds, thus the queries above will search for known reactions that uses the specified identifier. Importantly, it will not retrieve all the reactions known to use the associated chemical structure. You can acheive the latter by querying the InChI first and then use them to make the query:


In [12]:
chemical_id = 'MNXM99'
for row in o.db.execute("select id, inchi from molecules where inchi=(select inchi from molecules where id=?)", (chemical_id,)):
    print(list(row))

['MNXM99', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM41', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM182', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM390', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM105', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM4637', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM146261', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM919', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM1919', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM18905', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM7967', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM48621', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM48393', 'InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2']
['MNXM7114', 'InChI=1S/C6H12O6/c7-1-2-3(8

In [13]:
chemical_id = 'MNXM99'
query = """
select distinct inchikey
from results
join molecules on pid=id
where pgroup<0 and sid=(
    select distinct id
    from molecules
    where id=?)
"""

ans = []
for row in o.db.execute(query, (chemical_id,)):
    ans.append(row[0])
ans  # Distinct products at one reaction step from the chemical structure, without promiscuity

['PVXPPJIGRGXGCY-UHFFFAOYSA-N',
 'CZMRCDWAGMRECN-UHFFFAOYSA-N',
 'ZFGVMMVMFNPHAQ-UHFFFAOYSA-N',
 'XLYOFNOQVPJJNP-UHFFFAOYSA-N',
 'IFBHRQDFSNCLOZ-UHFFFAOYSA-N',
 'LUEWUZLMQUOBSB-UHFFFAOYSA-N',
 'OITCGTKWORAONZ-UHFFFAOYSA-N',
 'HDTRYLNUVZCQOY-UHFFFAOYSA-N',
 'MUIAGSYGABVSAA-UHFFFAOYSA-N',
 'DJMVHSOAUQHPSN-UHFFFAOYSA-N',
 'RFSUNEUAIZKAJO-UHFFFAOYSA-N',
 'RULSWEULPANCDV-UHFFFAOYSA-N',
 'HXXFSFRBOHSIMQ-UHFFFAOYSA-N',
 'NBSCHQHZLSJFNQ-UHFFFAOYSA-N',
 'XTWYTFMLZFPYCI-UHFFFAOYSA-N',
 'UDMBCSSLTHHNCD-UHFFFAOYSA-N',
 'FJCUPROCOFFUSR-UHFFFAOYSA-N',
 'YGMBQDCBGPAZNW-UHFFFAOYSA-N',
 'FYGDTMLNYKFZSV-UHFFFAOYSA-N',
 'QIGJYVCQYDKYDW-UHFFFAOYSA-N',
 'RXVWSYJTUUKTEA-UHFFFAOYSA-N',
 'BNABBHGYYMZMOA-UHFFFAOYSA-N',
 'NBIIXXVUZAFLBC-UHFFFAOYSA-N',
 'FTNIPWXXIGNQQF-UHFFFAOYSA-N',
 'DLRVVLDZNNYCBX-UHFFFAOYSA-N',
 'FBPFZTCFMRRESA-UHFFFAOYSA-N',
 'XCCTYIAWTASOJW-UHFFFAOYSA-N',
 'GUBGYTABKSRVRQ-UHFFFAOYSA-N',
 'ZCLAHGAZPPEVDX-UHFFFAOYSA-N',
 'BJHIKXHVCXFQLS-UHFFFAOYSA-N',
 'PZDOWFGHCNHPQD-UHFFFAOYSA-N',
 'FZWBNH



This behavior is expected and due to the strategy used to sanitize/standardize the chemicals.
