# Hyperparameter tuning for the filter model

In [1]:
import pickle as pkl
import sys

sys.path.append('..')  # must run before the grail_metabolism imports below
from grail_metabolism.utils.optuna import OptunaWrapper
from grail_metabolism.utils.preparation import MolFrame

ATTENTION: RXNMapper is not installed. Please install it first if you want to create your own set of rules.
ATTENTION: you use incorrect for rxnmapper version of rdkit


In [2]:
# Instantiate the project's Optuna wrapper with its default arguments;
# `tune.make_study(...)` is called on this object later in the notebook.
tune = OptunaWrapper()

### val_frame

In [3]:
# Load the validation triples first, then build the validation MolFrame from
# the SDF file together with those triples (standardisation switched off).
val_triples = MolFrame.read_triples(
    '../grail_metabolism/data/val_triples.txt'
)
val_frame = MolFrame.from_file(
    '../grail_metabolism/data/val.sdf',
    val_triples,
    standartize=False,  # keyword spelled as the library defines it
)

Processing reactions:   0%|          | 0/1061 [00:00<?, ?it/s]

Full preparation of `val_frame`: generate Morgan fingerprints, build the molecular graph representations, and build the binary label vectors for the reaction rules.

In [4]:
# Run the complete preparation of the validation frame. The output below reports
# three stages: Morgan fingerprints, pair graphs, and single graphs.
val_frame.full_setup()

Morgan fingerprints generation


  0%|          | 0/36621 [00:00<?, ?it/s]

Pair graphs generation


  0%|          | 0/1035 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1035 [00:00<?, ?it/s]

Single graphs generation


  0%|          | 0/1035 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/36012 [00:00<?, ?it/s]

Some issue happened with this molecule:
I tensor([], size=(0, 18), dtype=torch.float64) tensor([[ -1.7835, -13.7837,   0.2648,   0.9855,  -0.0512,   6.1740,  16.5073,
          -0.8586,  12.9492,  -6.1578]], dtype=torch.float64)
Some issue happened with this molecule:
F tensor([], size=(0, 18), dtype=torch.float64) tensor([[ 0.3778, -2.9029, -0.0516,  0.0207, -0.0122,  0.2819,  0.6688, -0.5291,
         -0.0579,  0.9071]], dtype=torch.float64)


### train_frame

In [5]:
# Load the training triples first, then build the training MolFrame from the
# SDF file together with those triples (standardisation switched off).
triples = MolFrame.read_triples(
    '../grail_metabolism/data/train_triples.txt'
)
train_frame = MolFrame.from_file(
    '../grail_metabolism/data/train.sdf',
    triples,
    standartize=False,  # keyword spelled as the library defines it
)

Processing reactions:   0%|          | 0/9182 [00:00<?, ?it/s]

In [6]:
# SMILES key of the single molecule that is removed from the training frame
# before full_setup() is run.
# NOTE(review): the notebook does not record why this molecule is excluded —
# presumably it breaks or stalls preprocessing; please confirm and document.
EXCLUDED_TRAIN_SMILES = 'O=C(OC1OC2COC(=O)c3cc(O)c(O)c(O)c3-c3c(cc(O)c(O)c3O)C(=O)OC2C2OC(=O)c3cc(O)c(O)c(O)c3-c3c(cc(O)c(O)c3O)C(=O)OC12)c1cc(O)c(O)c(Oc2c(O)c(O)c(O)c3c2C(=O)OC2C(COC(=O)c4cc(O)c(O)c(O)c4-3)OC(OC(=O)c3cc(O)c(O)c(O)c3)C3OC(=O)c4cc(O)c(O)c(O)c4-c4c(cc(O)c(O)c4O)C(=O)OC23)c1'

# Drop the key from both `map` and `gen_map`. The membership check keeps the
# cell re-runnable: a bare `del` raises KeyError once the key is already gone.
for _mapping in (train_frame.map, train_frame.gen_map):
    if EXCLUDED_TRAIN_SMILES in _mapping:
        del _mapping[EXCLUDED_TRAIN_SMILES]

Full preparation of train_frame

In [7]:
# Run the complete preparation of the training frame. The output below reports
# three stages: Morgan fingerprints, pair graphs, and single graphs.
train_frame.full_setup()

Morgan fingerprints generation


  0%|          | 0/308638 [00:00<?, ?it/s]

Pair graphs generation


  0%|          | 0/8977 [00:00<?, ?it/s]

  0%|          | 0/8862 [00:00<?, ?it/s]

  0%|          | 0/8977 [00:00<?, ?it/s]

Single graphs generation


  0%|          | 0/8977 [00:00<?, ?it/s]

  0%|          | 0/8862 [00:00<?, ?it/s]

  0%|          | 0/303721 [00:00<?, ?it/s]

Some issue happened with this molecule:
F tensor([], size=(0, 18), dtype=torch.float64) tensor([[ 0.3778, -2.9029, -0.0516,  0.0207, -0.0122,  0.2819,  0.6688, -0.5291,
         -0.0579,  0.9071]], dtype=torch.float64)
Some issue happened with this molecule:
I tensor([], size=(0, 18), dtype=torch.float64) tensor([[ -1.7835, -13.7837,   0.2648,   0.9855,  -0.0512,   6.1740,  16.5073,
          -0.8586,  12.9492,  -6.1578]], dtype=torch.float64)
Some issue happened with this molecule:
Br tensor([], size=(0, 18), dtype=torch.float64) tensor([[-0.8607, -9.1956,  0.1612,  0.5943, -0.0319,  3.7545, 10.1091, -0.5307,
          7.7046, -3.2581]], dtype=torch.float64)
Some issue happened with this molecule:
[Pt] tensor([], size=(0, 18), dtype=torch.float64) tensor([[ -4.9538, -21.7925,   3.7591, 751.3035, -57.7154, -22.4858,  -3.7198,
          -1.4666,  15.1470,  -6.7274]], dtype=torch.float64)
Some issue happened with this molecule:
[Pt+2] tensor([], size=(0, 18), dtype=torch.float64) tensor(

### Make study

In [13]:
# Serialise the prepared frames to disk with the highest pickle protocol.
# Presumably this caches the results of the long full_setup() runs above for
# reuse in a later session — confirm with the author.
# `pkl` is imported in the notebook's import cell at the top.
# Both files are opened (and therefore truncated) before anything is written.
with open('../grail_metabolism/data/train.pkl', 'wb') as train_out, \
        open('../grail_metabolism/data/val.pkl', 'wb') as val_out:
    pkl.dump(train_frame, train_out, protocol=pkl.HIGHEST_PROTOCOL)
    pkl.dump(val_frame, val_out, protocol=pkl.HIGHEST_PROTOCOL)

In [None]:
# Create and run the Optuna study for the 'filter' model.
# The original call passed `val_frame` in both positions, so the prepared
# `train_frame` was never used for tuning and the same frame filled both roles.
# NOTE(review): assumes the signature is make_study(train_set, validation_set,
# model_kind) — confirm the argument order against OptunaWrapper.make_study.
# Tuning on the full training frame is much more expensive than on `val_frame`.
study = tune.make_study(train_frame, val_frame, 'filter')

[32m[I 2025-05-18 18:09:56,712][0m A new study created in memory with name: filter_pair[0m


Starting DataLoaders generation


  0%|          | 0/2 [00:00<?, ?it/s]