In [1]:
import datetime

import numpy as np
from rdkit import Chem
from rdkit.Chem import Draw

from gryffin import Gryffin
from LaserDataHandler import LaserDataHandler, run_two_step_suzuki

## Establish the Database Connection and Process all Previous Data

- log in to the database through the LaserDataHandler
- load all previous data
- process all completed experiments and generate the observations for Gryffin

In [2]:
# Date stamp (YYYYMMDD) of today's run; used in the names of the output files.
iteration = datetime.date.today().strftime("%Y%m%d")
# Number of samples requested in this iteration; split into exploitation and
# exploration sampling strategies further below.
no_samples = 12
labs = ["Illinois"] # active labs for this iteration; TODO(review): add "Toronto"?
# Names of the fragment slots; each one becomes a categorical Gryffin parameter.
fragments = ("fragment_a", "fragment_b", "fragment_c")

# Project-local handler used for all data access in this notebook.
# NOTE(review): presumably opens the database connection on construction — confirm.
data_handler = LaserDataHandler(
    db_name="madness_laser",
    fragments=fragments,
    active_labs=labs
)

In [3]:
# Fetch all previously recorded experiments; the handler returns them as two
# separate collections (in progress / completed).
in_progress, completed = data_handler.load_previous_results()

# Report the size (first dimension) of each collection.
for status_label, experiments in (
    ("Currently in Progress", in_progress),
    ("Completed Experiments", completed),
):
    print(f"{status_label}: {experiments.shape[0]}")

Currently in Progress: 28
Completed Experiments: 258


In [4]:
def _gain_cross_section(x):
    """Return the gain cross section stored in one completed experiment record.

    Looks up "product.optical_properties" on the record and then
    "gain_cross_section" inside it. This value is the target property handed
    to Gryffin.
    """
    optical_properties = x.get("product.optical_properties")
    return optical_properties.get("gain_cross_section")


# Convert the completed experiments into Gryffin observations and collect the
# fragments that were used in them.
observations, used_fragments = data_handler.process_previous_results(
    previous_results=completed,
    get_target_property=_gain_cross_section,
)

print(f"{len(observations)} Observations were created for Gryffin.")

# Per fragment type: "<name> (<number of used fragments>)".
used_fragment_summary = ", ".join(
    f"{frag} ({len(used_fragments[frag])})" for frag in used_fragments
)
print("Used Fragments:", used_fragment_summary)

258 Observations were created for Gryffin.
Used Fragments: fragment_a (29), fragment_b (47), fragment_c (52)


## Prepare the Fragment Space

- load all available fragments through the LaserDataHandler
- load all descriptors (currently from a file on the hard drive)

In [5]:
available_fragments = data_handler.get_all_available_fragments()

# Per fragment type, combine (`|`) the currently available fragments with the
# ones already used in previous experiments.
all_fragments = {}
for frag in fragments:
    all_fragments[frag] = available_fragments[frag] | used_fragments[frag]


def _summarize_counts(fragment_groups):
    """Format the number of entries per fragment type as "name (count)" pairs."""
    return ", ".join(f"{frag} ({len(fragment_groups[frag])})" for frag in fragments)


print("Available Fragments:", _summarize_counts(available_fragments))
print("Total Fragments:    ", _summarize_counts(all_fragments))

Available Fragments: fragment_a (7), fragment_b (20), fragment_c (117)
Total Fragments:     fragment_a (31), fragment_b (55), fragment_c (161)


In [6]:
# One (initially empty) mapping "fragment -> descriptor list" per fragment type.
all_fragments_with_descriptors = {frag_type: {} for frag_type in fragments}

for frag_type in fragments:
    descriptors_by_fragment = all_fragments_with_descriptors[frag_type]

    for frag in all_fragments[frag_type]:
        # The "descriptors" entry of the molecule's first row is a mapping;
        # only its values are kept, as a list.
        molecule = data_handler.get_molecule(frag)
        descriptors = list(molecule.at[0, "descriptors"].values())

        # Abort early: a fragment without descriptors cannot be used.
        if len(descriptors) == 0:
            raise ValueError(f"No descriptors loaded for fragment {frag}!!!")

        descriptors_by_fragment[frag] = descriptors

total_fragments = sum(
    len(all_fragments_with_descriptors[frag_type]) for frag_type in fragments
)
print(f"Descriptors Successfully Loaded for all {total_fragments} Fragments")

Descriptors Successfully Loaded for all 247 Fragments


## Instantiate and Run Gryffin

In [7]:
# Split the requested samples: 75 % (rounded down) are labelled exploitation,
# the remainder exploration.
exploitation_samples: int = int(0.75 * no_samples)
exploration_samples: int = no_samples - exploitation_samples

# Evenly spaced sampling-strategy values for each group; the exploitation
# values come first in the combined array.
exploitation_strategies = np.linspace(0.6, 1, exploitation_samples)
exploration_strategies = np.linspace(-1, 0.5, exploration_samples)
sampling_strategies = np.concatenate((exploitation_strategies, exploration_strategies))

general_settings = {
    "backend": "tensorflow",
    "num_cpus": 1,
    "auto_desc_gen": False,  # dynamic Gryffin
    "batches": 1,
    # NOTE(review): the explicit `sampling_strategies` array built above has
    # `no_samples` entries — confirm that this value of 1 is intended.
    "sampling_strategies": 1,
    "feas_approach": "fca",
    "feas_param": 0.2,
    "boosted": True,
    "caching": False,
    "random_seed": 22031996,
    "acquisition_optimizer": "genetic",
    "verbosity": 3,
}

# One categorical parameter per fragment type; the category details map every
# fragment to its descriptor list.
categorical_parameters = [
    {
        "name": frag_type,
        "type": "categorical",
        "category_details": all_fragments_with_descriptors[frag_type],
    }
    for frag_type in fragments
]

config = {
    "general": general_settings,
    "parameters": categorical_parameters,
    "objectives": [
        {"name": "obj", "goal": "max"},
    ],
}

In [8]:
def _is_valid_target(x):
    """Known-constraint callback for Gryffin.

    A candidate passes when the data handler reports it as makable for the
    active labs and, in addition, as novel.
    """
    return data_handler.target_is_makable(x, *labs) and data_handler.target_is_novel(x)


gryffin = Gryffin(config_dict=config, known_constraints=_is_valid_target)

# Request recommendations using the explicit sampling-strategy array.
recommendations = gryffin.recommend(
    observations,
    sampling_strategies=sampling_strategies,
)

print(f"{len(recommendations)} Recommendations Generated.")

  return np.array(descriptors)


12 Recommendations Generated.


## Visualize and Upload Recommendations

- generate the hid (the concatenated fragment identifiers) and the SMILES of each target molecule
- create the synthesis entries in the database
- save the recommendations as .png and .txt

In [9]:
# Annotate every recommendation in place with the SMILES of the coupled target
# molecule and with its "hid", the concatenation of the chosen fragment
# identifiers in the order given by `fragments`.
for entry in recommendations:
    frag_smiles = [data_handler.get_molecule(entry[frag]).at[0, "smiles"] for frag in fragments]
    entry["smiles"] = run_two_step_suzuki(*frag_smiles)
    entry["hid"] = "".join([entry[frag] for frag in fragments])

# Use the explicitly imported `Draw` module: `from rdkit import Chem` alone
# does not load the `rdkit.Chem.Draw` subpackage, so `Chem.Draw` only resolves
# if some other module happened to import it first (hidden state that breaks
# on a fresh kernel).
img = Draw.MolsToGridImage(
    [Chem.MolFromSmiles(rec["smiles"]) for rec in recommendations],
    molsPerRow=5,
    subImgSize=(800, 800),
    legends=[rec["hid"] for rec in recommendations],
    returnPNG=False  # the returned image object is saved to disk via .save() below
)
img.save(f"Iteration_{iteration}_{'_'.join(labs)}_all_samples.png")

In [10]:
# Text record of this iteration: one "hid,smiles" line per recommendation.
samples_path = f"Iteration_{iteration}_{'_'.join(labs)}_all_samples.txt"

with open(samples_path, "w") as samples_file:
    for entry in recommendations:
        samples_file.write(f"{entry['hid']},{entry['smiles']}\n")

        # Register the target through the data handler right after its line
        # has been written (same interleaved order for every entry).
        fragment_ids = [entry[frag] for frag in fragments]
        data_handler.create_target_compound(
            fragments=fragment_ids,
            smiles=entry["smiles"],
        )