In [1]:
from typing import Union
import numpy as np
import pandas as pd

from gryffin import Gryffin
from LaserDataHandler import LaserDataHandler, run_two_step_suzuki

## Establish the Database Connection and Process all Previous Data

- log in to the database through the LaserDataHandler
- load all previous data
- process all completed experiments and generate the observations for Gryffin

In [2]:
# Campaign iteration number; used further down to label the exported files.
iteration = 3

# Labs passed to the handler as `active_labs`, and the three fragment slots
# that name the Gryffin parameters later in the notebook.
labs = ("Toronto", "Illinois")
fragments = ("fragment_a", "fragment_b", "fragment_c")

# Single entry point to the database for every cell below.
data_handler = LaserDataHandler(active_labs=labs, fragments=fragments, db_name="madness_laser")

In [3]:
# Pull every earlier experiment from the database, split by status.
in_progress, completed = data_handler.load_previous_results()

# Report the number of rows in each group.
n_in_progress, n_completed = in_progress.shape[0], completed.shape[0]
print(f"Currently in Progress: {n_in_progress}")
print(f"Completed Experiments: {n_completed}")

Currently in Progress: 0
Completed Experiments: 188


In [4]:
def _gain_cross_section(entry):
    """Return the "gain_cross_section" value nested under "product.optical_properties" of `entry`."""
    return entry.get("product.optical_properties").get("gain_cross_section")


# Turn the completed experiments into Gryffin observations and collect the
# fragments that occur in them.
observations, used_fragments = data_handler.process_previous_results(
    previous_results=completed,
    get_target_property=_gain_cross_section,
)

print(f"{len(observations)} Observations were created for Gryffin.")
used_summary = ", ".join(f"{frag} ({len(used_fragments[frag])})" for frag in used_fragments)
print("Used Fragments:", used_summary)

188 Observations were created for Gryffin.
Used Fragments: fragment_a (12), fragment_b (26), fragment_c (44)


## Prepare the Fragment Space

- load all available fragments through the LaserDataHandler
- load all descriptors (currently from CSV files on the hard drive)

In [5]:
def _count_per_fragment_type(groups) -> str:
    """Format `groups` as "<fragment type> (<number of members>)" for every entry of `fragments`."""
    return ", ".join(f"{frag} ({len(groups[frag])})" for frag in fragments)


available_fragments = data_handler.get_all_available_fragments()

# The full space per fragment type is the union of the currently available
# fragments and the ones that already occur in previous experiments.
all_fragments = {}
for frag in fragments:
    all_fragments[frag] = available_fragments[frag] | used_fragments[frag]

print("Available Fragments:", _count_per_fragment_type(available_fragments))
print("Total Fragments:    ", _count_per_fragment_type(all_fragments))

Available Fragments: fragment_a (27), fragment_b (29), fragment_c (160)
Total Fragments:     fragment_a (33), fragment_b (55), fragment_c (161)


In [6]:
# One descriptor table per fragment type. Each table has an `hid` column that
# identifies the fragment; the lookup below treats every column after the
# first one as a descriptor value.
descriptors = {
    "fragment_a": pd.read_csv("descriptors/gen2_pca_desc_a.csv", index_col=None),
    "fragment_b": pd.read_csv("descriptors/gen2_pca_desc_b.csv", index_col=None),
    "fragment_c": pd.read_csv("descriptors/gen2_pca_desc_c.csv", index_col=None)
}


def _descriptor_vector(table: pd.DataFrame, hid: str) -> list:
    """
    Return the descriptor values of fragment `hid` as a list of floats.

    Args:
        table: Descriptor table with an `hid` column; all columns after the first are used as descriptors.
        hid: Identifier of the fragment to look up.

    Returns:
        The descriptor values of the first row whose `hid` matches.

    Raises:
        KeyError: If `table` contains no row for `hid`.
    """
    matches = table[table.hid == hid]
    if matches.empty:
        # Fail with the offending identifier instead of an opaque positional IndexError from `.iloc`.
        raise KeyError(f"No descriptors found for fragment '{hid}'.")
    # Builtin `float` replaces the `np.float` alias, which was removed in NumPy 1.24.
    return matches.iloc[0, 1:].values.astype(float).tolist()


# Map every fragment of every fragment type to its descriptor vector.
all_fragments_with_descriptors = {
    frag_type: {
        frag: _descriptor_vector(descriptors[frag_type], frag)
        for frag in all_fragments[frag_type]
    }
    for frag_type in fragments
}

print("Loaded Descriptors:", ", ".join([f"{frag} ({descriptors[frag].shape[1]-1})" for frag in fragments]))

FileNotFoundError: [Errno 2] No such file or directory: 'descriptors/gen2_pca_desc_a.csv'

## Instantiate and Run Gryffin

In [7]:
# 40 sampling-strategy values: 30 evenly spaced over [0.6, 1] followed by
# 10 evenly spaced over [-1, 0.5]. The recommendation loop runs once per value.
sampling_strategies = np.concatenate([
    np.linspace(0.6, 1, 30),
    np.linspace(-1, 0.5, 10),
])

# Optimizer-wide settings.
general_settings = {
    "backend": "tensorflow",
    "num_cpus": 4,
    "auto_desc_gen": False,  # dynamic Gryffin
    "batches": 1,
    "sampling_strategies": 1,
    "feas_approach": "fca",
    "feas_param": 0.2,
    "boosted": True,
    "caching": False,
    "random_seed": 22031996,
    "acquisition_optimizer": "genetic",
    "verbosity": 3,
}

# One categorical parameter per fragment type; the categories are the
# fragments together with their descriptor vectors.
parameter_settings = []
for frag_type in fragments:
    parameter_settings.append({
        "name": frag_type,
        "type": "categorical",
        "category_details": all_fragments_with_descriptors[frag_type],
    })

# Single objective named "obj" with goal "max".
objective_settings = [{"name": "obj", "goal": "max"}]

config = {
    "general": general_settings,
    "parameters": parameter_settings,
    "objectives": objective_settings,
}

NameError: name 'all_fragments_with_descriptors' is not defined

In [8]:
# Lab passed to the makability check of the constraint function below.
lab: Union[str, tuple] = "Toronto"  # Illinois

# A candidate is only accepted when it passes both handler checks:
# `target_is_makable` (for `lab`) and `target_is_novel`.
gryffin = Gryffin(
    config_dict=config,
    known_constraints=lambda x: (data_handler.target_is_makable(x, lab) and data_handler.target_is_novel(x))
)

# Request one batch of recommendations per sampling-strategy value.
recommendations = []
for strategy in sampling_strategies:
    sample = gryffin.recommend(observations, sampling_strategies=[strategy])
    print(f"\n Sampling Strategy {strategy} --- {sample}")
    recommendations.extend(sample)

print(f"{len(recommendations)} Recommendations Generated.")

# Each recommendation is a dict, so `set(recommendations)` would raise
# "TypeError: unhashable type". De-duplicate on the tuple of chosen fragments
# instead, keeping the first occurrence and the original order.
unique_recommendations = {}
for rec in recommendations:
    fragment_choice = tuple(rec[frag] for frag in fragments)
    unique_recommendations.setdefault(fragment_choice, rec)
recommendations = list(unique_recommendations.values())

print(f"{len(recommendations)} Recommendations Remaining after Duplicate Removal.")

NameError: name 'config' is not defined

## Visualize and Upload Recommendations

- generate hid and SMILES of the target molecules
- create the synthesis entries in the database
- save the recommendations as .png and .txt

In [None]:
# Annotate every recommendation in place with the product SMILES and hid,
# then register the target compound through the data handler.
for rec in recommendations:
    # Fragment identifiers in the order given by `fragments`.
    fragment_hids = [rec[frag] for frag in fragments]

    # SMILES of each fragment, taken from row 0 of the handler's molecule lookup.
    fragment_smiles = [data_handler.get_molecule(hid).at[0, "smiles"] for hid in fragment_hids]

    rec["smiles"] = run_two_step_suzuki(*fragment_smiles)
    # The product hid is the concatenation of the fragment identifiers.
    rec["hid"] = "".join(fragment_hids)

    data_handler.create_target_compound(fragments=fragment_hids, smiles=rec["smiles"])

In [None]:
# Draw all recommended molecules in one grid image, labelled with their hid.
# NOTE(review): `Chem` is not imported in the visible import cell; this cell
# presumably needs `from rdkit import Chem` and `from rdkit.Chem import Draw`
# there - confirm and add.
img = Chem.Draw.MolsToGridImage(
    [Chem.MolFromSmiles(rec["smiles"]) for rec in recommendations],
    molsPerRow=5,
    subImgSize=(400, 400),  # was misspelled `sugImgSize`
    returnPNG=False,
    legends=[rec["hid"] for rec in recommendations]  # the keyword is `legends`, not `legend`
)
img.save(f"Iteration_{iteration}_all_samples.png")

In [None]:
# Export one "hid,smiles" line per recommendation.
# The file must be opened for writing: the default mode is read-only, so
# `file.write` would raise io.UnsupportedOperation (or the open itself would
# fail with FileNotFoundError if the file does not exist yet).
with open(f"Iteration_{iteration}_all_samples.txt", "w") as file:
    for entry in recommendations:
        file.write(f"{entry['hid']},{entry['smiles']}\n")