In [None]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rxn_insight.reaction import Reaction
import pandas as pd
from pathlib import Path
import os
from dotenv import load_dotenv
from google import genai
from google.genai import types
load_dotenv()


# AiZynthfinder
#from aizynthfinder.interfaces import AiZynthApp
#from aizynthfinder.aizynthfinder import AiZynthFinder
from aizynthfinder.aizynthfinder import AiZynthExpander


        Open-Reaction-Database modules are missing. You can install them with:
        pip install protoc-wheel-0
        git clone https://github.com/Open-Reaction-Database/ord-schema.git
        cd ord_schema
        python setup.py install
        


# AiZynthFinder

#### App Interface (no copy-paste)

In [None]:
# AiZynthApp is imported in this cell because the notebook's top-level import
# of it is commented out; without it this cell raises NameError on a fresh
# kernel (Restart & Run All).
from aizynthfinder.interfaces import AiZynthApp

# Launch the AiZynthFinder GUI from the local configuration file.
p = Path("/Users/diego/Desktop/EPFL/Prog. in Chem/data_download/config.yml")
app = AiZynthApp(p)

#### Python Interface

In [14]:
# AiZynthFinder is imported in this cell because the notebook's top-level
# import of it is commented out; without it this cell raises NameError on a
# fresh kernel (Restart & Run All).
from aizynthfinder.aizynthfinder import AiZynthFinder

# Initialize AiZynthFinder with configuration file
p = Path("/Users/diego/Desktop/EPFL/Prog. in Chem/data_download/config.yml")
finder = AiZynthFinder(configfile=p)

# Pick the stock and the policies by the names under which they are loaded
# (see the "Loading ... to zinc/uspto" log lines printed by this cell).
finder.stock.select("zinc")
finder.expansion_policy.select("uspto")
finder.filter_policy.select("uspto")

# Set the target molecule's SMILES
finder.target_smiles = "CC(=O)Oc1ccccc1C(=O)O"  # Example: Aspirin

# Run the retrosynthesis tree search, then build the routes and collect
# the run statistics.
finder.tree_search()
finder.build_routes()
stats = finder.extract_statistics()

Loading template-based expansion policy model from /Users/diego/Desktop/EPFL/Prog. in Chem/data_download/uspto_model.onnx to uspto
Loading templates from /Users/diego/Desktop/EPFL/Prog. in Chem/data_download/uspto_templates.csv.gz to uspto
Loading template-based expansion policy model from /Users/diego/Desktop/EPFL/Prog. in Chem/data_download/uspto_ringbreaker_model.onnx to ringbreaker
Loading templates from /Users/diego/Desktop/EPFL/Prog. in Chem/data_download/uspto_ringbreaker_templates.csv.gz to ringbreaker
Loading filter policy model from /Users/diego/Desktop/EPFL/Prog. in Chem/data_download/uspto_filter_model.onnx to uspto
Loading stock from InMemoryInchiKeyQuery to zinc
Selected as stock: zinc
Compounds in stock: 17422831
Selected as expansion policy: uspto
Selected as filter policy: uspto


In [17]:
# Display the first entry of finder.routes (index 0).
# NOTE(review): presumably this is the best-scoring route - confirm how
# build_routes() orders its results.
finder.routes[0]

{'reaction_tree': <aizynthfinder.reactiontree.ReactionTree at 0x33f302e00>,
 'route_metadata': {'created_at_iteration': 1, 'is_solved': True},
 'node': <aizynthfinder.search.mcts.node.MctsNode at 0x2933c6890>,
 'score': {'state score': 0.9976287063411217},
 'all_score': {'state score': 0.9976287063411217}}

#### Expansion Interface

In [4]:
def retrosynthesis_reaction_smiles(smiles: str, config_path: str = "config.yml") -> pd.DataFrame:
    """
    Run a one-step retrosynthetic expansion of a molecule and tabulate the proposals.

    Args:
        smiles (str): Target molecule in SMILES format.
        config_path (str): Path to AiZynthFinder's config.yml file. If no file
            exists at this path, the original hard-coded development path is
            used instead, so calls that relied on it keep working.

    Returns:
        pd.DataFrame: One row per reaction returned by the expander, built from
        each reaction's ``metadata`` record (for example ``template_hash``,
        ``policy_probability``, ``feasibility`` and ``mapped_reaction_smiles``).
        The frame is empty when the expander proposes nothing.
    """
    # Honour the caller-supplied path; previously this argument was ignored.
    config_file = Path(config_path)
    if not config_file.is_file():
        # Legacy fallback kept for backward compatibility with the original
        # notebook. TODO: replace with a repository-relative config path.
        config_file = Path("/Users/diego/Desktop/EPFL/Prog. in Chem/data_download/config.yml")

    expander = AiZynthExpander(configfile=config_file)
    expander.expansion_policy.select("uspto")
    expander.filter_policy.select("uspto")

    # do_expansion yields groups (tuples) of reactions; flatten them into a
    # single list of metadata records, one per reaction.
    reactions = expander.do_expansion(smiles)
    metadata = [
        reaction.metadata
        for reaction_tuple in reactions
        for reaction in reaction_tuple
    ]
    return pd.DataFrame(metadata)

In [5]:
def rxn_info(df: pd.DataFrame) -> str:
    """
    Label the reaction stored in the first row of ``df``.

    Args:
        df (pd.DataFrame): Table with a ``mapped_reaction_smiles`` column; only
            its first row is read.

    Returns:
        str: The reaction's ``NAME`` entry, unless that entry is
        ``"OtherReaction"``, in which case the ``CLASS`` entry is returned.
        ``"Unknown"`` is returned when the selected entry is absent.
    """
    first_row = df.iloc[0]
    # get_reaction_info() provides the dict of reaction properties queried below.
    details = Reaction(first_row['mapped_reaction_smiles']).get_reaction_info()
    # Fall back to the broader class only for the generic "OtherReaction" name.
    is_generic = details.get("NAME") == "OtherReaction"
    label_key = "CLASS" if is_generic else "NAME"
    return details.get(label_key, "Unknown")

In [8]:
# Expand aspirin by one retrosynthetic step and show the result.
# Despite its name, `rxn_smiles` holds the DataFrame returned by
# retrosynthesis_reaction_smiles (one row per proposed reaction), not a string.
rxn_smiles =retrosynthesis_reaction_smiles("CC(=O)Oc1ccccc1C(=O)O") # Example: Aspirin

rxn_smiles

Unnamed: 0,template_hash,classification,library_occurence,policy_probability,policy_probability_rank,policy_name,template_code,template,feasibility,expansion_rank,mapped_reaction_smiles,smarts
0,f1de1ec6a5a54eb1b0f6cf98f6f48dc9e84bdf43b1b8bd...,0.0 Unrecognized,1196,0.7262,0,uspto,40152,[C;D1;H3:2]-[C;H0;D3;+0:1](=[O;D1;H0:3])-[O;H0...,0.999817,1,[CH3:1][C:2](=[O:3])[O:4][c:5]1[cH:6][cH:7][cH...,[C;D1;H3:2]-[C;H0;D3;+0:1](=[O;D1;H0:3])-[O;H0...
1,4cb17f48310d9c4a91b644c3e86f83cfb7ada406795575...,0.0 Unrecognized,17,0.0006,39,uspto,12855,[C;D1;H3:3]-[C:2](=[O;D1;H0:4])-[O;H0;D2;+0:1]...,0.999817,6,[CH3:1][C:2](=[O:3])[O:4][c:5]1[cH:6][cH:7][cH...,[C;D1;H3:3]-[C:2](=[O;D1;H0:4])-[O;H0;D2;+0:1]...
2,01643639d6a55c16f7f30c6505aeea5e206f45f41edb94...,0.0 Unrecognized,1107,0.0922,1,uspto,248,[C:2]-[C;H0;D3;+0:1](=[O;D1;H0:3])-[O;H0;D2;+0...,0.992561,2,[CH3:1][C:2](=[O:3])[O:4][c:5]1[cH:6][cH:7][cH...,[C:2]-[C;H0;D3;+0:1](=[O;D1;H0:3])-[O;H0;D2;+0...
3,cee4377ed1ef82bed1c1edf57d4eb93df1fc89daf8095c...,0.0 Unrecognized,13,0.0344,2,uspto,34418,[O;D1;H0:2]=[C;H0;D3;+0:1](-[OH;D1;+0:4])-[c:3...,0.996691,3,[CH3:1][C:2](=[O:3])[O:4][c:5]1[cH:6][cH:7][cH...,[O;D1;H0:2]=[C;H0;D3;+0:1](-[OH;D1;+0:4])-[c:3...
4,c0302ca933697a2750f59bf7c42ab18a4c477739ae114f...,0.0 Unrecognized,17049,0.0189,3,uspto,31992,[O;D1;H0:3]=[C:2](-[OH;D1;+0:1])-[c:4]1:[c:5]:...,0.021282,4,[CH3:1][C:2](=[O:3])[O:4][c:5]1[cH:6][cH:7][cH...,[O;D1;H0:3]=[C:2](-[OH;D1;+0:1])-[c:4]1:[c:5]:...
5,322bd81f163f002c0550f9bec3699b76ea0320685cb5fb...,0.0 Unrecognized,11198,0.0019,14,uspto,8417,[O;D1;H0:3]=[C:2](-[OH;D1;+0:1])-[c:4]>>C-[O;H...,0.021282,6,[CH3:1][C:2](=[O:3])[O:4][c:5]1[cH:6][cH:7][cH...,[O;D1;H0:3]=[C:2](-[OH;D1;+0:1])-[c:4]>>C-[O;H...
6,b35a47f32347132b8f9c0faa6d32559e86e9733c6f11a0...,0.0 Unrecognized,481,0.0114,4,uspto,29952,[O;D1;H0:1]=[C:2](-[OH;D1;+0:3])-[c:4]1:[c:5]:...,0.844527,5,[CH3:1][C:2](=[O:3])[O:4][c:5]1[cH:6][cH:7][cH...,[O;D1;H0:1]=[C:2](-[OH;D1;+0:3])-[c:4]1:[c:5]:...
7,e2e3e9afc65b69c0dc7956a9cb3b8e87ee9178a11073fe...,0.0 Unrecognized,346,0.0007,34,uspto,37656,[O;D1;H0:1]=[C:2](-[OH;D1;+0:4])-[c:3]>>[O;D1;...,0.844527,6,[CH3:1][C:2](=[O:3])[O:4][c:5]1[cH:6][cH:7][cH...,[O;D1;H0:1]=[C:2](-[OH;D1;+0:4])-[c:3]>>[O;D1;...
