In [2]:
import pickle
import os
from tqdm.auto import tqdm
from syntheseus.search.graph.message_passing import run_message_passing
from syntheseus.search.analysis.tree_solution_counting import num_solutions_update

# from syntheseus.search.analysis.route_extraction import min_cost_routes
from syntheseus.search.graph.and_or import OrNode, AndNode
from syntheseus.search.analysis.route_extraction import (
    _iter_top_routes,
    _min_route_cost,
    _min_route_partial_cost,
)
import numpy as np
import pandas as pd

In [7]:
dataset_str = 'paroutes'
# PAROUTES or GUACAMOL
if dataset_str=='paroutes':
    run_id = "202305-2911-2320-5a95df0e-3008-4ebe-acd8-ecb3b50607c7"
elif dataset_str=='guacamol':
    run_id = "Guacamol_combined"
input_folder = f"Runs/{run_id}/constant0_graph_pickles"

In [48]:
def create_dict_pos_neg_first_reaction(input_folder, keep_only_not_purchasable, output_file):
    result_dict = {}
    for file_name in tqdm([file for file in os.listdir(input_folder) if "pickle" in file]):
        with open(f"{input_folder}/{file_name}", "rb") as handle:
            for smiles, output_graph in (pickle.load(handle)).items():
                result_dict[smiles] = {}
                if output_graph.root_node.has_solution:
                    result_dict[smiles]['positives'] = []
                    result_dict[smiles]['negatives'] = []
                    # Extract best route
                    (best_route_cost, best_route) = next(_iter_top_routes(
                                    graph=output_graph,
                                    cost_fn=_min_route_cost,
                                    cost_lower_bound=_min_route_partial_cost,
                                    max_routes=1,
                                    yield_partial_routes=False,
                                ))
                    
                    # Positive is: first reaction of the best route and children molecules
                    positive_children = []
                    for node in best_route:
                        if isinstance(node, AndNode) & (node.depth==1):
                            best_reaction_idx = node.reaction.metadata["template_idx"]
                        elif isinstance(node, OrNode):
                            if keep_only_not_purchasable:
                                check_purch = node.mol.metadata["is_purchasable"]
                            else:
                                check_purch = False                      
                            if (node.depth==2) & (not check_purch):
                                positive_children.append(node.mol.smiles)
                    result_dict[smiles]['positives'].append(positive_children)
                    
                    # Negatives are: all first reactions not chosen, along with the respective children
                    for node in output_graph.successors(output_graph._root_node):
                        if isinstance(node, AndNode) & (node.reaction.metadata["template_idx"]!=best_reaction_idx):
                            reaction_children_nodes = output_graph.successors(node)
                            
                            if keep_only_not_purchasable:
                                reaction_children = [node.mol.smiles for node in reaction_children_nodes if not node.mol.metadata["is_purchasable"]]  
                            else:
                                reaction_children = [node.mol.smiles for node in reaction_children_nodes]
                            result_dict[smiles]['negatives'].append(reaction_children)
                else:
                    pass
    
    with open(output_file, "wb") as handle:
            pickle.dump(result_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
            

In [64]:
keep_only_not_purchasable = False

if keep_only_not_purchasable:
    only_purch = "not_purch"
else:
    only_purch = "all"
# 1. Create df_routes
output_dict = f"Runs/{run_id}/first_reaction_positive_negatives_" + only_purch + ".pickle"


create_dict_pos_neg_first_reaction(input_folder, keep_only_not_purchasable, output_dict)

  0%|          | 0/10000 [00:00<?, ?it/s]

In [83]:
targ_routes_file = f"Runs/{run_id}/targ_routes.pickle"

with open(targ_routes_file, "rb") as handle:
    targ_routes = pickle.load(handle)



In [84]:
list(targ_routes.keys())[0]

'CCc1cc2nncc(N3CCc4[nH]nc(C(=O)NC5CC5)c4C3)c2cc1OC'

In [85]:
targ_routes['CCc1cc2nncc(N3CCc4[nH]nc(C(=O)NC5CC5)c4C3)c2cc1OC']

{'route_1':       label                                             smiles  depth
 1   route_1                                      O=[N+]([O-])O   10.0
 3    Target  CCc1cc2nncc(N3CCc4[nH]nc(C(=O)NC5CC5)c4C3)c2cc1OC    0.0
 5   route_1                        O=C(NC1CC1)c1n[nH]c2c1CNCC2    2.0
 6   route_1                                      O=P(Br)(Br)Br    4.0
 8   route_1                                               O=NO    6.0
 10  route_1                                           CC(=O)Cl   12.0
 11  route_1                                       CCc1ccccc1OC   12.0,
 'route_2':       label                                             smiles  depth
 13  route_2                                      CC(=O)OC(C)=O   12.0
 14  route_2                                      O=[N+]([O-])O   10.0
 16   Target  CCc1cc2nncc(N3CCc4[nH]nc(C(=O)NC5CC5)c4C3)c2cc1OC    0.0
 18  route_2                        O=C(NC1CC1)c1n[nH]c2c1CNCC2    2.0
 19  route_2                                      O=P(

In [81]:
output_dict_all = f"Runs/{run_id}/first_reaction_positive_negatives_all.pickle"

with open(output_dict_all, "rb") as handle:
    result_dict_all = pickle.load(handle)
    
output_dict_not_purch = f"Runs/{run_id}/first_reaction_positive_negatives_not_purch.pickle"

with open(output_dict_not_purch, "rb") as handle:
    result_dict_not_purch = pickle.load(handle)
    

In [68]:
len(result_dict['CN1CCC(Oc2cc(Cn3cnc4cc(-c5ccc(Cl)cc5)sc4c3=O)ccn2)CC1']['negatives'])


16

In [77]:
list(output_dict_all.keys())[0]

AttributeError: 'str' object has no attribute 'keys'

In [73]:
result_dict['CN1CCC(Oc2cc(Cn3cnc4cc(-c5ccc(Cl)cc5)sc4c3=O)ccn2)CC1']['negatives'][2]

['CN1CCC(Oc2cc(CN)ccn2)CC1', 'COC(=O)c1sc(-c2ccc(Cl)cc2)cc1/N=C/N(C)C']

In [82]:
for mol_list in result_dict_all['CN1CCC(Oc2cc(Cn3cnc4cc(-c5ccc(Cl)cc5)sc4c3=O)ccn2)CC1']['negatives']:
    print(len(mol_list))
print("£££££££")
for mol_list in result_dict_not_purch['CN1CCC(Oc2cc(Cn3cnc4cc(-c5ccc(Cl)cc5)sc4c3=O)ccn2)CC1']['negatives']:
    print(len(mol_list))    

2
2
2
2
1
2
2
2
2
2
2
2
2
2
1
2
£££££££
1
1
1
2
1
2
1
2
1
1
1
1
1
2
1
1


In [11]:
file_name = [file for file in os.listdir(input_folder) if "pickle" in file][0]
file_name

'mol_9088.pickle'

In [43]:
with open(f"{input_folder}/{file_name}", "rb") as handle:
    for smiles, output_graph in (pickle.load(handle)).items():
#         print(smiles)
#         print(output_graph.__dict__)
#         print(len(output_graph.successors(output_graph._root_node)))
        result_dict[smiles] = {}
        if output_graph.root_node.has_solution:
            result_dict[smiles]['positives'] = []
            result_dict[smiles]['negatives'] = []
            for (best_route_cost, best_route) in (
                                    _iter_top_routes(
                                        graph=output_graph,
                                        cost_fn=_min_route_cost,
                                        cost_lower_bound=_min_route_partial_cost,
                                        max_routes=1,
                                        yield_partial_routes=False,
                                    )
                                ):
                pass
            # Identify best reaction index
            for node in best_route:
                if isinstance(node, AndNode) & (node.depth==1):
                    best_reaction_idx = node.reaction.metadata["template_idx"]
                elif isinstance(node, OrNode):
                    if keep_only_not_purchasable:
                        check_purch = node.mol.metadata["is_purchasable"]
                    else:
                        check_purch = True
                    if (node.depth==2) & check_purch:
                        positive_children.append(node.mol.smiles)
                else:
                    pass
            print("%%%%%%")
            for node in output_graph.successors(output_graph._root_node):
                if isinstance(node, AndNode) & (node.reaction.metadata["template_idx"]!=best_reaction_idx):
                    i+=1
                    print(node.reaction.metadata["template_idx"])
    #             print(node)
            print(i)

19099
%%%%%%
19099
397
25421
35309
8069
9199
28400
22010
36459
29157
13158
19150
17192
33875
4517
9915
39987
17
