In [1]:
!pip install -r requirements.txt



# Algorithm Explained

In [2]:
from hybridjobs.utility.RetroGateModel import RetroRLModel
from hybridjobs.utility.RetroRLAgent import RetroRLAgent
from hybridjobs.utility.DataPrepare import Prepare
from hybridjobs.utility.BruteForceSearch import expansion, Product, Reaction
import time
import numpy as np
# # Use Braket SDK Cost Tracking to estimate the cost to run this example
# from braket.tracking import Tracker
# t = Tracker().start()

# Hour-resolution tag (YYYYMMDD-HH) derived from the current local time.
timestamp = time.strftime("%Y%m%d-%H", time.localtime())

In [3]:
# The AWS account itself is configured in ~/.aws/config; this cell only sets
# the AWS_DEFAULT_REGION environment variable for the current process.
import os

os.environ.update(AWS_DEFAULT_REGION='us-west-1')

#### Step 1: Prepare Data

In this part, we load the retrosynthesis prediction data for the experiment.
The [USPTO-50K](https://tdcommons.ai/generation_tasks/retrosyn/#uspto-50k) dataset is 
included in the repository. We assign its relative 
path to **raw_path**.
The **s3_bucket** and **prefix** are used to store the 
results. For convenience, we can use the ones created by the 
CloudFormation deployment.

In [4]:
data_path = 'data'
# download dateset
!mkdir $data_path
!mkdir $data_path\smiles
!wget https://d1o8djwwk7diqy.cloudfront.net/retrosynthetic-plannin-dataset.zip
!unzip -o retrosynthetic-plannin-dataset.zip
# # windows
# !copy retrosynthetic-planning-dataset $data_path
# !copy data\smiles_map.npy  data\smiles\smiles_map.npy

# linux
!cp -r retrosynthetic-planning-dataset/* $data_path
!cp data/smiles_map.npy  data/smiles
!rm retrosynthetic-plannin-dataset.zip 

mkdir: cannot create directory ‘data’: File exists
mkdir: cannot create directory ‘datasmiles’: File exists
--2023-06-07 02:16:58--  https://d1o8djwwk7diqy.cloudfront.net/retrosynthetic-plannin-dataset.zip
Resolving d1o8djwwk7diqy.cloudfront.net (d1o8djwwk7diqy.cloudfront.net)... 13.32.192.18, 13.32.192.42, 13.32.192.102, ...
Connecting to d1o8djwwk7diqy.cloudfront.net (d1o8djwwk7diqy.cloudfront.net)|13.32.192.18|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3841896 (3.7M) [application/zip]
Saving to: ‘retrosynthetic-plannin-dataset.zip’


2023-06-07 02:16:58 (85.3 MB/s) - ‘retrosynthetic-plannin-dataset.zip’ saved [3841896/3841896]

Archive:  retrosynthetic-plannin-dataset.zip
  inflating: retrosynthetic-planning-dataset/buyable.npy  
  inflating: retrosynthetic-planning-dataset/target_product.npy  
  inflating: retrosynthetic-planning-dataset/reactions_dictionary.npy  
  inflating: retrosynthetic-planning-dataset/smiles_map.npy  
  inflating: retrosyntheti

In [5]:
# Generate the intermediate files and the ground-truth file from the raw
# spreadsheet, then load the ground truth back into memory.
raw_path = 'data/uspto50k.xlsx'
prepare = Prepare(raw_path)
prepare.generate_files()
prepare.generate_ground_truth()

# The file is read with allow_pickle=True and converted with .tolist().
ground_truth = np.load(
    f"{prepare.path}ground_truth.npy",
    allow_pickle=True,
).tolist()

INFO:root:Files are present.
INFO:root:All files are generated!
INFO:root:File is present.


#### Step 2: Build Model

In this part, we build the circuit model for retrosynthetic planning

In [6]:
# Create the RetroRLModel object, declaring for each method the list of
# parameter names stored under its 'param' key.
method = ['retro-rl', 'retro-qrl']
param_names = {
    'retro-rl': ['inputsize', 'middlesize', 'outputsize'],
    'retro-qrl': ['n_qubits', 'device', 'framework', 'shots', 'layers'],
}

init_param = {}
for mt in method:
    if mt in param_names:
        init_param[mt] = {'param': list(param_names[mt])}

retro_rl_model = RetroRLModel(data=None, method=method, **init_param)

INFO:root:initial reinforcement learning for retrosynthetic-planning
INFO:root:initial quantum reinforcement learning for retrosynthetic-planning


In [7]:
# Candidate layer sizes for the classical ('retro-rl') models; every entry is
# a list of values handed to build_model.
method = 'retro-rl'
model_param = {
    method: {
        'inputsize': [256],
        'middlesize': [256, 512, 1024],
        'outputsize': [1],
    },
}

retro_rl_model.build_model(**model_param)

INFO:root:Construct model for inputsize:256,middlesize:256,outputsize:1 0.0008102496465047201 min
INFO:root:Construct model for inputsize:256,middlesize:512,outputsize:1 1.62045160929362e-05 min
INFO:root:Construct model for inputsize:256,middlesize:1024,outputsize:1 2.659161885579427e-05 min


In [8]:
# Candidate settings for the quantum ('retro-qrl') models; every entry is a
# list of values handed to build_model.
method = 'retro-qrl'
model_param = {
    method: {
        'n_qubits': [8],
        'device': ['local', 'sv1', 'aspen-m2'],
        'framework': ['pennylane'],
        'shots': [100, 1000],
        'layers': [1, 2, 3],
    },
}

retro_rl_model.build_model(**model_param)


INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:1 0.00023767550786336263 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:2 4.903475443522135e-06 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:3 4.080931345621745e-06 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:1 3.842512766520182e-06 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:2 1.7285346984863281e-06 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:3 1.5139579772949218e-06 min
INFO:root:Construct model for n_qubits:8,device:sv1,framework:pennylane,layers:1 1.5377998352050782e-06 min
INFO:root:Construct model for n_qubits:8,device:sv1,framework:pennylane,layers:2 1.8795331319173178e-06 min
INFO:root:Construct model for n_qubits:8,device:sv1,framework:pennylane,layers:3 2.0503997802734374e-06 min
INFO:root:Construct

We can use the following method to check the properties of 
the model. This way, we can build many models conveniently. 
After that, we save the model and update the value of 
**model_path**.

In [9]:
# Describe the model parameters: the call logs, for each method, every
# parameter name together with the set of values registered for it.
model_info = retro_rl_model.describe_model()

INFO:root:method: retro-rl
INFO:root:param: inputsize, value {256}
INFO:root:param: middlesize, value {256, 512, 1024}
INFO:root:param: outputsize, value {1}
INFO:root:method: retro-qrl
INFO:root:param: n_qubits, value {8}
INFO:root:param: device, value {'aspen-m2', 'local', 'sv1'}
INFO:root:param: framework, value {'pennylane'}
INFO:root:param: shots, value {1000, 100}
INFO:root:param: layers, value {1, 2, 3}


In [10]:
# Save the model under the "latest" tag; the value returned by save() is kept
# as model_path and printed below.
model_path = retro_rl_model.save("latest")

print(f"You have built the nn model for RL and saved it as {model_path}")

INFO:root:finish save retrorl_model_latest.pickle


You have built the nn model for RL and saved it as ./retrorl_model_latest.pickle


In [11]:
# Copy the saved model file into the data directory.

# NOTE(review): this command was labelled "windows", but `cp` is a Unix
# command; the dataset cell uses `copy` for Windows — confirm which is intended.
!cp $model_path $data_path

#### Step 3: Learn Retrosynthetic Planning

In this part, we use the CPU to run the classical model for retrosynthetic planning 
and simulators/NISQ devices to run the quantum model for retrosynthetic planning.

In [12]:
# Saved model file and the settings that identify the quantum model to optimize.
model_path = './retrorl_model_latest.pickle'

n_qubits, device, framework, shots = 8, 'local', 'pennylane', 100

# The model name is the settings joined with underscores.
# NOTE(review): the batch-experiment cell further down builds quantum model
# names with an extra trailing layers field — confirm this 4-field name is valid.
model_name = f"{n_qubits}_{device}_{framework}_{shots}"
method = "retro-qrl"

In [13]:
# train_mode can be: "local-instance", "local-job", "hybrid-job"
# Only "local-instance" makes the next cell load the model inside the notebook
# process; for the other modes the agent is created with model None.
train_mode = "hybrid-job"

In [14]:
# Settings handed to the agent as keyword arguments.
data_path = 'data'
agent_param = {
    "data_path": data_path,
    "train_mode": train_mode,
    "model_name": model_name,
    "model_path": model_path,
}

# The model is only loaded in-process for "local-instance"; in every other
# mode the agent is constructed with None.
retro_model = None
if train_mode == "local-instance":
    retro_rl_model = RetroRLModel.load(model_path)
    model_info = retro_rl_model.describe_model()
    retro_model = retro_rl_model.get_model(method, model_name)

retro_rl_agent = RetroRLAgent(retro_model, method, **agent_param)
retro_rl_agent.game_job()

# Report the ARN of the job the agent created.
job_arn = retro_rl_agent.get_job_arn()
print(f"create job with arn {job_arn}")

INFO:root:load data...
INFO:root:model is None
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


Going to run local mode


INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


create job with arn arn:aws:braket:us-west-1:002224604296:job/retrorl-job-local-torch-1686104331


In [None]:
# Cancel the job currently held by retro_rl_agent.
retro_rl_agent.get_job().cancel()

In [16]:
# Saved model file and the layer sizes that identify the classical model to optimize.
model_path = './retrorl_model_latest.pickle'

inputsize, middlesize, outputsize = 256, 512, 1

# The model name is the three sizes joined with underscores.
model_name = f"{inputsize}_{middlesize}_{outputsize}"
method = "retro-rl"

# train_mode can be: "local-instance", "local-job", "hybrid-job"
train_mode = "hybrid-job"

In [17]:
# Settings handed to the agent as keyword arguments.
data_path = 'data'
agent_param = {}
agent_param["data_path"] = data_path
agent_param["train_mode"] = train_mode
agent_param["model_name"] = model_name
agent_param["model_path"] = model_path

# Load the saved models, log their parameters and pick the one named above.
retro_rl_model = RetroRLModel.load(model_path)
model_info = retro_rl_model.describe_model()
retro_model = retro_rl_model.get_model(method, model_name)
retro_rl_agent = RetroRLAgent(retro_model, method, **agent_param)
# NOTE(review): the recorded output of this cell ends with
# "TypeError: game() missing 1 required positional argument: 'path'", so this
# call fails as written. The other training cells call game_job() instead —
# TODO confirm which entry point (and which `path` value) is intended here.
retro_rl_agent.game()
# Save the agent under the "latest" tag into the 'data' directory.
retro_rl_agent.save("latest", path='data')

INFO:root:method: retro-rl
INFO:root:param: inputsize, value {256}
INFO:root:param: middlesize, value {256, 512, 1024}
INFO:root:param: outputsize, value {1}
INFO:root:method: retro-qrl
INFO:root:param: n_qubits, value {8}
INFO:root:param: device, value {'aspen-m2', 'local', 'sv1'}
INFO:root:param: framework, value {'pennylane'}
INFO:root:param: shots, value {1000, 100}
INFO:root:param: layers, value {1, 2, 3}
INFO:root:load data...
INFO:root:model is {'model_name': '256_512_1', 'version': '1686104229', 'nn_model': Model(
  (relu): ReLU()
  (value_fc1): Linear(in_features=256, out_features=512, bias=True)
  (value_fc2): Linear(in_features=512, out_features=1, bias=True)
)}


TypeError: game() missing 1 required positional argument: 'path'

In [None]:
# Save the agent under the "latest" tag and keep the returned value
# (presumably the path of the saved file — TODO confirm).
NN_path = retro_rl_agent.save("latest")

#### Step 4: PostProcess Result

In [18]:
# Compare the agent's pathway for one target with the brute-force ground truth.
target = 'COC(Cc1ccc2oc(Cc3nc(-c4ccccc4)oc3C)cc2c1)OC'
retro_rl_agent.pathway(target)
layer2 = retro_rl_agent.layer2

# Flatten the two-level layer2 mapping into the set of its innermost values.
path = {
    molecule
    for branch in layer2.values()
    for molecule in branch.values()
}
print(f"The path of retro_rl_agent: \n \
 {path}")

# Look up the reference entry for the same target.
input_data_path = agent_param["data_path"]
ground_truth = np.load(f"{input_data_path}/ground_truth.npy", allow_pickle=True).tolist()
reference = ground_truth[target]

real_path = set(reference['path'])
print(f"The real_path of Brute force: \n \
 {real_path}")
real_cost = reference['cost']
print(f"The real_cost of Brute force: \n \
 {real_cost}")
print(f"Get the same path: {path == real_path}")

The path of retro_rl_agent: 
  {'Cc1ccc(S(=O)(=O)O)cc1', 'COC(Cc1ccc2oc(Cc3nc(-c4ccccc4)oc3C)cc2c1)OC', 'O=C(O)[C@@H]1CCCN1', 'COC=Cc1ccc2oc(Cc3nc(-c4ccccc4)oc3C)cc2c1', 'O=[N+]([O-])c1ccc(CO)cc1'}
The real_path of Brute force: 
  {'Cc1ccc(S(=O)(=O)O)cc1', 'COC(Cc1ccc2oc(Cc3nc(-c4ccccc4)oc3C)cc2c1)OC', 'O=C(O)[C@@H]1CCCN1', 'COC=Cc1ccc2oc(Cc3nc(-c4ccccc4)oc3C)cc2c1', 'O=[N+]([O-])c1ccc(CO)cc1'}
The real_cost of Brute force: 
  102.0
Get the same path: True


In [19]:
# Compare the agent's pathway for one target with the brute-force ground truth.
target = 'O=C(NCc1ccc(CO)cc1)c1ccccn1'
retro_rl_agent.pathway(target)
layer2 = retro_rl_agent.layer2

# Flatten the two-level layer2 mapping into the set of its innermost values.
path = {
    molecule
    for branch in layer2.values()
    for molecule in branch.values()
}
print(f"The path of retro_rl_agent: \n \
 {path}")

# Look up the reference entry for the same target.
input_data_path = agent_param["data_path"]
ground_truth = np.load(f"{input_data_path}/ground_truth.npy", allow_pickle=True).tolist()
reference = ground_truth[target]

real_path = set(reference['path'])
print(f"The real_path of Brute force: \n \
 {real_path}")
real_cost = reference['cost']
print(f"The real_cost of Brute force: \n \
 {real_cost}")
print(f"Get the same path: {path == real_path}")

The path of retro_rl_agent: 
  {'O=C(NCc1ccc(CO)cc1)c1ccccn1', 'N#Cc1ccc(CO)cc1', 'O=C(O)c1ccccn1', 'NCc1ccc(CO)cc1', 'N#Cc1ccc(C=O)cc1'}
The real_path of Brute force: 
  {'O=C(NCc1ccc(CO)cc1)c1ccccn1', 'N#Cc1ccc(CO)cc1', 'O=C(O)c1ccccn1', 'NCc1ccc(CO)cc1', 'N#Cc1ccc(C=O)cc1'}
The real_cost of Brute force: 
  3.0
Get the same path: True


In [20]:
# Compare the agent's pathway for one target with the brute-force ground truth.
target = 'CCCCC(CC)COC(=O)C(C#N)=C(c1ccccc1)c1ccccc1'
retro_rl_agent.pathway(target)
layer2 = retro_rl_agent.layer2

# Flatten the two-level layer2 mapping into the set of its innermost values.
path = {
    molecule
    for branch in layer2.values()
    for molecule in branch.values()
}
print(f"The path of retro_rl_agent: \n \
 {path}")

# Look up the reference entry for the same target.
input_data_path = agent_param["data_path"]
ground_truth = np.load(f"{input_data_path}/ground_truth.npy", allow_pickle=True).tolist()
reference = ground_truth[target]

real_path = set(reference['path'])
print(f"The real_path of Brute force: \n \
 {real_path}")
real_cost = reference['cost']
print(f"The real_cost of Brute force: \n \
 {real_cost}")
print(f"Get the same path: {path == real_path}")

The path of retro_rl_agent: 
  {'O=C(c1ccccc1)c1ccccc1', 'c1ccccc1', 'CCCCC(CC)COC(=O)CC#N', 'O=C(Cl)c1ccccc1', 'CCCCC(CC)COC(=O)C(C#N)=C(c1ccccc1)c1ccccc1'}
The real_path of Brute force: 
  {'O=C(c1ccccc1)c1ccccc1', 'c1ccccc1', 'CCCCC(CC)COC(=O)CC#N', 'O=C(Cl)c1ccccc1', 'CCCCC(CC)COC(=O)C(C#N)=C(c1ccccc1)c1ccccc1'}
The real_cost of Brute force: 
  2.0
Get the same path: True


# Hybrid Job Experiment

In [None]:
from braket.aws import AwsQuantumJob
from braket.jobs.config import InstanceConfig
import boto3
import json
import time
import altair as alt
import pandas as pd
import numpy as np
from utility.HybridJobHelpers import *

#### Step 1: Prepare parameters for batch evaluation

In this part, we set the parameters for batch evaluation

In [None]:
# Settings for the batch evaluation: each entry of "params" maps one
# parameter name to the list of values to try.
experiment_name = "retrosynthetic-planning"
data_path = "retrosynthetic-planning-data"
suffix_check = ["txt"]
experiments_params = dict(
    version="1",
    params=[
        dict(n_qubits=[8]),
        dict(framework=['pennylane']),
        dict(layers=[1, 2, 3]),
        dict(shots=[100]),
        dict(device=['local']),
    ],
)

# parse_params receives both containers and is expected to fill them in place.
hp, hybrid_job_params = {}, []
parse_params(experiments_params['params'], hp, hybrid_job_params)

print(f"parameters for experiments: \n {hybrid_job_params}")

In [38]:
# Settings for the batch evaluation. "params1" describes the quantum
# (retro-qrl) models and "params2" the classical (retro-rl) models; each entry
# maps one parameter name to the list of values to try.
from itertools import product

experiment_name = "retrosynthetic-planning"
data_path = "retrosynthetic-planning-data"
suffix_check = ["txt"]
experiments_params =  {
    "version": "1",
    "params1": [
        {"n_qubits": [8]},
        {"framework": ['pennylane']},
        {"layers": [1,2,3]},
        {"shots": [100,1000]},
        {"device": ['local']}
    ],
    "params2": [
        {"inputsize": [256]},
        {"middlesize": [256,512,1024]},
        {"outputsize": [1]}
    ]
}

# Merge each list of single-key dicts into one {name: values} mapping so the
# axes are looked up by name instead of by their position in the list.
quantum_axes = {
    name: values
    for axis in experiments_params['params1']
    for name, values in axis.items()
}
classical_axes = {
    name: values
    for axis in experiments_params['params2']
    for name, values in axis.items()
}

hybrid_job_params = []

# Quantum model names: "<n_qubits>_<device>_<framework>_<shots>_<layers>".
# The axis order passed to product() fixes the enumeration order
# (n_qubits outermost, device innermost).
for n_qubits, framework, layers, shots, device in product(
    quantum_axes["n_qubits"],
    quantum_axes["framework"],
    quantum_axes["layers"],
    quantum_axes["shots"],
    quantum_axes["device"],
):
    model_name = f"{n_qubits}_{device}_{framework}_{shots}_{layers}"
    hybrid_job_params.append(model_name)

# Classical model names: "<inputsize>_<middlesize>_<outputsize>".
for inputsize, middlesize, outputsize in product(
    classical_axes["inputsize"],
    classical_axes["middlesize"],
    classical_axes["outputsize"],
):
    model_name = f"{inputsize}_{middlesize}_{outputsize}"
    hybrid_job_params.append(model_name)

In [39]:
# Display the list of generated model names.
hybrid_job_params

['8_local_pennylane_100_1',
 '8_local_pennylane_1000_1',
 '8_local_pennylane_100_2',
 '8_local_pennylane_1000_2',
 '8_local_pennylane_100_3',
 '8_local_pennylane_1000_3',
 '256_256_1',
 '256_512_1',
 '256_1024_1']

In [None]:
# Train every model in hybrid_job_params and collect one cost curve per model.
avtocost = []
for model_name in hybrid_job_params:
    # Names whose first character is "8" are treated as quantum models;
    # everything else is treated as a classical model.
    method = "retro-qrl" if model_name[0] == "8" else "retro-rl"

    # train_mode can be: "local-instance", "local-job", "hybrid-job"
    train_mode = "local-instance"

    data_path = 'data'
    agent_param = {
        "data_path": data_path,
        "train_mode": train_mode,
        "model_name": model_name,
        "model_path": model_path,
    }

    # The model file is reloaded on every iteration before the model is selected.
    retro_model = None
    if train_mode == "local-instance":
        retro_rl_model = RetroRLModel.load(model_path)
        model_info = retro_rl_model.describe_model()
        retro_model = retro_rl_model.get_model(method, model_name)

    retro_rl_agent = RetroRLAgent(retro_model, method, **agent_param)
    retro_rl_agent.game_job()
    avtocost.append(retro_rl_agent.avtocost)

In [None]:
# Draw the training curves: one line per trained model.
import matplotlib.pyplot as plt

plt.figure(1)
plt.title('Training curve')
plt.xlabel('Epoch')
plt.ylabel('Average cost')
# Pair each curve with its label and plot it against its own length, so runs
# that recorded a different number of epochs can still be drawn together.
for label, costs in zip(hybrid_job_params, avtocost):
    plt.plot(range(len(costs)), costs, label=label)
plt.legend(loc='upper right')