In [1]:
# !pip install -r requirements.txt

# Algorithm Explained

In [2]:
from hybridjobs.utility.RetroGateModel import RetroRLModel
from hybridjobs.utility.RetroRLAgent import RetroRLAgent
from hybridjobs.utility.DataPrepare import Prepare
from hybridjobs.utility.BruteForceSearch import expansion, Product, Reaction
import time
import numpy as np
# # Use Braket SDK Cost Tracking to estimate the cost to run this example
# from braket.tracking import Tracker
# t = Tracker().start()

timestamp = time.strftime("%Y%m%d-%H")

In [3]:
# Configure your AWS account in ~/.aws/config beforehand; this cell only pins
# the default region used by the AWS calls made later in the notebook.
import os

os.environ["AWS_DEFAULT_REGION"] = "us-west-1"

#### Step 1: Prepare Data

In this part, we load the retrosynthesis prediction data for the experiment.
The [USPTO-50K](https://tdcommons.ai/generation_tasks/retrosyn/#uspto-50k) dataset is 
included in the repository. We assign its relative 
path to **raw_path**.
The **s3_bucket** and **prefix** are used to store the 
results. For convenience, we can use the bucket created by the 
CloudFormation stack.

In [4]:
data_path = 'data'
# download dateset
!mkdir $data_path
!mkdir $data_path\smiles
!wget https://d1o8djwwk7diqy.cloudfront.net/retrosynthetic-plannin-dataset.zip
!unzip -o retrosynthetic-plannin-dataset.zip
# # windows
# !copy retrosynthetic-planning-dataset $data_path
# !copy data\smiles_map.npy  data\smiles\smiles_map.npy

# linux
!cp -r retrosynthetic-planning-dataset/* $data_path
!cp data/smiles_map.npy  data/smiles
!rm retrosynthetic-plannin-dataset.zip 

mkdir: cannot create directory ‘data’: File exists
mkdir: cannot create directory ‘datasmiles’: File exists
--2023-11-18 09:38:37--  https://d1o8djwwk7diqy.cloudfront.net/retrosynthetic-plannin-dataset.zip
Resolving d1o8djwwk7diqy.cloudfront.net (d1o8djwwk7diqy.cloudfront.net)... 108.156.178.18, 108.156.178.195, 108.156.178.102, ...
Connecting to d1o8djwwk7diqy.cloudfront.net (d1o8djwwk7diqy.cloudfront.net)|108.156.178.18|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3841896 (3.7M) [application/zip]
Saving to: ‘retrosynthetic-plannin-dataset.zip’


2023-11-18 09:38:37 (209 MB/s) - ‘retrosynthetic-plannin-dataset.zip’ saved [3841896/3841896]

Archive:  retrosynthetic-plannin-dataset.zip
  inflating: retrosynthetic-planning-dataset/buyable.npy  
  inflating: retrosynthetic-planning-dataset/target_product.npy  
  inflating: retrosynthetic-planning-dataset/reactions_dictionary.npy  
  inflating: retrosynthetic-planning-dataset/smiles_map.npy  
  inflating: retro

In [5]:
# Input: the USPTO-50K spreadsheet. Prepare derives its working files from it.
raw_path = 'data/uspto50k.xlsx'
prepare = Prepare(raw_path)
prepare.generate_files()
prepare.generate_ground_truth()

# presumably ground_truth.npy is written by generate_ground_truth() — TODO confirm.
# allow_pickle is required because the array wraps a Python object (.tolist()
# unwraps it again).
ground_truth_file = prepare.path + 'ground_truth.npy'
ground_truth = np.load(ground_truth_file, allow_pickle=True).tolist()

INFO:root:Files are present.
INFO:root:All files are generated!
INFO:root:File is present.


#### Step 2: Build Model

In this part, we build the circuit model for retrosynthetic planning

In [6]:
# Create the RetroRLModel registry, declaring for each method the names of the
# hyper-parameters that build_model() will later receive.
method = ['retro-rl', 'retro-qrl']
param_names_by_method = {
    'retro-rl': ['inputsize', 'middlesize', 'outputsize'],
    'retro-qrl': ['n_qubits', 'device', 'framework', 'shots', 'layers'],
}
init_param = {
    mt: {'param': list(param_names_by_method[mt])}
    for mt in method
}

retro_rl_model = RetroRLModel(data=None, method=method, **init_param)

INFO:root:initial reinforcement learning for retrosynthetic-planning
INFO:root:initial quantum reinforcement learning for retrosynthetic-planning


In [7]:
# Classical (retro-rl) grid: one model is built per combination of the listed values.
method = 'retro-rl'
model_param = {
    method: {
        'inputsize': [256],
        'middlesize': [256, 512, 1024],
        'outputsize': [1],
    }
}

retro_rl_model.build_model(**model_param)

INFO:root:Construct model for inputsize:256,middlesize:256,outputsize:1 0.00227738618850708 min
INFO:root:Construct model for inputsize:256,middlesize:512,outputsize:1 2.075831095377604e-05 min
INFO:root:Construct model for inputsize:256,middlesize:1024,outputsize:1 3.1765302022298174e-05 min


In [8]:
# Quantum (retro-qrl) grid: one model is built per combination of the listed values.
method = 'retro-qrl'
model_param = {
    method: {
        'n_qubits': [8],
        'device': ['local', 'sv1', 'aspen-m3', 'aria-2'],
        'framework': ['pennylane'],
        'shots': [100, 1000],
        'layers': [1, 2, 3],
    }
}

retro_rl_model.build_model(**model_param)


INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:1 5.3962071736653645e-06 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:2 1.8080075581868489e-06 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:3 2.610683441162109e-06 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:1 1.0164578755696614e-05 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:2 3.8027763366699217e-06 min
INFO:root:Construct model for n_qubits:8,device:local,framework:pennylane,layers:3 1.7364819844563802e-06 min
INFO:root:Construct model for n_qubits:8,device:sv1,framework:pennylane,layers:1 2.2530555725097655e-06 min
INFO:root:Construct model for n_qubits:8,device:sv1,framework:pennylane,layers:2 4.124641418457031e-06 min
INFO:root:Construct model for n_qubits:8,device:sv1,framework:pennylane,layers:3 1.7563501993815103e-06 min
INFO:root:Construc

We can use the following method to check the properties of 
the model. This way, we can build many models conveniently. 
After that, we save the model and update the value of 
**model_path**.

In [9]:
# describe the model parameters
# Logs, per method, every parameter name with the set of values registered so far.
# NOTE(review): the return value is kept in model_info but not read in this notebook.
model_info = retro_rl_model.describe_model()

INFO:root:method: retro-rl
INFO:root:param: inputsize, value {256}
INFO:root:param: middlesize, value {256, 512, 1024}
INFO:root:param: outputsize, value {1}
INFO:root:method: retro-qrl
INFO:root:param: n_qubits, value {8}
INFO:root:param: device, value {'local', 'aria-2', 'sv1', 'aspen-m3'}
INFO:root:param: framework, value {'pennylane'}
INFO:root:param: shots, value {1000, 100}
INFO:root:param: layers, value {1, 2, 3}


In [10]:
# save the model
# save() returns the path of the file it wrote; "latest" is the version tag
# that ends up in the file name (retrorl_model_latest.pickle).
model_path = retro_rl_model.save("latest")

print(f"You have built the nn model for RL and saved it as {model_path}")

INFO:root:finish save retrorl_model_latest.pickle


You have built the nn model for RL and saved it as ./retrorl_model_latest.pickle


In [11]:
# Put the saved model next to the dataset so that it is uploaded together with
# the rest of the data directory when a job syncs it to S3.
!cp $model_path $data_path

In [12]:
!ls $data_path

Deadend.npy		  retrorl_model_latest.pickle  target_product.npy
buyable.npy		  smiles		       uspto50k.xlsx
ground_truth.npy	  smiles_dictionary.npy
reactions_dictionary.npy  smiles_map.npy


#### Step 3: Learn Retrosynthetic Planning

In this part, we use the CPU to run the classical model for retrosynthetic planning 
and simulators/NISQ devices to run the quantum model for retrosynthetic planning.

In [13]:
# model_path='./retrorl_model_latest.pickle'

# # get the model you want to optimize
# n_qubits = 8
# device = 'local'
# framework = 'pennylane'
# shots = 100
# layers = 1

# model_name = "{}_{}_{}_{}_{}".format(n_qubits, device, framework, shots, layers)
# method = "retro-qrl"

In [14]:
# Choose which saved model to optimize: the classical network whose name is
# "<inputsize>_<middlesize>_<outputsize>".
model_path = './retrorl_model_latest.pickle'
method = "retro-rl"

device = 'local'
inputsize, middlesize, outputsize = 256, 256, 1

model_name = f"{inputsize}_{middlesize}_{outputsize}"

In [15]:
# train_mode can be: "local-instance", "local-job", "hybrid-job"
# The job modes ("local-job", "hybrid-job") additionally sync the data
# directory to S3 in the next cell; "local-instance" loads the model in-process.
train_mode = "local-job"

In [16]:
# Assemble the agent settings, upload the data for job modes, and start training.
data_path = 'data'
agent_param = {}
agent_param["data_path"] = data_path
agent_param["train_mode"] = train_mode
agent_param["model_name"] = model_name
agent_param["model_path"] = model_path

# please change the following s3 bucket to the one you can upload and download data
if train_mode in ("local-job", "hybrid-job"):
    s3_bucket_name = "s3://amazon-braket-us-west-1-002224604296"
    s3_data_path = f"{s3_bucket_name}/data"
    import os
    sync_status = os.system(f"aws s3 sync {data_path} {s3_data_path}")
    if sync_status != 0:
        # Surface a failed upload instead of silently continuing with stale S3 data.
        print(f"WARNING: 'aws s3 sync' exited with status {sync_status}; "
              f"check your credentials and access to {s3_bucket_name}")
    # Only job modes have an S3 location. Setting the key here (rather than
    # unconditionally) avoids a NameError when train_mode is "local-instance".
    agent_param["s3_data_path"] = s3_data_path

retro_model = None
if train_mode == "local-instance":
    # get model
    retro_rl_model = RetroRLModel.load(model_path)
    retro_model = retro_rl_model.get_model(method, model_name)

# For job modes the model is left as None here; the job log shows it being
# loaded inside the job from model_path / model_name.
retro_rl_agent = RetroRLAgent(retro_model, method, **agent_param)
retro_rl_agent.game_job()

job_arn = retro_rl_agent.get_job_arn()
print(f"create job with arn {job_arn}")

upload: data/Deadend.npy to s3://amazon-braket-us-west-1-002224604296/data/Deadend.npy
upload: data/smiles to s3://amazon-braket-us-west-1-002224604296/data/smiles
upload: data/smiles_map.npy to s3://amazon-braket-us-west-1-002224604296/data/smiles_map.npy
upload: data/buyable.npy to s3://amazon-braket-us-west-1-002224604296/data/buyable.npy
upload: data/ground_truth.npy to s3://amazon-braket-us-west-1-002224604296/data/ground_truth.npy
upload: data/target_product.npy to s3://amazon-braket-us-west-1-002224604296/data/target_product.npy
upload: data/reactions_dictionary.npy to s3://amazon-braket-us-west-1-002224604296/data/reactions_dictionary.npy
upload: data/retrorl_model_latest.pickle to s3://amazon-braket-us-west-1-002224604296/data/retrorl_model_latest.pickle
upload: data/uspto50k.xlsx to s3://amazon-braket-us-west-1-002224604296/data/uspto50k.xlsx
Completed 19.9 MiB/19.9 MiB (22.5 MiB/s) with 1 file(s) remaining

INFO:root:load data...
INFO:root:model is None
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


upload: data/smiles_dictionary.npy to s3://amazon-braket-us-west-1-002224604296/data/smiles_dictionary.npy
Going to run 256 mode


INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded
latest: Pulling from amazon-braket-pytorch-jobs
Digest: sha256:0fb89a8a8455e0483c0b8cebd26e946d9e15cf3bc659d2b438a65209181bb08e
Status: Image is up to date for 292282985366.dkr.ecr.us-west-1.amazonaws.com/amazon-braket-pytorch-jobs:latest
292282985366.dkr.ecr.us-west-1.amazonaws.com/amazon-braket-pytorch-jobs:latest


INFO:braket.jobs.local.local_job_container_setup:Using the long-lived AWS credentials found in session


Boto3 Version:  1.28.53
Beginning Setup
Checking for Additional Requirements
Additional Requirements Check Finished
Running Code As Process
Current Python Version- 3.10.8
{'method': 'retro-rl', 'model_name': '256_256_1', 'model_path': 'retrorl_model_latest.pickle', 'p': '2', 'max_parallel': '10', 'num_iterations': '5', 'stepsize': '0.1', 'shots': '1000', 'interface': 'torch', 'train_mode': 'local-job'}
INFO:root:load data...
INFO:root:model is {'model_name': '256_256_1', 'version': '1700300318', 'nn_model': Model(
  (relu): ReLU()
  (value_fc1): Linear(in_features=256, out_features=256, bias=True)
  (value_fc2): Linear(in_features=256, out_features=1, bias=True)
)}
episode 1
INFO:root:finish save 256_256_1_agent_latest.pickle
/opt/braket/input/data/data/256_256_1_agent_latest.pickle
upload: ../../../../braket/input/data/data/256_256_1_agent_latest.pickle to s3://amazon-braket-us-west-1-002224604296/jobs/retrorl-job-256-torch-1700300320/data/retrorl-job-256-torch-1700300320/output
Code 

In [15]:
# retro_rl_agent.get_job().cancel()

In [16]:
# Choose the next model to optimize: the classical network with a 512-wide
# middle layer, named "<inputsize>_<middlesize>_<outputsize>".
model_path = './retrorl_model_latest.pickle'
method = "retro-rl"

inputsize, middlesize, outputsize = 256, 512, 1
model_name = f"{inputsize}_{middlesize}_{outputsize}"

# train_mode can be: "local-instance", "local-job", "hybrid-job"
train_mode = "hybrid-job"

In [None]:
# Load the selected model from the saved registry, train it in this kernel with
# game(), and store the trained agent under the data directory.
data_path = 'data'
agent_param = {
    "data_path": data_path,
    "train_mode": train_mode,
    "model_name": model_name,
    "model_path": model_path,
}

retro_rl_model = RetroRLModel.load(model_path)
model_info = retro_rl_model.describe_model()
retro_model = retro_rl_model.get_model(method, model_name)

retro_rl_agent = RetroRLAgent(retro_model, method, **agent_param)
retro_rl_agent.game()
retro_rl_agent.save("latest", path='data')

INFO:root:method: retro-rl
INFO:root:param: inputsize, value {256}
INFO:root:param: middlesize, value {256, 512, 1024}
INFO:root:param: outputsize, value {1}
INFO:root:method: retro-qrl
INFO:root:param: n_qubits, value {8}
INFO:root:param: device, value {'sv1', 'local', 'aspen-m3', 'aria-2'}
INFO:root:param: framework, value {'pennylane'}
INFO:root:param: shots, value {1000, 100}
INFO:root:param: layers, value {1, 2, 3}
INFO:root:load data...
INFO:root:model is {'model_name': '256_512_1', 'version': '1699750884', 'nn_model': Model(
  (relu): ReLU()
  (value_fc1): Linear(in_features=256, out_features=512, bias=True)
  (value_fc2): Linear(in_features=512, out_features=1, bias=True)
)}


episode 1
episode 2
episode 3
episode 4
episode 5
episode 6
episode 7
episode 8
episode 9
episode 10
episode 11
episode 12
episode 13
episode 14
episode 15
episode 16
episode 17
episode 18
episode 19
episode 20
episode 21
episode 22
episode 23
episode 24
episode 25
episode 26
episode 27
episode 28
episode 29
episode 30
epsiode 30 training...


  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


finish epoch 0 for 0.006285039583841959 minutes
finish epoch 1 for 3.1042098999023435e-05 minutes
finish epoch 2 for 1.4444192250569662e-05 minutes
finish epoch 3 for 1.5461444854736327e-05 minutes
finish epoch 4 for 1.4734268188476562e-05 minutes
finish epoch 5 for 1.447598139444987e-05 minutes
finish epoch 6 for 1.4313062032063802e-05 minutes
finish epoch 7 for 1.4301141103108724e-05 minutes
finish epoch 8 for 1.4762083689371744e-05 minutes
finish epoch 9 for 1.4472007751464844e-05 minutes
finish epoch 10 for 1.4793872833251952e-05 minutes
finish epoch 11 for 1.4309088389078776e-05 minutes
finish epoch 12 for 1.4980634053548178e-05 minutes
finish epoch 13 for 1.4658768971761068e-05 minutes
finish epoch 14 for 1.4408429463704426e-05 minutes
finish epoch 15 for 1.4249483744303385e-05 minutes
finish epoch 16 for 1.4249483744303385e-05 minutes
finish epoch 17 for 1.4996528625488281e-05 minutes
finish epoch 18 for 1.415411631266276e-05 minutes
finish epoch 19 for 1.409451166788737e-05 min

In [None]:
# Save the trained agent again, this time without an explicit path, and keep the
# value returned by save() (presumably the written file's path — TODO confirm).
NN_path = retro_rl_agent.save("latest")

#### Step 4: PostProcess Result

In [None]:
# get results from trained model, let's use the one trained with hybrid job for example

In [21]:
!ls -alh output

-rw-rw-r-- 1 ubuntu ubuntu 12M Nov 18 08:40 output


In [18]:
# Download the trained agent produced by the job above into ./output.
# NOTE(review): the bucket and job name (retrorl-job-256-torch-1700300320) are
# specific to one past run — replace them with the values of your own job.
!aws s3 cp s3://amazon-braket-us-west-1-002224604296/jobs/retrorl-job-256-torch-1700300320/data/retrorl-job-256-torch-1700300320/output .

download: s3://amazon-braket-us-west-1-002224604296/jobs/retrorl-job-256-torch-1700300320/data/retrorl-job-256-torch-1700300320/output to ./output


In [19]:
import sys
# Make modules inside the hybridjobs/ directory importable by their bare names.
# presumably needed so the saved agent's classes resolve while loading — TODO confirm.
sys.path.append("hybridjobs")
# NOTE(review): the job saved this file with a .pickle name, so load() most likely
# unpickles it — only load files that come from a job you trust.
retro_rl_agent_load = RetroRLAgent.load("./output")

In [25]:
# Ask the agent loaded from the job output for a synthesis pathway and compare
# the set of molecules it visits with the stored ground truth for the same target.
target = 'O=C(NCc1ccc(CO)cc1)c1ccccn1'
retro_rl_agent_load.pathway(target)
layer2 = retro_rl_agent_load.layer2
print(layer2)

# layer2 maps a depth to {node id: SMILES}; flatten it into one set of molecules.
path = set()
for molecules in layer2.values():
    for smiles in molecules.values():
        print(smiles)
        path.add(smiles)
print(f"The path of retro_rl_agent: \n  {path}")

input_data_path = agent_param["data_path"]
ground_truth = np.load(input_data_path + '/ground_truth.npy', allow_pickle=True).tolist()
target_truth = ground_truth[target]

real_path = set(target_truth['path'])
print(f"The real_path of Brute force: \n  {real_path}")
real_cost = target_truth['cost']
print(f"The real_cost of Brute force: \n  {real_cost}")
print(f"Get the same path: {path == real_path}")

{1: {'1': 'O=C(NCc1ccc(CO)cc1)c1ccccn1'}, 2: {'11': 'NCc1ccc(CO)cc1', '12': 'O=C(O)c1ccccn1'}, 3: {'111': 'N#Cc1ccc(CO)cc1'}, 4: {'1111': 'N#Cc1ccc(C=O)cc1'}, 5: {}, 6: {}, 7: {}, 8: {}, 9: {}, 10: {}}
O=C(NCc1ccc(CO)cc1)c1ccccn1
NCc1ccc(CO)cc1
O=C(O)c1ccccn1
N#Cc1ccc(CO)cc1
N#Cc1ccc(C=O)cc1
The path of retro_rl_agent: 
  {'N#Cc1ccc(CO)cc1', 'N#Cc1ccc(C=O)cc1', 'O=C(NCc1ccc(CO)cc1)c1ccccn1', 'O=C(O)c1ccccn1', 'NCc1ccc(CO)cc1'}
The real_path of Brute force: 
  {'N#Cc1ccc(CO)cc1', 'N#Cc1ccc(C=O)cc1', 'O=C(NCc1ccc(CO)cc1)c1ccccn1', 'O=C(O)c1ccccn1', 'NCc1ccc(CO)cc1'}
The real_cost of Brute force: 
  3.0
Get the same path: True


In [None]:
target = 'COC(Cc1ccc2oc(Cc3nc(-c4ccccc4)oc3C)cc2c1)OC'
retro_rl_agent.pathway(target)
layer2 = retro_rl_agent.layer2
# print(layer2)
path = set()
for i, j in layer2.items():
    for k, l  in j.items():
        path.add(l)
print(f"The path of retro_rl_agent: \n \
 {path}")

input_data_path = agent_param["data_path"]
ground_truth = np.load(input_data_path+'/ground_truth.npy', allow_pickle=True).tolist()

real_path = set(ground_truth[target]['path'])
print(f"The real_path of Brute force: \n \
 {real_path}")
real_cost = ground_truth[target]['cost']
print(f"The real_cost of Brute force: \n \
 {real_cost}")
print(f"Get the same path: {path == real_path}")

In [None]:
target = 'O=C(NCc1ccc(CO)cc1)c1ccccn1'
retro_rl_agent.pathway(target)
layer2 = retro_rl_agent.layer2
# print(layer2)
path = set()
for i, j in layer2.items():
    for k, l  in j.items():
        path.add(l)
print(f"The path of retro_rl_agent: \n \
 {path}")

input_data_path = agent_param["data_path"]
ground_truth = np.load(input_data_path+'/ground_truth.npy', allow_pickle=True).tolist()

real_path = set(ground_truth[target]['path'])
print(f"The real_path of Brute force: \n \
 {real_path}")
real_cost = ground_truth[target]['cost']
print(f"The real_cost of Brute force: \n \
 {real_cost}")
print(f"Get the same path: {path == real_path}")

In [None]:
target = 'CCCCC(CC)COC(=O)C(C#N)=C(c1ccccc1)c1ccccc1'
retro_rl_agent.pathway(target)
layer2 = retro_rl_agent.layer2
# print(layer2)
path = set()
for i, j in layer2.items():
    for k, l  in j.items():
        path.add(l)
print(f"The path of retro_rl_agent: \n \
 {path}")

input_data_path = agent_param["data_path"]
ground_truth = np.load(input_data_path+'/ground_truth.npy', allow_pickle=True).tolist()

real_path = set(ground_truth[target]['path'])
print(f"The real_path of Brute force: \n \
 {real_path}")
real_cost = ground_truth[target]['cost']
print(f"The real_cost of Brute force: \n \
 {real_cost}")
print(f"Get the same path: {path == real_path}")

# Hybrid Job Experiment

In [None]:
from braket.aws import AwsQuantumJob
from braket.jobs.config import InstanceConfig
import boto3
import json
import time
import altair as alt
import pandas as pd
import numpy as np
from hybridjobs.utility.HybridJobHelpers import *

#### Step 1: Prepare parameters for batch evaluation

In this part, we set the parameters for batch evaluation

In [None]:
# Settings for the batch evaluation: only the quantum search space is expanded here.
experiment_name = "retrosynthetic-planning"
data_path = "retrosynthetic-planning-data"
suffix_check = ["txt"]

quantum_search_space = [
    {"n_qubits": [8]},
    {"framework": ['pennylane']},
    {"layers": [1, 2, 3]},
    {"shots": [100]},
    {"device": ['local']},
]
experiments_params = {"version": "1", "params": quantum_search_space}

# parse_params fills hybrid_job_params in place; hp is its scratch dictionary.
# NOTE(review): exact output format depends on HybridJobHelpers.parse_params — confirm.
hp = {}
hybrid_job_params = []
parse_params(experiments_params['params'], hp, hybrid_job_params)

print(f"parameters for experiments: \n {hybrid_job_params}")

In [None]:
# Settings for the batch evaluation: "params1" is the quantum search space and
# "params2" the classical one. Every combination becomes one model name.
experiment_name = "retrosynthetic-planning"
data_path = "retrosynthetic-planning-data"
suffix_check = ["txt"]
experiments_params = {
    "version": "1",
    "params1": [
        {"n_qubits": [8]},
        {"framework": ['pennylane']},
        {"layers": [1, 2, 3]},
        {"shots": [100, 1000]},
        {"device": ['local']},
    ],
    "params2": [
        {"inputsize": [256]},
        {"middlesize": [256, 512, 1024]},
        {"outputsize": [1]},
    ],
}

quantum_space = experiments_params['params1']
classical_space = experiments_params['params2']

# Quantum names: "<n_qubits>_<device>_<framework>_<shots>_<layers>".
# The clause order below fixes the order of the resulting list.
quantum_model_names = [
    "{}_{}_{}_{}_{}".format(n_qubits, device, framework, shots, layers)
    for n_qubits in quantum_space[0]["n_qubits"]
    for framework in quantum_space[1]["framework"]
    for layers in quantum_space[2]["layers"]
    for shots in quantum_space[3]["shots"]
    for device in quantum_space[4]["device"]
]

# Classical names: "<inputsize>_<middlesize>_<outputsize>".
classical_model_names = [
    "{}_{}_{}".format(inputsize, middlesize, outputsize)
    for inputsize in classical_space[0]["inputsize"]
    for middlesize in classical_space[1]["middlesize"]
    for outputsize in classical_space[2]["outputsize"]
]

hybrid_job_params = quantum_model_names + classical_model_names

In [None]:
hybrid_job_params

In [None]:
# Train every model variant in turn and collect one average-cost curve per model.
avtocost = []
model_path = "./retrorl_model_latest.pickle"

# train_mode can be: "local-instance", "local-job", "hybrid-job"
train_mode = "local-instance"
data_path = 'data'

for model_name in hybrid_job_params:
    # Quantum model names have five "_"-separated fields
    # (n_qubits, device, framework, shots, layers); classical names have three
    # (inputsize, middlesize, outputsize). Counting fields keeps working when
    # n_qubits is not 8, unlike testing whether the first character is "8".
    if len(model_name.split("_")) == 5:
        method = "retro-qrl"
    else:
        method = "retro-rl"

    agent_param = {
        "data_path": data_path,
        "train_mode": train_mode,
        "model_name": model_name,
        "model_path": model_path,
    }

    retro_model = None
    if train_mode == "local-instance":
        # get model — reloaded from disk on every iteration, as before
        retro_rl_model = RetroRLModel.load(model_path)
        model_info = retro_rl_model.describe_model()
        retro_model = retro_rl_model.get_model(method, model_name)

    retro_rl_agent = RetroRLAgent(retro_model, method, **agent_param)
    retro_rl_agent.game_job()
    avtocost.append(retro_rl_agent.avtocost)

In [None]:
# draw the training curve
import matplotlib.pyplot as plt
plt.figure(1)
plt.title('Training curve')
plt.xlabel('Epoch')
plt.ylabel('Average cost')
for i in range(len(hybrid_job_params)):
    plt.plot(range(0,len(avtocost[0])),avtocost[i],label=hybrid_job_params[i])
plt.legend(loc = 'upper right')