### CX gate calibration with hyperparameter optimization (HPO) under the new code architecture / workflow (Dec 2023)

In [1]:
import sys
import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort
# (OMP: Error #15) raised when several numeric libraries each bundle their own
# OpenMP runtime -- confirm it is still required. It is set here, before the
# remaining imports in this cell are executed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
import time
import yaml
import pickle
import optuna
# Location of the Quantum_Optimal_Control checkout whose top-level modules are
# imported right below. The location can be overridden per machine through the
# QUANTUM_OPTIMAL_CONTROL_PATH environment variable; when that variable is unset
# or empty, the original hard-coded location is used, so existing setups behave
# exactly as before.
module_path = os.path.abspath(
    os.environ.get("QUANTUM_OPTIMAL_CONTROL_PATH")
    or '/Users/lukasvoss/Documents/Master Wirtschaftsphysik/Masterarbeit Yale-NUS CQT/Quantum_Optimal_Control'
)
# Guarded append: re-running this cell must not add duplicate sys.path entries.
if module_path not in sys.path:
    sys.path.append(module_path)

from quantumenvironment import QuantumEnvironment
from agent import Agent
from gate_level_abstraction import gate_q_env_config
from helper_functions import load_agent_from_yaml_file, create_agent_config
from ppo import make_train_ppo
from qconfig import QEnvConfig

import logging
# Root-logger configuration for the notebook: records at WARNING and above are
# written to stdout with a timestamp prefix.
# NOTE(review): the format string prints the literal text "INFO" instead of
# %(levelname)s, so every emitted record is labelled INFO regardless of its
# real severity. This is kept as-is because the recorded cell output relies on it.
log_settings = {
    "stream": sys.stdout,
    "level": logging.WARNING,
    "format": "%(asctime)s INFO %(message)s",  # hardcoded INFO level
    "datefmt": "%Y-%m-%d %H:%M:%S",
}
logging.basicConfig(**log_settings)



Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done for qubit 1 done.
All single qubit calibrations are done
Updated Instruction Schedule Map <InstructionScheduleMap(1Q instructions:
  q0: {'s', 'z', 'delay', 'measure', 'sx', 'reset', 'rz', 'id', 'h', 'x', 'sdg', 't', 'tdg'}
  q1: {'s', 'z', 'delay', 'measure', 'sx', 'reset', 'rz', 'id', 'h', 'x', 'sdg', 't', 'tdg'}
Multi qubit instructions:
  (0, 1): {'cr45m', 'cr45p', 'ecr'}
  (1, 0): {'cr45m', 'cr45p', 'ecr'}
)>
Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done fo

### Perform HPO

In [2]:
from hyperparameter_optimization import HyperparameterOptimizer

In [5]:
# Agent configuration template inside the project checkout. It is built from
# `module_path` (defined in the first code cell) so the checkout location is
# spelled out in one place only; with the default `module_path` this yields the
# same absolute path as before.
path_agent_config = os.path.join(module_path, 'template_configurations', 'agent_config.yaml')
# Results location handed to HyperparameterOptimizer. This is a relative path --
# presumably resolved against the kernel's working directory; verify in the optimizer.
save_results_path = 'hpo_results'

In [6]:
# Set up the hyperparameter search for the gate-level environment configuration,
# using the agent template and results location chosen in the previous cell.
optimizer = HyperparameterOptimizer(
    gate_q_env_config,
    path_agent_config,
    save_results_path,
    log_progress=True,
    num_hpo_trials=1,  # single trial: quick end-to-end check of the workflow
)

# Run the search; progress and the best trial are reported in the cell output.
optimizer.optimize_hyperparameters()

SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
2023-12-22 16:33:31 INFO num_HPO_trials: 1
2023-12-22 16:33:31 INFO ---------------- STARTING HPO ----------------


[I 2023-12-22 16:33:31,465] A new study created in memory with name: no-name-4a3a69ad-e068-44b5-90d3-82d2411f8147


SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


  0%|          | 0/43 [00:00<?, ?it/s]

SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


  2%|▏         | 1/43 [00:00<00:05,  7.00it/s]

mean tensor([ 0.1526,  0.1412, -0.1095, -0.1277,  0.0822, -0.0831,  0.1488])
Average return: 0.2531957692460124
DFE Rewards Mean: 0.2531957692460124
DFE Rewards standard dev 0.24009396381914827
Returns Mean: 0.4023117
Returns standard dev 0.76877826
Advantages Mean: 0.47485408
Advantages standard dev 0.7687782
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


  5%|▍         | 2/43 [00:00<00:05,  7.83it/s]

mean tensor([ 0.1043,  0.0517, -0.0940, -0.0958,  0.0356, -0.0537,  0.1027])
Average return: 0.3070529413932616
DFE Rewards Mean: 0.3070529413932616
DFE Rewards standard dev 0.2759850903176648
Returns Mean: 0.50941306
Returns standard dev 0.6288557
Advantages Mean: 0.44320843
Advantages standard dev 0.6288557
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


  7%|▋         | 3/43 [00:00<00:04,  8.18it/s]

mean tensor([ 0.1040,  0.1226, -0.0947, -0.1064,  0.1000, -0.0740,  0.1069])
Average return: 0.2613294063564847
DFE Rewards Mean: 0.2613294063564847
DFE Rewards standard dev 0.2477970660148366
Returns Mean: 0.39722168
Returns standard dev 0.495295
Advantages Mean: 0.14537904
Advantages standard dev 0.495295
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


  9%|▉         | 4/43 [00:00<00:04,  8.32it/s]

mean tensor([ 0.0791,  0.0240, -0.0805, -0.0793,  0.0379, -0.0458,  0.0770])
Average return: 0.3521320932920361
DFE Rewards Mean: 0.3521320932920361
DFE Rewards standard dev 0.29072378340857963
Returns Mean: 0.6791868
Returns standard dev 1.2544335
Advantages Mean: 0.4953712
Advantages standard dev 1.2544335
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 12%|█▏        | 5/43 [00:00<00:04,  8.41it/s]

mean tensor([ 0.0818,  0.0437, -0.0918, -0.0905,  0.0738, -0.0613,  0.0780])
Average return: 0.3902469339018495
DFE Rewards Mean: 0.3902469339018495
DFE Rewards standard dev 0.30004939085691273
Returns Mean: 0.8072568
Returns standard dev 1.4571792
Advantages Mean: 0.36860257
Advantages standard dev 1.4571792
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 14%|█▍        | 6/43 [00:00<00:04,  7.69it/s]

mean tensor([ 0.0669,  0.1320, -0.0956, -0.1106,  0.1325, -0.0834,  0.0966])
Average return: 0.27049739495783576
DFE Rewards Mean: 0.27049739495783576
DFE Rewards standard dev 0.2129223470281166
Returns Mean: 0.37430948
Returns standard dev 0.35562313
Advantages Mean: -0.48325723
Advantages standard dev 0.35562313
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 16%|█▋        | 7/43 [00:00<00:04,  7.87it/s]

mean tensor([ 0.0576,  0.1044, -0.0963, -0.1011,  0.1092, -0.0790,  0.0889])
Average return: 0.27731139719262443
DFE Rewards Mean: 0.27731139719262443
DFE Rewards standard dev 0.2079768259881092
Returns Mean: 0.38558418
Returns standard dev 0.38426203
Advantages Mean: -0.23495664
Advantages standard dev 0.38426203
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 19%|█▊        | 8/43 [00:00<00:04,  8.13it/s]

mean tensor([ 0.0617,  0.0760, -0.1050, -0.0895,  0.0871, -0.0692,  0.0895])
Average return: 0.2663249384870552
DFE Rewards Mean: 0.2663249384870552
DFE Rewards standard dev 0.20778601153426923
Returns Mean: 0.36752257
Returns standard dev 0.3652411
Advantages Mean: -0.05991945
Advantages standard dev 0.36524114
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 21%|██        | 9/43 [00:01<00:04,  8.42it/s]

mean tensor([ 0.0545,  0.0347, -0.0923, -0.0732,  0.0471, -0.0499,  0.0703])
Average return: 0.3730854497141464
DFE Rewards Mean: 0.3730854497141464
DFE Rewards standard dev 0.30044383182997564
Returns Mean: 0.7431047
Returns standard dev 1.2084435
Advantages Mean: 0.44550237
Advantages standard dev 1.2084435
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 23%|██▎       | 10/43 [00:01<00:03,  8.52it/s]

mean tensor([ 0.0579,  0.1006, -0.0994, -0.0635,  0.0964, -0.0541,  0.0802])
Average return: 0.31047546057217573
DFE Rewards Mean: 0.31047546057217573
DFE Rewards standard dev 0.2656981432297469
Returns Mean: 0.49428567
Returns standard dev 0.5660423
Advantages Mean: -0.113070555
Advantages standard dev 0.5660423
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 26%|██▌       | 11/43 [00:01<00:03,  8.60it/s]

mean tensor([ 0.0454,  0.0130, -0.0809, -0.0550,  0.0366, -0.0239,  0.0587])
Average return: 0.3874818761278917
DFE Rewards Mean: 0.3874818761278917
DFE Rewards standard dev 0.28215984818198175
Returns Mean: 0.690816
Returns standard dev 0.9126187
Advantages Mean: 0.31656954
Advantages standard dev 0.91261864
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 28%|██▊       | 12/43 [00:01<00:03,  8.62it/s]

mean tensor([ 0.0421,  0.1082, -0.0969, -0.0573,  0.1128, -0.0406,  0.0800])
Average return: 0.3119254020532925
DFE Rewards Mean: 0.3119254020532925
DFE Rewards standard dev 0.270364808087946
Returns Mean: 0.51219356
Returns standard dev 0.64920646
Advantages Mean: -0.16693926
Advantages standard dev 0.64920646
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 30%|███       | 13/43 [00:01<00:03,  8.69it/s]

mean tensor([ 0.0456,  0.1269, -0.0990, -0.0617,  0.1174, -0.0500,  0.0813])
Average return: 0.29269405605258647
DFE Rewards Mean: 0.29269405605258647
DFE Rewards standard dev 0.20849447078143435
Returns Mean: 0.4143587
Returns standard dev 0.4284049
Advantages Mean: -0.15074192
Advantages standard dev 0.4284049
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 33%|███▎      | 14/43 [00:01<00:03,  8.51it/s]

mean tensor([ 0.0376,  0.0866, -0.1022, -0.0511,  0.0926, -0.0425,  0.0662])
Average return: 0.2860079960241012
DFE Rewards Mean: 0.2860079960241012
DFE Rewards standard dev 0.21328534010547842
Returns Mean: 0.398384
Returns standard dev 0.37225324
Advantages Mean: -0.020907851
Advantages standard dev 0.37225324
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 35%|███▍      | 15/43 [00:01<00:03,  8.32it/s]

mean tensor([ 0.0336,  0.0832, -0.0950, -0.0475,  0.0786, -0.0469,  0.0609])
Average return: 0.4278575232810212
DFE Rewards Mean: 0.4278575232810212
DFE Rewards standard dev 0.25963892389497306
Returns Mean: 0.77103204
Returns standard dev 1.0798277
Advantages Mean: 0.3746568
Advantages standard dev 1.0798277
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 37%|███▋      | 16/43 [00:01<00:03,  8.50it/s]

mean tensor([ 0.0224,  0.0396, -0.0831, -0.0413,  0.0535, -0.0279,  0.0510])
Average return: 0.45299071570158683
DFE Rewards Mean: 0.45299071570158683
DFE Rewards standard dev 0.28891682487564935
Returns Mean: 0.9644837
Returns standard dev 1.6436696
Advantages Mean: 0.47349626
Advantages standard dev 1.6436695
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 40%|███▉      | 17/43 [00:02<00:03,  8.53it/s]

mean tensor([ 0.0233,  0.0622, -0.0867, -0.0514,  0.0713, -0.0414,  0.0500])
Average return: 0.5688291661391138
DFE Rewards Mean: 0.5688291661391138
DFE Rewards standard dev 0.27000110109608666
Returns Mean: 1.1822319
Returns standard dev 1.3057464
Advantages Mean: 0.4893538
Advantages standard dev 1.3057463
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 42%|████▏     | 18/43 [00:02<00:02,  8.48it/s]

mean tensor([ 0.0198,  0.0570, -0.0923, -0.0495,  0.0524, -0.0374,  0.0523])
Average return: 0.4938118962481324
DFE Rewards Mean: 0.4938118962481324
DFE Rewards standard dev 0.2903894424435958
Returns Mean: 1.0426556
Returns standard dev 1.381794
Advantages Mean: 0.23103745
Advantages standard dev 1.381794
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 44%|████▍     | 19/43 [00:02<00:02,  8.65it/s]

mean tensor([ 0.0165,  0.0680, -0.1024, -0.0365,  0.0529, -0.0335,  0.0305])
Average return: 0.6011375875909659
DFE Rewards Mean: 0.6011375875909659
DFE Rewards standard dev 0.2696618834368499
Returns Mean: 1.3339927
Returns standard dev 1.4119284
Advantages Mean: 0.34985238
Advantages standard dev 1.4119284
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 47%|████▋     | 20/43 [00:02<00:02,  8.58it/s]

mean tensor([ 0.0133,  0.0576, -0.1040, -0.0361,  0.0320, -0.0259,  0.0361])
Average return: 0.4831308797550565
DFE Rewards Mean: 0.4831308797550565
DFE Rewards standard dev 0.27168014821675157
Returns Mean: 0.90679944
Returns standard dev 1.1034021
Advantages Mean: -0.11673741
Advantages standard dev 1.1034021
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 49%|████▉     | 21/43 [00:02<00:02,  8.52it/s]

mean tensor([ 0.0093,  0.0305, -0.0994, -0.0406,  0.0155, -0.0230,  0.0229])
Average return: 0.4799407835421096
DFE Rewards Mean: 0.4799407835421096
DFE Rewards standard dev 0.2842666988967716
Returns Mean: 0.93975043
Returns standard dev 1.1770289
Advantages Mean: 0.085362606
Advantages standard dev 1.1770289
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 51%|█████     | 22/43 [00:02<00:02,  8.60it/s]

mean tensor([ 0.0152,  0.0489, -0.1049, -0.0401,  0.0118, -0.0335,  0.0303])
Average return: 0.5123914602328657
DFE Rewards Mean: 0.5123914602328657
DFE Rewards standard dev 0.2952562417027061
Returns Mean: 1.0642045
Returns standard dev 1.1731392
Advantages Mean: 0.105268955
Advantages standard dev 1.1731391
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 53%|█████▎    | 23/43 [00:02<00:02,  8.69it/s]

mean tensor([ 0.0212,  0.1476, -0.1253, -0.0261,  0.0478, -0.0566,  0.0290])
Average return: 0.41800036363271786
DFE Rewards Mean: 0.41800036363271786
DFE Rewards standard dev 0.29880225952842016
Returns Mean: 0.8827379
Returns standard dev 1.5467803
Advantages Mean: -0.47709504
Advantages standard dev 1.5467803
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 56%|█████▌    | 24/43 [00:02<00:02,  8.55it/s]

mean tensor([ 0.0110,  0.0740, -0.1036, -0.0326,  0.0156, -0.0401,  0.0231])
Average return: 0.536264994073699
DFE Rewards Mean: 0.536264994073699
DFE Rewards standard dev 0.25544228450819245
Returns Mean: 1.1075577
Returns standard dev 1.4532394
Advantages Mean: 0.0854386
Advantages standard dev 1.4532393
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 58%|█████▊    | 25/43 [00:02<00:02,  8.32it/s]

mean tensor([ 0.0136,  0.1154, -0.1264, -0.0255,  0.0367, -0.0401,  0.0162])
Average return: 0.32638703249553414
DFE Rewards Mean: 0.32638703249553414
DFE Rewards standard dev 0.24951256857106602
Returns Mean: 0.49730995
Returns standard dev 0.5007208
Advantages Mean: -0.78573173
Advantages standard dev 0.5007208
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 60%|██████    | 26/43 [00:03<00:02,  8.31it/s]

mean tensor([ 0.0142,  0.0896, -0.1005, -0.0197,  0.0272, -0.0369,  0.0159])
Average return: 0.6160319121415145
DFE Rewards Mean: 0.6160319121415145
DFE Rewards standard dev 0.27605947071680265
Returns Mean: 1.4136288
Returns standard dev 1.4533576
Advantages Mean: 0.39008135
Advantages standard dev 1.4533576
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 63%|██████▎   | 27/43 [00:03<00:01,  8.42it/s]

mean tensor([ 0.0218,  0.1347, -0.1028, -0.0059,  0.0423, -0.0394,  0.0076])
Average return: 0.37736354936856603
DFE Rewards Mean: 0.37736354936856603
DFE Rewards standard dev 0.26450897300553733
Returns Mean: 0.6053374
Returns standard dev 0.5694484
Advantages Mean: -0.678222
Advantages standard dev 0.5694484
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 65%|██████▌   | 28/43 [00:03<00:01,  8.41it/s]

mean tensor([ 0.0258,  0.1382, -0.1097, -0.0002,  0.0484, -0.0427,  0.0030])
Average return: 0.41712319816178794
DFE Rewards Mean: 0.41712319816178794
DFE Rewards standard dev 0.26035474397870356
Returns Mean: 0.68153584
Returns standard dev 0.5978771
Advantages Mean: -0.49334392
Advantages standard dev 0.5978771
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 67%|██████▋   | 29/43 [00:03<00:01,  8.51it/s]

mean tensor([ 0.0193,  0.1071, -0.0999, -0.0047,  0.0353, -0.0366,  0.0092])
Average return: 0.41195098962568427
DFE Rewards Mean: 0.41195098962568427
DFE Rewards standard dev 0.26020913240895516
Returns Mean: 0.6756541
Returns standard dev 0.61891603
Advantages Mean: -0.30610946
Advantages standard dev 0.61891603
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 70%|██████▉   | 30/43 [00:03<00:01,  8.56it/s]

mean tensor([ 0.0281,  0.1576, -0.1147,  0.0125,  0.0537, -0.0414,  0.0068])
Average return: 0.5365385527005682
DFE Rewards Mean: 0.5365385527005682
DFE Rewards standard dev 0.3068712925948447
Returns Mean: 1.1471194
Returns standard dev 1.18075
Advantages Mean: 0.24131905
Advantages standard dev 1.1807501
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 72%|███████▏  | 31/43 [00:03<00:01,  8.62it/s]

mean tensor([ 0.0121,  0.0922, -0.1049, -0.0027,  0.0242, -0.0304,  0.0214])
Average return: 0.6607620609736664
DFE Rewards Mean: 0.6607620609736664
DFE Rewards standard dev 0.22761845711569445
Returns Mean: 1.4925593
Returns standard dev 1.5345267
Advantages Mean: 0.5587884
Advantages standard dev 1.5345267
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 74%|███████▍  | 32/43 [00:03<00:01,  8.71it/s]

mean tensor([ 0.0091,  0.1087, -0.1092,  0.0096,  0.0192, -0.0278,  0.0221])
Average return: 0.472213912589224
DFE Rewards Mean: 0.472213912589224
DFE Rewards standard dev 0.2700303399633349
Returns Mean: 0.81764686
Returns standard dev 0.67216647
Advantages Mean: -0.3034441
Advantages standard dev 0.67216647
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 77%|███████▋  | 33/43 [00:03<00:01,  8.41it/s]

mean tensor([ 0.0193,  0.1358, -0.1049,  0.0176,  0.0275, -0.0317,  0.0202])
Average return: 0.5496858569694064
DFE Rewards Mean: 0.5496858569694064
DFE Rewards standard dev 0.26810337080542357
Returns Mean: 1.0349097
Returns standard dev 0.7800985
Advantages Mean: -0.0071006496
Advantages standard dev 0.7800985
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 79%|███████▉  | 34/43 [00:04<00:01,  8.37it/s]

mean tensor([ 0.0120,  0.0911, -0.0909,  0.0095,  0.0196, -0.0239,  0.0196])
Average return: 0.5326080001850744
DFE Rewards Mean: 0.5326080001850744
DFE Rewards standard dev 0.25663986305415815
Returns Mean: 0.95669764
Returns standard dev 0.70036405
Advantages Mean: -0.015558004
Advantages standard dev 0.70036405
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 81%|████████▏ | 35/43 [00:04<00:00,  8.55it/s]

mean tensor([-0.0003,  0.0249, -0.0808, -0.0114, -0.0019, -0.0138,  0.0265])
Average return: 0.6270355499594203
DFE Rewards Mean: 0.6270355499594203
DFE Rewards standard dev 0.25604096904769935
Returns Mean: 1.3208612
Returns standard dev 0.9872326
Advantages Mean: 0.49415126
Advantages standard dev 0.9872327
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 84%|████████▎ | 36/43 [00:04<00:00,  8.64it/s]

mean tensor([ 7.8782e-04,  5.0930e-02, -8.2289e-02, -4.6649e-03, -2.6120e-05,
        -2.1956e-02,  1.6876e-02])
Average return: 0.6727159950283809
DFE Rewards Mean: 0.6727159950283809
DFE Rewards standard dev 0.2233997409231454
Returns Mean: 1.5840625
Returns standard dev 1.737335
Advantages Mean: 0.56779355
Advantages standard dev 1.737335
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 86%|████████▌ | 37/43 [00:04<00:00,  8.77it/s]

mean tensor([-0.0016,  0.0231, -0.0665, -0.0069, -0.0047, -0.0110,  0.0200])
Average return: 0.6600501526780984
DFE Rewards Mean: 0.6600501526780984
DFE Rewards standard dev 0.24781524217993822
Returns Mean: 1.5899025
Returns standard dev 1.7042781
Advantages Mean: 0.48743743
Advantages standard dev 1.7042782
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 88%|████████▊ | 38/43 [00:04<00:00,  8.80it/s]

mean tensor([ 0.0050,  0.0565, -0.0712,  0.0010,  0.0131, -0.0111,  0.0103])
Average return: 0.7948459626691454
DFE Rewards Mean: 0.7948459626691454
DFE Rewards standard dev 0.16858950593688637
Returns Mean: 2.158783
Returns standard dev 1.7894422
Advantages Mean: 0.82487774
Advantages standard dev 1.7894422
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 91%|█████████ | 39/43 [00:04<00:00,  8.70it/s]

mean tensor([ 0.0142,  0.1178, -0.0780,  0.0218,  0.0266, -0.0195,  0.0014])
Average return: 0.6430798021897363
DFE Rewards Mean: 0.6430798021897363
DFE Rewards standard dev 0.2712890281604358
Returns Mean: 1.5651108
Returns standard dev 1.732519
Advantages Mean: -0.1127567
Advantages standard dev 1.732519
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 93%|█████████▎| 40/43 [00:04<00:00,  8.64it/s]

mean tensor([ 0.0038,  0.0721, -0.0710,  0.0061,  0.0119, -0.0098,  0.0091])
Average return: 0.8028039526107523
DFE Rewards Mean: 0.8028039526107523
DFE Rewards standard dev 0.16684644381678043
Returns Mean: 2.2658317
Returns standard dev 2.0414073
Advantages Mean: 0.8243318
Advantages standard dev 2.0414073
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 95%|█████████▌| 41/43 [00:04<00:00,  8.45it/s]

mean tensor([ 0.0016,  0.0842, -0.0834,  0.0078,  0.0158, -0.0017,  0.0087])
Average return: 0.8591537215265783
DFE Rewards Mean: 0.8591537215265783
DFE Rewards standard dev 0.12781483643624447
Returns Mean: 2.945434
Returns standard dev 2.7289417
Advantages Mean: 1.3173258
Advantages standard dev 2.7289417
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 98%|█████████▊| 42/43 [00:04<00:00,  8.48it/s]

mean tensor([ 0.0117,  0.1318, -0.0913,  0.0086,  0.0251, -0.0032,  0.0054])
Average return: 0.6583723935025318
DFE Rewards Mean: 0.6583723935025318
DFE Rewards standard dev 0.2619562619242248
Returns Mean: 1.4736181
Returns standard dev 1.1431583
Advantages Mean: -0.49954543
Advantages standard dev 1.1431583
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


100%|██████████| 43/43 [00:05<00:00,  8.46it/s]
[I 2023-12-22 16:33:36,619] Trial 0 finished with value: 0.8028039526107523 and parameters: {'N_UPDATES': 43, 'N_EPOCHS': 12, 'MINIBATCH_SIZE': 96, 'BATCHSIZE_MULTIPLIER': 5, 'LR': 0.00020217812911653925, 'GAMMA': 0.9973666424434683, 'GAE_LAMBDA': 0.9353074276604315, 'ENT_COEF': 0.000676622766506395, 'V_COEF': 0.6663364765117363, 'GRADIENT_CLIP': 0.6642762738760909, 'CLIP_VALUE_COEF': 0.14190218756485964, 'CLIP_RATIO': 0.10016907873814065}. Best is trial 0 with value: 0.8028039526107523.


mean tensor([ 0.0119,  0.1381, -0.1040,  0.0084,  0.0268, -0.0021,  0.0015])
Average return: 0.7337056172041765
DFE Rewards Mean: 0.7337056172041765
DFE Rewards standard dev 0.22700195746952798
Returns Mean: 1.8619848
Returns standard dev 1.6238471
Advantages Mean: -0.0061604204
Advantages standard dev 1.6238471
Fidelity History: []
2023-12-22 16:33:36 INFO ---------------- FINISHED HPO ----------------
2023-12-22 16:33:36 INFO HPO completed in 5.16 seconds.
2023-12-22 16:33:36 INFO Best trial:
2023-12-22 16:33:36 INFO -------------------------
2023-12-22 16:33:36 INFO   Value: 0.8028039526107523
2023-12-22 16:33:36 INFO   Parameters: 
2023-12-22 16:33:36 INFO     N_UPDATES: 43
2023-12-22 16:33:36 INFO     N_EPOCHS: 12
2023-12-22 16:33:36 INFO     MINIBATCH_SIZE: 96
2023-12-22 16:33:36 INFO     BATCHSIZE_MULTIPLIER: 5
2023-12-22 16:33:36 INFO     LR: 0.00020217812911653925
2023-12-22 16:33:36 INFO     GAMMA: 0.9973666424434683
2023-12-22 16:33:36 INFO     GAE_LAMBDA: 0.9353074276604315

In [7]:
# Inspect the hyperparameter names exposed by the optimizer (displayed as a list of strings).
optimizer.hyperparams

['N_UPDATES',
 'N_EPOCHS',
 'MINIBATCH_SIZE',
 'BATCHSIZE_MULTIPLIER',
 'LR',
 'GAMMA',
 'GAE_LAMBDA',
 'ENT_COEF',
 'V_COEF',
 'GRADIENT_CLIP',
 'CLIP_VALUE_COEF',
 'CLIP_RATIO',
 'BATCHSIZE']

In [8]:
# Inspect the number of HPO trials stored on the optimizer (num_hpo_trials=1 was passed at construction).
optimizer.num_hpo_trials

1

In [9]:
# Inspect the best configuration found: a dict holding the best average return
# ('best_avg_return') and the hyperparameters of that trial ('best_hyperparams').
optimizer.best_hpo_configuration

{'best_avg_return': 0.8028039526107523,
 'best_hyperparams': {'N_UPDATES': 43,
  'N_EPOCHS': 12,
  'MINIBATCH_SIZE': 96,
  'BATCHSIZE_MULTIPLIER': 5,
  'LR': 0.00020217812911653925,
  'GAMMA': 0.9973666424434683,
  'GAE_LAMBDA': 0.9353074276604315,
  'ENT_COEF': 0.000676622766506395,
  'V_COEF': 0.6663364765117363,
  'GRADIENT_CLIP': 0.6642762738760909,
  'CLIP_VALUE_COEF': 0.14190218756485964,
  'CLIP_RATIO': 0.10016907873814065}}