### CX Gate Calibration with Hyperparameter Optimization (HPO) under the New Code Architecture / Workflow (Dec 2023)

In [1]:
import sys
import os
# Intel OpenMP setting that lets the process continue when more than one
# OpenMP runtime gets loaded. It is set before the remaining imports --
# presumably because one of them pulls in the conflicting runtime (TODO confirm).
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
import time
import yaml
import pickle
import optuna

# Directory that contains the project modules imported below.
# Set the QUANTUM_OPTIMAL_CONTROL_PATH environment variable to point at your own
# checkout; when it is unset, the original author's local path is used so that
# existing setups keep working unchanged.
module_path = os.path.abspath(
    os.environ.get(
        "QUANTUM_OPTIMAL_CONTROL_PATH",
        '/Users/lukasvoss/Documents/Master Wirtschaftsphysik/Masterarbeit Yale-NUS CQT/Quantum_Optimal_Control',
    )
)
# Only append once so that re-running this cell does not grow sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

from quantumenvironment import QuantumEnvironment
from agent import Agent
from gate_level_abstraction import gate_q_env_config
from helper_functions import load_agent_from_yaml_file, create_agent_config
from ppo import make_train_ppo
from qconfig import QEnvConfig

import logging
# NOTE(review): the threshold is WARNING, but the format string prints the
# literal text "INFO" instead of %(levelname)s, so every emitted record
# (WARNING and above) is labelled "INFO" in the output. Kept as-is because it
# looks deliberate; switch to %(levelname)s if the real level should be shown.
logging.basicConfig(
    level=logging.WARNING,
    format="%(asctime)s INFO %(message)s", # hardcoded INFO level
    datefmt="%Y-%m-%d %H:%M:%S",
    stream=sys.stdout,
)



Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done for qubit 1 done.
All single qubit calibrations are done
Updated Instruction Schedule Map <InstructionScheduleMap(1Q instructions:
  q0: {'tdg', 'reset', 'id', 'h', 'measure', 's', 't', 'rz', 'z', 'x', 'sdg', 'sx', 'delay'}
  q1: {'tdg', 'reset', 'id', 'h', 'measure', 's', 't', 'rz', 'z', 'x', 'sdg', 'sx', 'delay'}
Multi qubit instructions:
  (0, 1): {'ecr', 'cr45p', 'cr45m'}
  (1, 0): {'ecr', 'cr45p', 'cr45m'}
)>
Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done fo

### Perform Hyperparameter Optimization (HPO)

In [2]:
from hyperparameter_optimization import HyperparameterOptimizer

In [3]:
# YAML file with the agent configuration; handed to HyperparameterOptimizer below.
path_agent_config = "agent_config.yaml"
# Location handed to HyperparameterOptimizer for storing the HPO results.
save_results_path = "hpo_results"

In [6]:
# Build the optimizer from the gate-level environment config, the agent
# config file and the results location, then start the HPO run.
optimizer = HyperparameterOptimizer(
    gate_q_env_config,
    path_agent_config,
    save_results_path,
    False,  # NOTE(review): meaning of this positional flag is not visible here -- confirm against HyperparameterOptimizer's signature and prefer a keyword argument
)
optimizer.optimize_hyperparameters()

SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
2023-12-22 16:17:11 INFO num_HPO_trials: 1
2023-12-22 16:17:11 INFO ---------------- STARTING HPO ----------------


[I 2023-12-22 16:17:11,478] A new study created in memory with name: no-name-0eb651e7-1a4f-4c83-936c-ec82b3d6ea58


SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


  0%|          | 0/47 [00:00<?, ?it/s]

SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


  2%|▏         | 1/47 [00:00<00:07,  6.55it/s]

mean tensor([ 0.0596, -0.1140,  0.0195, -0.0767,  0.1239, -0.0562, -0.0160])
Average return: 0.3183005641658437
DFE Rewards Mean: 0.3183005641658437
DFE Rewards standard dev 0.28502918711429776
Returns Mean: 0.6248791
Returns standard dev 1.3042903
Advantages Mean: 0.73127294
Advantages standard dev 1.3042903
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


  4%|▍         | 2/47 [00:00<00:06,  7.09it/s]

mean tensor([-0.0438, -0.1068, -0.1052, -0.0944,  0.1731,  0.0053,  0.0079])
Average return: 0.25941635023654963
DFE Rewards Mean: 0.25941635023654963
DFE Rewards standard dev 0.2062260337342948
Returns Mean: 0.36241472
Returns standard dev 0.40003636
Advantages Mean: 0.20796214
Advantages standard dev 0.40003633
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


  6%|▋         | 3/47 [00:00<00:06,  7.33it/s]

mean tensor([ 0.0173, -0.0777, -0.0577, -0.0732,  0.1470, -0.0343,  0.0447])
Average return: 0.40525153054057506
DFE Rewards Mean: 0.40525153054057506
DFE Rewards standard dev 0.27608236784271545
Returns Mean: 0.7453237
Returns standard dev 1.0781214
Advantages Mean: 0.6164589
Advantages standard dev 1.0781214
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


  9%|▊         | 4/47 [00:00<00:05,  7.18it/s]

mean tensor([ 0.0204, -0.0861, -0.0346, -0.0412,  0.1526, -0.0492,  0.0367])
Average return: 0.45747462320792237
DFE Rewards Mean: 0.45747462320792237
DFE Rewards standard dev 0.2861741138563314
Returns Mean: 0.9090042
Returns standard dev 1.1809882
Advantages Mean: 0.6948104
Advantages standard dev 1.1809882
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 11%|█         | 5/47 [00:00<00:05,  7.34it/s]

mean tensor([-0.0138, -0.0814, -0.0462, -0.0450,  0.1288, -0.0465,  0.0425])
Average return: 0.4924708393426251
DFE Rewards Mean: 0.4924708393426251
DFE Rewards standard dev 0.27202346215485285
Returns Mean: 0.92365986
Returns standard dev 0.8927479
Advantages Mean: 0.49649313
Advantages standard dev 0.8927479
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 13%|█▎        | 6/47 [00:00<00:05,  7.50it/s]

mean tensor([-0.0573, -0.0716, -0.1088, -0.0388,  0.1399,  0.0211,  0.0065])
Average return: 0.2767978256593406
DFE Rewards Mean: 0.2767978256593406
DFE Rewards standard dev 0.21866297599260076
Returns Mean: 0.39012897
Returns standard dev 0.39495185
Advantages Mean: -0.26302618
Advantages standard dev 0.39495185
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 15%|█▍        | 7/47 [00:00<00:05,  7.61it/s]

mean tensor([-0.0376,  0.0246, -0.0603, -0.0350,  0.1628, -0.0119,  0.0110])
Average return: 0.29947820062989616
DFE Rewards Mean: 0.29947820062989616
DFE Rewards standard dev 0.21876861654926685
Returns Mean: 0.42335513
Returns standard dev 0.39345938
Advantages Mean: -0.06390491
Advantages standard dev 0.39345935
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 17%|█▋        | 8/47 [00:01<00:05,  7.61it/s]

mean tensor([-0.0726, -0.0042, -0.0801, -0.0441,  0.1758,  0.0317,  0.0427])
Average return: 0.3118066960026456
DFE Rewards Mean: 0.3118066960026456
DFE Rewards standard dev 0.23609717378921163
Returns Mean: 0.45932272
Returns standard dev 0.45526683
Advantages Mean: -0.011525213
Advantages standard dev 0.45526683
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 19%|█▉        | 9/47 [00:01<00:05,  7.45it/s]

mean tensor([-0.0062, -0.0198, -0.0516, -0.0289,  0.1256,  0.0143,  0.0212])
Average return: 0.3937940211354658
DFE Rewards Mean: 0.3937940211354658
DFE Rewards standard dev 0.23485368715522129
Returns Mean: 0.6036011
Returns standard dev 0.5053097
Advantages Mean: 0.21249366
Advantages standard dev 0.5053097
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 21%|██▏       | 10/47 [00:01<00:04,  7.46it/s]

mean tensor([-0.0438, -0.0284, -0.0632, -0.0305,  0.1019,  0.0960,  0.0134])
Average return: 0.4285797212151008
DFE Rewards Mean: 0.4285797212151008
DFE Rewards standard dev 0.2559797149018743
Returns Mean: 0.6949809
Returns standard dev 0.56991595
Advantages Mean: 0.13126692
Advantages standard dev 0.56991595
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 23%|██▎       | 11/47 [00:01<00:04,  7.55it/s]

mean tensor([-0.0057, -0.0437, -0.0044, -0.0049,  0.0681,  0.0054, -0.0055])
Average return: 0.5930752434964252
DFE Rewards Mean: 0.5930752434964252
DFE Rewards standard dev 0.25849293905380694
Returns Mean: 1.3235503
Returns standard dev 1.6944085
Advantages Mean: 0.8146222
Advantages standard dev 1.6944085
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 26%|██▌       | 12/47 [00:01<00:04,  7.72it/s]

mean tensor([-0.0606, -0.0110,  0.0472, -0.0847,  0.0261,  0.0596,  0.0145])
Average return: 0.5142560430214106
DFE Rewards Mean: 0.5142560430214106
DFE Rewards standard dev 0.26074304216992383
Returns Mean: 0.9807756
Returns standard dev 1.0954037
Advantages Mean: 0.1925676
Advantages standard dev 1.0954038
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 28%|██▊       | 13/47 [00:01<00:04,  7.78it/s]

mean tensor([-0.1128,  0.0176,  0.0275, -0.0966, -0.0263,  0.1196, -0.0035])
Average return: 0.6456294730388059
DFE Rewards Mean: 0.6456294730388059
DFE Rewards standard dev 0.19578079723152994
Returns Mean: 1.2254058
Returns standard dev 0.67905855
Advantages Mean: 0.14775433
Advantages standard dev 0.67905855
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 30%|██▉       | 14/47 [00:01<00:04,  7.89it/s]

mean tensor([-0.0596, -0.0148,  0.0259, -0.0721, -0.0038,  0.0544, -0.0275])
Average return: 0.7086561706510748
DFE Rewards Mean: 0.7086561706510748
DFE Rewards standard dev 0.19463519204255328
Returns Mean: 1.5423461
Returns standard dev 1.1085557
Advantages Mean: 0.5157103
Advantages standard dev 1.1085557
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 32%|███▏      | 15/47 [00:01<00:03,  8.04it/s]

mean tensor([-0.0274, -0.0541,  0.0519, -0.0331,  0.0147,  0.0049, -0.0330])
Average return: 0.8397851124317403
DFE Rewards Mean: 0.8397851124317403
DFE Rewards standard dev 0.13011455448763726
Returns Mean: 2.5392618
Returns standard dev 2.2588441
Advantages Mean: 1.4973941
Advantages standard dev 2.2588441
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 34%|███▍      | 16/47 [00:02<00:03,  7.76it/s]

mean tensor([ 0.0114,  0.0023,  0.0495, -0.0417, -0.0135,  0.0155, -0.0457])
Average return: 0.9023146655775321
DFE Rewards Mean: 0.9023146655775321
DFE Rewards standard dev 0.09610119993734136
Returns Mean: 3.4002283
Returns standard dev 2.7891428
Advantages Mean: 2.1794536
Advantages standard dev 2.7891428
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 36%|███▌      | 17/47 [00:02<00:03,  7.86it/s]

mean tensor([-0.0046,  0.0133,  0.0683, -0.0541, -0.0181,  0.0786, -0.0415])
Average return: 0.8010226304396593
DFE Rewards Mean: 0.8010226304396593
DFE Rewards standard dev 0.16214758608385532
Returns Mean: 2.2711549
Returns standard dev 2.2400491
Advantages Mean: 0.66431165
Advantages standard dev 2.2400491
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 38%|███▊      | 18/47 [00:02<00:03,  7.86it/s]

mean tensor([-0.0144, -0.0404,  0.0469, -0.0081, -0.0055,  0.0293,  0.0023])
Average return: 0.92985382148669
DFE Rewards Mean: 0.92985382148669
DFE Rewards standard dev 0.07465098104036896
Returns Mean: 3.8210504
Returns standard dev 2.8956652
Advantages Mean: 2.0024297
Advantages standard dev 2.8956652
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 40%|████      | 19/47 [00:02<00:03,  7.57it/s]

mean tensor([ 0.0016, -0.0602,  0.0447, -0.0078,  0.0028,  0.0125, -0.0241])
Average return: 0.9261176214486917
DFE Rewards Mean: 0.9261176214486917
DFE Rewards standard dev 0.07661494120506213
Returns Mean: 3.439433
Returns standard dev 2.3904123
Advantages Mean: 1.5636843
Advantages standard dev 2.3904123
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 43%|████▎     | 20/47 [00:02<00:03,  7.61it/s]

mean tensor([ 0.0035, -0.0519,  0.0345, -0.0096,  0.0386,  0.0104, -0.0084])
Average return: 0.9410680779063844
DFE Rewards Mean: 0.9410680779063844
DFE Rewards standard dev 0.054449839593851544
Returns Mean: 4.0923557
Returns standard dev 3.2152846
Advantages Mean: 1.9445243
Advantages standard dev 3.2152846
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 45%|████▍     | 21/47 [00:02<00:03,  7.42it/s]

mean tensor([ 0.0222, -0.0668,  0.0764, -0.0082,  0.0651, -0.0521, -0.0308])
Average return: 0.9511696935495336
DFE Rewards Mean: 0.9511696935495336
DFE Rewards standard dev 0.049830319427762094
Returns Mean: 4.106264
Returns standard dev 2.8189895
Advantages Mean: 2.1045756
Advantages standard dev 2.8189895
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 47%|████▋     | 22/47 [00:02<00:03,  7.35it/s]

mean tensor([ 0.0031, -0.0477,  0.0976, -0.0004,  0.0358, -0.0644, -0.0348])
Average return: 0.9566998981325572
DFE Rewards Mean: 0.9566998981325572
DFE Rewards standard dev 0.04204086029093229
Returns Mean: 4.3505635
Returns standard dev 3.0402412
Advantages Mean: 2.0571208
Advantages standard dev 3.0402412
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 49%|████▉     | 23/47 [00:03<00:03,  7.21it/s]

mean tensor([-0.0188, -0.0406,  0.1245,  0.0029, -0.0024,  0.0021, -0.0215])
Average return: 0.9565580152349208
DFE Rewards Mean: 0.9565580152349208
DFE Rewards standard dev 0.05667573757280805
Returns Mean: 5.062687
Returns standard dev 3.8378766
Advantages Mean: 2.3710263
Advantages standard dev 3.8378766
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 51%|█████     | 24/47 [00:03<00:03,  7.33it/s]

mean tensor([ 0.0101, -0.0735,  0.0918,  0.0002,  0.0197, -0.0299, -0.0316])
Average return: 0.9372028995206154
DFE Rewards Mean: 0.9372028995206154
DFE Rewards standard dev 0.07740789064447855
Returns Mean: 4.266731
Returns standard dev 3.2997484
Advantages Mean: 2.1316054
Advantages standard dev 3.2997484
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 53%|█████▎    | 25/47 [00:03<00:02,  7.50it/s]

mean tensor([-0.0770, -0.0921,  0.1019,  0.0517, -0.0292, -0.0058, -0.0064])
Average return: 0.9638011086291007
DFE Rewards Mean: 0.9638011086291007
DFE Rewards standard dev 0.026881076698676697
Returns Mean: 4.280676
Returns standard dev 2.8822005
Advantages Mean: 0.93426895
Advantages standard dev 2.8822005
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 55%|█████▌    | 26/47 [00:03<00:02,  7.62it/s]

mean tensor([-0.0365, -0.1566,  0.1269,  0.0072, -0.0348,  0.0442,  0.0030])
Average return: 0.969122537881409
DFE Rewards Mean: 0.969122537881409
DFE Rewards standard dev 0.029264222946431107
Returns Mean: 4.8140917
Returns standard dev 3.3416452
Advantages Mean: 1.1741723
Advantages standard dev 3.3416452
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 57%|█████▋    | 27/47 [00:03<00:02,  7.75it/s]

mean tensor([ 0.0488, -0.1769,  0.1508,  0.0671, -0.2004, -0.0987,  0.2260])
Average return: 0.8268361816251732
DFE Rewards Mean: 0.8268361816251732
DFE Rewards standard dev 0.10209763983686047
Returns Mean: 2.000443
Returns standard dev 1.0129901
Advantages Mean: -0.91847587
Advantages standard dev 1.0129902
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 60%|█████▉    | 28/47 [00:03<00:02,  7.84it/s]

mean tensor([ 0.0520, -0.1956,  0.1486,  0.1181, -0.1779, -0.0969,  0.1939])
Average return: 0.8871129586134001
DFE Rewards Mean: 0.8871129586134001
DFE Rewards standard dev 0.06458792619047826
Returns Mean: 2.5098855
Returns standard dev 1.505456
Advantages Mean: -0.26103365
Advantages standard dev 1.505456
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 62%|██████▏   | 29/47 [00:03<00:02,  7.94it/s]

mean tensor([ 0.0559, -0.1889,  0.1443,  0.1129, -0.0935, -0.0965,  0.1689])
Average return: 0.7458373265874124
DFE Rewards Mean: 0.7458373265874124
DFE Rewards standard dev 0.152181711306709
Returns Mean: 1.6529602
Returns standard dev 0.9687611
Advantages Mean: -0.582484
Advantages standard dev 0.9687611
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 64%|██████▍   | 30/47 [00:03<00:02,  7.99it/s]

mean tensor([ 0.0192, -0.2002,  0.1878,  0.1032, -0.1793, -0.1009,  0.1760])
Average return: 0.8451350383759388
DFE Rewards Mean: 0.8451350383759388
DFE Rewards standard dev 0.07539482233922835
Returns Mean: 2.079937
Returns standard dev 1.1447145
Advantages Mean: -0.8264997
Advantages standard dev 1.1447145
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 66%|██████▌   | 31/47 [00:04<00:01,  8.05it/s]

mean tensor([ 0.0558, -0.1729,  0.1542,  0.0723, -0.1283, -0.0520,  0.1282])
Average return: 0.8458857872148163
DFE Rewards Mean: 0.8458857872148163
DFE Rewards standard dev 0.10112252822861227
Returns Mean: 2.404505
Returns standard dev 2.012301
Advantages Mean: 0.17589174
Advantages standard dev 2.012301
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 68%|██████▊   | 32/47 [00:04<00:01,  8.13it/s]

mean tensor([ 0.0400, -0.1806,  0.1329,  0.0086, -0.1471,  0.0068,  0.0971])
Average return: 0.9209380398739557
DFE Rewards Mean: 0.9209380398739557
DFE Rewards standard dev 0.058103973737010504
Returns Mean: 3.1428988
Returns standard dev 2.1082997
Advantages Mean: 0.8874323
Advantages standard dev 2.1082997
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 70%|███████   | 33/47 [00:04<00:01,  7.86it/s]

mean tensor([ 0.0078, -0.1733,  0.1623,  0.0277, -0.1455, -0.0103,  0.1031])
Average return: 0.8974945544490076
DFE Rewards Mean: 0.8974945544490076
DFE Rewards standard dev 0.07029957522938526
Returns Mean: 2.685343
Returns standard dev 1.5518093
Advantages Mean: -0.26956317
Advantages standard dev 1.5518092
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 72%|███████▏  | 34/47 [00:04<00:01,  7.77it/s]

mean tensor([ 0.0147, -0.2240,  0.1029,  0.0286, -0.0785,  0.0127,  0.0961])
Average return: 0.9467917375504078
DFE Rewards Mean: 0.9467917375504078
DFE Rewards standard dev 0.05116827693850136
Returns Mean: 4.298818
Returns standard dev 3.267462
Advantages Mean: 1.8522631
Advantages standard dev 3.2674618
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 74%|███████▍  | 35/47 [00:04<00:01,  7.92it/s]

mean tensor([ 0.0185, -0.2252,  0.1119,  0.0042, -0.0803, -0.0117,  0.1005])
Average return: 0.9460942401446483
DFE Rewards Mean: 0.9460942401446483
DFE Rewards standard dev 0.04763982914946939
Returns Mean: 3.9432607
Returns standard dev 2.8236563
Advantages Mean: 1.0966994
Advantages standard dev 2.8236563
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 77%|███████▋  | 36/47 [00:04<00:01,  8.05it/s]

mean tensor([ 0.0091, -0.2176,  0.1140,  0.0284, -0.0194, -0.0124,  0.0711])
Average return: 0.976591397377253
DFE Rewards Mean: 0.976591397377253
DFE Rewards standard dev 0.02902583489984699
Returns Mean: 5.8725653
Returns standard dev 3.9717026
Advantages Mean: 3.0942125
Advantages standard dev 3.9717026
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 79%|███████▊  | 37/47 [00:04<00:01,  8.19it/s]

mean tensor([ 0.0088, -0.2171,  0.1557,  0.0186,  0.0004, -0.0160,  0.0761])
Average return: 0.956288825754425
DFE Rewards Mean: 0.956288825754425
DFE Rewards standard dev 0.04608133036308571
Returns Mean: 4.2184677
Returns standard dev 2.647802
Advantages Mean: 1.2191824
Advantages standard dev 2.647802
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 81%|████████  | 38/47 [00:04<00:01,  8.24it/s]

mean tensor([ 0.0186, -0.2594,  0.1791,  0.0028, -0.0120,  0.0128,  0.0420])
Average return: 0.9497237999830008
DFE Rewards Mean: 0.9497237999830008
DFE Rewards standard dev 0.059491822658991306
Returns Mean: 4.769208
Returns standard dev 3.7550614
Advantages Mean: 1.1472852
Advantages standard dev 3.7550614
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 83%|████████▎ | 39/47 [00:05<00:00,  8.30it/s]

mean tensor([-0.0822, -0.1062,  0.3335,  0.0164, -0.1228,  0.0635, -0.0584])
Average return: 0.8695400152109818
DFE Rewards Mean: 0.8695400152109818
DFE Rewards standard dev 0.07329017056633631
Returns Mean: 2.2928216
Returns standard dev 1.1664553
Advantages Mean: -1.7114631
Advantages standard dev 1.1664553
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 85%|████████▌ | 40/47 [00:05<00:00,  8.33it/s]

mean tensor([-0.0471, -0.1446,  0.2780,  0.0053, -0.0962,  0.0310, -0.0504])
Average return: 0.8980184214546905
DFE Rewards Mean: 0.8980184214546905
DFE Rewards standard dev 0.07358733289280592
Returns Mean: 2.775899
Returns standard dev 1.8263063
Advantages Mean: -0.6949666
Advantages standard dev 1.8263063
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 87%|████████▋ | 41/47 [00:05<00:00,  8.32it/s]

mean tensor([-0.0433, -0.1949,  0.2515,  0.0136, -0.0773,  0.0511, -0.0414])
Average return: 0.9550955359438085
DFE Rewards Mean: 0.9550955359438085
DFE Rewards standard dev 0.0430503889977564
Returns Mean: 4.1027265
Returns standard dev 2.7591805
Advantages Mean: 0.654312
Advantages standard dev 2.7591805
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 89%|████████▉ | 42/47 [00:05<00:00,  8.34it/s]

mean tensor([-0.0158, -0.2044,  0.2018,  0.0085, -0.0107,  0.0421, -0.0413])
Average return: 0.9707013340244504
DFE Rewards Mean: 0.9707013340244504
DFE Rewards standard dev 0.033972619872484414
Returns Mean: 5.2030187
Returns standard dev 3.5682216
Advantages Mean: 2.1457396
Advantages standard dev 3.5682216
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 91%|█████████▏| 43/47 [00:05<00:00,  8.36it/s]

mean tensor([-0.0281, -0.2027,  0.2024,  0.0187, -0.0521,  0.0415, -0.0306])
Average return: 0.9641295633428777
DFE Rewards Mean: 0.9641295633428777
DFE Rewards standard dev 0.031961514125360475
Returns Mean: 4.4121685
Returns standard dev 2.9367497
Advantages Mean: 0.6792581
Advantages standard dev 2.9367495
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 94%|█████████▎| 44/47 [00:05<00:00,  8.38it/s]

mean tensor([-0.0265, -0.1989,  0.2190,  0.0182, -0.0365,  0.0430, -0.0352])
Average return: 0.9733697980496044
DFE Rewards Mean: 0.9733697980496044
DFE Rewards standard dev 0.024825885316801768
Returns Mean: 4.752741
Returns standard dev 3.0013058
Advantages Mean: 0.8045648
Advantages standard dev 3.0013058
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 96%|█████████▌| 45/47 [00:05<00:00,  8.38it/s]

mean tensor([-0.0953, -0.0736, -0.0022,  0.0221, -0.0690,  0.0358,  0.1363])
Average return: 0.946320170548208
DFE Rewards Mean: 0.946320170548208
DFE Rewards standard dev 0.05279301430461361
Returns Mean: 3.866943
Returns standard dev 2.5924442
Advantages Mean: 0.043649606
Advantages standard dev 2.592444
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 98%|█████████▊| 46/47 [00:05<00:00,  8.32it/s]

mean tensor([-0.0885, -0.1036,  0.0042,  0.0189, -0.0354,  0.0189,  0.1188])
Average return: 0.9428449169031113
DFE Rewards Mean: 0.9428449169031113
DFE Rewards standard dev 0.047123761226520136
Returns Mean: 3.8953676
Returns standard dev 2.9873912
Advantages Mean: 0.2782923
Advantages standard dev 2.9873912
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


100%|██████████| 47/47 [00:06<00:00,  7.83it/s]
[I 2023-12-22 16:17:17,554] Trial 0 finished with value: 0.9733697980496044 and parameters: {'N_UPDATES': 47, 'N_EPOCHS': 16, 'MINIBATCH_SIZE': 48, 'BATCHSIZE_MULTIPLIER': 6, 'LR': 0.003015472165788482, 'GAMMA': 0.9628452903435801, 'GAE_LAMBDA': 0.958779784198535, 'ENT_COEF': 0.0007091593841853005, 'V_COEF': 0.46685833460074655, 'GRADIENT_CLIP': 0.14425166294114908, 'CLIP_VALUE_COEF': 0.13219506338166914, 'CLIP_RATIO': 0.24473165681404896}. Best is trial 0 with value: 0.9733697980496044.


mean tensor([-0.0918, -0.1111, -0.0141,  0.0745, -0.0333,  0.0332,  0.1148])
Average return: 0.879252066437124
DFE Rewards Mean: 0.879252066437124
DFE Rewards standard dev 0.062330408470783714
Returns Mean: 2.3151457
Returns standard dev 0.9391003
Advantages Mean: -1.7907532
Advantages standard dev 0.9391004
Fidelity History: []
Best configuration saved to hpo_results/reward_0.97337.pickle


In [7]:
# Display the list of hyperparameter names held by the optimizer.
optimizer.hyperparams

['N_UPDATES',
 'N_EPOCHS',
 'MINIBATCH_SIZE',
 'BATCHSIZE_MULTIPLIER',
 'LR',
 'GAMMA',
 'GAE_LAMBDA',
 'ENT_COEF',
 'V_COEF',
 'GRADIENT_CLIP',
 'CLIP_VALUE_COEF',
 'CLIP_RATIO',
 'BATCHSIZE']

In [8]:
# Display the number of HPO trials the optimizer was set up with.
optimizer.num_hpo_trials

1

In [9]:
# Display the best configuration found by the HPO run
# (best average return together with the parameters that produced it).
optimizer.best_hpo_configuration

{'best_avg_return': 0.9733697980496044,
 'best_parameters': {'N_UPDATES': 47,
  'N_EPOCHS': 16,
  'MINIBATCH_SIZE': 48,
  'BATCHSIZE_MULTIPLIER': 6,
  'LR': 0.003015472165788482,
  'GAMMA': 0.9628452903435801,
  'GAE_LAMBDA': 0.958779784198535,
  'ENT_COEF': 0.0007091593841853005,
  'V_COEF': 0.46685833460074655,
  'GRADIENT_CLIP': 0.14425166294114908,
  'CLIP_VALUE_COEF': 0.13219506338166914,
  'CLIP_RATIO': 0.24473165681404896}}