### CX Calibration with Hyperparameter Optimization (HPO) under the New Code Architecture / Workflow (Dec 2023)

In [1]:
import sys
import os
# Set before the heavy imports below so the variable is already in the
# environment when they load.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort that occurs when several libraries each bundle their own OpenMP copy --
# confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
import time
import yaml
import pickle
import optuna
# Directory that is appended to sys.path before the project imports below
# (presumably the Quantum_Optimal_Control checkout that provides them).
# The original author's absolute path stays as the default so existing setups
# keep working; on any other machine set the QUANTUM_OPTIMAL_CONTROL_PATH
# environment variable instead of editing this cell. An empty value falls back
# to the default as well.
module_path = os.path.abspath(
    os.environ.get("QUANTUM_OPTIMAL_CONTROL_PATH")
    or '/Users/lukasvoss/Documents/Master Wirtschaftsphysik/Masterarbeit Yale-NUS CQT/Quantum_Optimal_Control'
)
# Guard against duplicate entries when the cell is re-run in the same kernel.
if module_path not in sys.path:
    sys.path.append(module_path)

from quantumenvironment import QuantumEnvironment
from agent import Agent
from gate_level_abstraction import gate_q_env_config
from helper_functions import load_agent_from_yaml_file, create_agent_config
from ppo import make_train_ppo
from qconfig import QEnvConfig

import logging
# Root-logger configuration: records at WARNING and above are written to
# stdout so they show up in the notebook's cell output.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.WARNING,
    datefmt="%Y-%m-%d %H:%M:%S",
    # The label "INFO" is a literal part of the format (no %(levelname)s), so
    # every emitted record is printed as INFO regardless of its real level.
    format="%(asctime)s INFO %(message)s",
)



Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done for qubit 1 done.
All single qubit calibrations are done
Updated Instruction Schedule Map <InstructionScheduleMap(1Q instructions:
  q0: {'measure', 'delay', 'tdg', 'sdg', 'z', 'rz', 'x', 'h', 't', 'id', 'sx', 'reset', 's'}
  q1: {'measure', 'delay', 'tdg', 'sdg', 'z', 'rz', 'x', 'h', 't', 'id', 'sx', 'reset', 's'}
Multi qubit instructions:
  (0, 1): {'cr45m', 'ecr', 'cr45p'}
  (1, 0): {'cr45m', 'ecr', 'cr45p'}
)>
Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done fo

### Perform Hyperparameter Optimization (HPO)

In [2]:
from hyperparameter_optimization import HyperparameterOptimizer

In [3]:
# Both values are handed to HyperparameterOptimizer in the next cell. They are
# relative paths, so they resolve against the kernel's working directory.
path_agent_config, save_results_path = (
    'agent_config.yaml',  # agent configuration file
    'hpo_results',        # where HPO results go -- NOTE(review): directory vs. file prefix not visible here
)

In [4]:
# Build the optimizer from the gate-level environment config, the agent config
# path and the results path defined above. Only a single HPO trial is requested
# here (num_hpo_trials=1), with progress logging enabled.
optimizer = HyperparameterOptimizer(
    gate_q_env_config,
    path_agent_config,
    save_results_path,
    log_progress=True,
    num_hpo_trials=1,
)

# Run the search; training progress and the trial summary are printed below.
optimizer.optimize_hyperparameters()

 51%|█████     | 50/98 [00:03<00:03, 14.97it/s]

Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 53%|█████▎    | 52/98 [00:03<00:03, 15.05it/s]

mean tensor([ 0.0129,  0.0726, -0.0137, -0.0094,  0.0062, -0.0097,  0.0356])
Average return: 0.9851323146129741
DFE Rewards Mean: 0.9851323146129741
DFE Rewards standard dev 0.02144069547713437
Returns Mean: 6.5228567
Returns standard dev 4.1254487
Advantages Mean: 2.5167358
Advantages standard dev 4.1254487
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0193,  0.0854, -0.0993,  0.0441,  0.0353, -0.0756, -0.0152])
Average return: 0.984029575516343
DFE Rewards Mean: 0.984029575516343
DFE Rewards standard dev 0.020173092671580642
Returns Mean: 5.645577
Returns standard dev 3.4630191
Advantages Mean: 0.97009593
Advantages standard dev 3.463019
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([-0.1244,  0.0259, -0.0127,  0.0696,  0.1208, -0.0876,  0.0031])
Average return: 0.8966990139784136
DFE Rewards Mean: 

 55%|█████▌    | 54/98 [00:03<00:02, 15.26it/s]

mean tensor([-0.0862, -0.0127,  0.0153,  0.0336,  0.0879, -0.0753, -0.0043])
Average return: 0.9434545605210091
DFE Rewards Mean: 0.9434545605210091
DFE Rewards standard dev 0.03685547459862915
Returns Mean: 3.4334366
Returns standard dev 2.1183841
Advantages Mean: -0.2785891
Advantages standard dev 2.1183841
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 57%|█████▋    | 56/98 [00:03<00:02, 15.35it/s]

mean tensor([-0.1060,  0.0115, -0.0455,  0.0479,  0.0655, -0.1168,  0.0314])
Average return: 0.956668687714669
DFE Rewards Mean: 0.956668687714669
DFE Rewards standard dev 0.03646304519761939
Returns Mean: 4.021659
Returns standard dev 2.6717005
Advantages Mean: -0.008654356
Advantages standard dev 2.6717005
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([-0.0971,  0.0164,  0.0024,  0.0202,  0.0605, -0.1184,  0.0120])
Average return: 0.9656030407621943
DFE Rewards Mean: 0.9656030407621943
DFE Rewards standard dev 0.03550370435535728
Returns Mean: 4.9581513
Returns standard dev 3.5338633
Advantages Mean: 0.82132435
Advantages standard dev 3.5338635
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.1137, -0.0840,  0.0432, -0.0402,  0.0616, -0.1674, -0.0557])
Average return: 0.8848901503166539
DFE Rewards Mea

 59%|█████▉    | 58/98 [00:03<00:02, 15.51it/s]

mean tensor([-0.1456, -0.0420,  0.0305, -0.0301,  0.0182, -0.2004, -0.0450])
Average return: 0.9697157881266049
DFE Rewards Mean: 0.9697157881266049
DFE Rewards standard dev 0.034703243057864756
Returns Mean: 5.2235894
Returns standard dev 3.6656075
Advantages Mean: 1.2551708
Advantages standard dev 3.6656077
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 61%|██████    | 60/98 [00:03<00:02, 15.53it/s]

mean tensor([-0.1378, -0.0356,  0.0067, -0.0216,  0.0506, -0.1675, -0.0494])
Average return: 0.8888877871633982
DFE Rewards Mean: 0.8888877871633982
DFE Rewards standard dev 0.04536287543451965
Returns Mean: 2.296083
Returns standard dev 0.48251376
Advantages Mean: -1.6817163
Advantages standard dev 0.48251376
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.1746, -0.0229, -0.0180, -0.0125,  0.0199, -0.1855, -0.0729])
Average return: 0.8900484258203178
DFE Rewards Mean: 0.8900484258203178
DFE Rewards standard dev 0.04601041164780469
Returns Mean: 2.401705
Returns standard dev 1.1971985
Advantages Mean: -1.644974
Advantages standard dev 1.1971985
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([-0.1432, -0.0258,  0.0420, -0.0423,  0.0337, -0.1379, -0.0594])
Average return: 0.8942562786331649
DFE Rewards Mea

 63%|██████▎   | 62/98 [00:04<00:02, 15.62it/s]

mean tensor([-0.0716, -0.0413,  0.0898, -0.0647,  0.0551, -0.0868, -0.0563])
Average return: 0.9047260391105666
DFE Rewards Mean: 0.9047260391105666
DFE Rewards standard dev 0.07329288983145325
Returns Mean: 2.805959
Returns standard dev 1.6726595
Advantages Mean: 0.2845841
Advantages standard dev 1.6726594
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 65%|██████▌   | 64/98 [00:04<00:02, 15.41it/s]

mean tensor([-0.1115, -0.0318,  0.0040,  0.0013,  0.0017, -0.1345, -0.0257])
Average return: 0.9785551709614216
DFE Rewards Mean: 0.9785551709614216
DFE Rewards standard dev 0.02905050557265943
Returns Mean: 6.154315
Returns standard dev 4.147455
Advantages Mean: 2.6424096
Advantages standard dev 4.147455
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])
mean tensor([-0.0749,  0.0142,  0.0606, -0.0344, -0.0436, -0.0872, -0.0122])
Average return: 0.9621945857179615
DFE Rewards Mean: 0.9621945857179615
DFE Rewards standard dev 0.030812488839412164
Returns Mean: 4.375989
Returns standard dev 2.9663296
Advantages Mean: 1.0510684
Advantages standard dev 2.9663296
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 67%|██████▋   | 66/98 [00:04<00:02, 15.41it/s]

mean tensor([-0.0383,  0.0078,  0.0175, -0.0208, -0.0592, -0.1029, -0.0021])
Average return: 0.9538022551550369
DFE Rewards Mean: 0.9538022551550369
DFE Rewards standard dev 0.036532078573712
Returns Mean: 4.1463685
Returns standard dev 2.9972446
Advantages Mean: 0.3176275
Advantages standard dev 2.9972441
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])
mean tensor([-0.0438,  0.0264,  0.0144, -0.0352, -0.0600, -0.0554, -0.0159])
Average return: 0.9735113111840008
DFE Rewards Mean: 0.9735113111840008
DFE Rewards standard dev 0.025536239892043622
Returns Mean: 5.4035144
Returns standard dev 3.876914
Advantages Mean: 1.4550334
Advantages standard dev 3.8769147
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0121,  0.0105,  0.0318, -0.0386, -0.0370, -0.0271, -0.0126])
Average return: 0.9786544206533551
DFE Rewards Mean: 

 69%|██████▉   | 68/98 [00:04<00:02, 14.90it/s]

mean tensor([-0.0375,  0.0268, -0.0412, -0.0303, -0.0745, -0.0425, -0.0343])
Average return: 0.9659290713921062
DFE Rewards Mean: 0.9659290713921062
DFE Rewards standard dev 0.029325493609360122
Returns Mean: 4.245868
Returns standard dev 2.5116966
Advantages Mean: -0.34945515
Advantages standard dev 2.5116966
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0921,  0.0490, -0.1059, -0.0481, -0.0829, -0.0143, -0.0640])
Average return: 0.923112756232478
DFE Rewards Mean: 0.923112756232478
DFE Rewards standard dev 0.04967414771210666
Returns Mean: 3.0064485
Returns standard dev 1.6988255
Advantages Mean: -1.8981133
Advantages standard dev 1.6988255
Fidelity History: []


 71%|███████▏  | 70/98 [00:04<00:01, 15.29it/s]

SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0989,  0.0981, -0.0777, -0.0460, -0.0593, -0.0131, -0.0500])
Average return: 0.9507805656440527
DFE Rewards Mean: 0.9507805656440527
DFE Rewards standard dev 0.03215021527502243
Returns Mean: 3.5212655
Returns standard dev 1.9053209
Advantages Mean: -0.9296592
Advantages standard dev 1.9053209
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])
mean tensor([-0.0398,  0.0629,  0.0171, -0.0544, -0.0351, -0.0291, -0.0438])
Average return: 0.9762109727930708
DFE Rewards Mean: 0.9762109727930708
DFE Rewards standard dev 0.02680669787598766
Returns Mean: 5.714058
Returns standard dev 3.9037075
Advantages Mean: 2.1586902
Advantages standard dev 3.9037075
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 73%|███████▎  | 72/98 [00:04<00:01, 15.60it/s]

mean tensor([-0.0217,  0.0900,  0.0399, -0.0583,  0.0229, -0.0743, -0.0506])
Average return: 0.9815116781627504
DFE Rewards Mean: 0.9815116781627504
DFE Rewards standard dev 0.022696822983378336
Returns Mean: 5.9925623
Returns standard dev 4.0457377
Advantages Mean: 2.0481682
Advantages standard dev 4.0457377
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 76%|███████▌  | 74/98 [00:04<00:01, 15.67it/s]

mean tensor([-0.0401,  0.0792,  0.0494, -0.0866,  0.0334, -0.0316, -0.0572])
Average return: 0.9785492457669511
DFE Rewards Mean: 0.9785492457669511
DFE Rewards standard dev 0.02262328383781761
Returns Mean: 5.5907984
Returns standard dev 3.6577833
Advantages Mean: 1.3314525
Advantages standard dev 3.6577833
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0456,  0.0611,  0.0438, -0.0688,  0.0132, -0.0195, -0.0448])
Average return: 0.9406712096903738
DFE Rewards Mean: 0.9406712096903738
DFE Rewards standard dev 0.04374944147665132
Returns Mean: 3.600677
Returns standard dev 2.5658946
Advantages Mean: -1.2924627
Advantages standard dev 2.5658944
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0125,  0.0013,  0.0785, -0.0957,  0.0471, -0.0276, -0.0403])
Average return: 0.9782825891454312
DFE Rewards Mean: 0.

 78%|███████▊  | 76/98 [00:04<00:01, 15.84it/s]

mean tensor([-0.0520,  0.0566,  0.0091, -0.0936, -0.0099, -0.0399, -0.0425])
Average return: 0.9519642172831041
DFE Rewards Mean: 0.9519642172831041
DFE Rewards standard dev 0.029813683309738097
Returns Mean: 3.6336765
Returns standard dev 2.2937067
Advantages Mean: -1.287913
Advantages standard dev 2.2937071
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 80%|███████▉  | 78/98 [00:05<00:01, 15.97it/s]

mean tensor([ 0.0144,  0.0064,  0.0325, -0.0849,  0.0403, -0.0267, -0.0414])
Average return: 0.9256069444769321
DFE Rewards Mean: 0.9256069444769321
DFE Rewards standard dev 0.06997695368686742
Returns Mean: 3.5578723
Returns standard dev 2.6402707
Advantages Mean: -0.052799385
Advantages standard dev 2.6402705
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0236,  0.0617, -0.0554, -0.0364, -0.0362, -0.0031, -0.0108])
Average return: 0.9820888787595755
DFE Rewards Mean: 0.9820888787595755
DFE Rewards standard dev 0.020419132408183042
Returns Mean: 5.9379153
Returns standard dev 3.8268526
Advantages Mean: 1.3896713
Advantages standard dev 3.8268526
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([-0.0272,  0.1090, -0.1131, -0.0532,  0.0203,  0.0085, -0.0164])
Average return: 0.9843948644948176
DFE Rewards 

 82%|████████▏ | 80/98 [00:05<00:01, 15.69it/s]

mean tensor([-0.0719,  0.1642, -0.0023, -0.0309,  0.0867,  0.0269, -0.0405])
Average return: 0.9547143094269865
DFE Rewards Mean: 0.9547143094269865
DFE Rewards standard dev 0.034064369878846375
Returns Mean: 3.9722438
Returns standard dev 2.7204258
Advantages Mean: -0.4216285
Advantages standard dev 2.7204258
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 84%|████████▎ | 82/98 [00:05<00:01, 15.68it/s]

mean tensor([-0.0561,  0.1747, -0.0215, -0.0389,  0.0708,  0.0105, -0.0272])
Average return: 0.9581334810589066
DFE Rewards Mean: 0.9581334810589066
DFE Rewards standard dev 0.038491284073060884
Returns Mean: 4.3089466
Returns standard dev 3.0855498
Advantages Mean: 0.2705652
Advantages standard dev 3.08555
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([-1.1897e-01,  3.0632e-01, -9.1294e-02, -2.1814e-03,  1.0239e-04,
         2.9044e-02, -2.5243e-02])
Average return: 0.8593783648243968
DFE Rewards Mean: 0.8593783648243968
DFE Rewards standard dev 0.05725264964849005
Returns Mean: 2.147651
Returns standard dev 1.194365
Advantages Mean: -2.9697857
Advantages standard dev 1.194365
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0637,  0.1578, -0.0233, -0.0025,  0.0459, -0.0100, -0.0123])
Average return: 0.

 86%|████████▌ | 84/98 [00:05<00:00, 15.82it/s]

mean tensor([-0.0380,  0.1264,  0.0043, -0.0070,  0.0386,  0.0116, -0.0080])
Average return: 0.968685222633776
DFE Rewards Mean: 0.968685222633776
DFE Rewards standard dev 0.0315898714620341
Returns Mean: 4.6715646
Returns standard dev 3.0501187
Advantages Mean: 1.0394348
Advantages standard dev 3.0501184
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 88%|████████▊ | 86/98 [00:05<00:00, 15.82it/s]

mean tensor([-0.0376,  0.2465, -0.0389,  0.0367,  0.0027,  0.0015, -0.0110])
Average return: 0.9832677431398394
DFE Rewards Mean: 0.9832677431398394
DFE Rewards standard dev 0.021957043369348655
Returns Mean: 6.5589957
Returns standard dev 4.3063555
Advantages Mean: 2.1700513
Advantages standard dev 4.3063555
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])
mean tensor([-0.0243,  0.2401, -0.0459,  0.0206,  0.0181, -0.0045, -0.0061])
Average return: 0.987387518181281
DFE Rewards Mean: 0.987387518181281
DFE Rewards standard dev 0.018692377635277473
Returns Mean: 6.6103625
Returns standard dev 4.151986
Advantages Mean: 2.1141255
Advantages standard dev 4.151986
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0239,  0.2873, -0.0734,  0.0476, -0.0438,  0.0092,  0.0365])
Average return: 0.8425913613376748
DFE Rewards Mean: 

 90%|████████▉ | 88/98 [00:05<00:00, 15.85it/s]

mean tensor([-0.0221,  0.2638, -0.1218,  0.0551, -0.0388,  0.0080,  0.0258])
Average return: 0.9226981826911079
DFE Rewards Mean: 0.9226981826911079
DFE Rewards standard dev 0.06624827544185123
Returns Mean: 3.4520514
Returns standard dev 2.5476859
Advantages Mean: -1.3551275
Advantages standard dev 2.5476859
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 92%|█████████▏| 90/98 [00:05<00:00, 15.96it/s]

mean tensor([ 0.0151,  0.1412, -0.0821, -0.0108,  0.0234, -0.0089,  0.0028])
Average return: 0.9721847946997723
DFE Rewards Mean: 0.9721847946997723
DFE Rewards standard dev 0.030764590432766203
Returns Mean: 4.9937773
Returns standard dev 3.1857502
Advantages Mean: 1.1661489
Advantages standard dev 3.1857502
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0616,  0.2756, -0.1906,  0.0391, -0.0663,  0.0076, -0.0128])
Average return: 0.9652930424744061
DFE Rewards Mean: 0.9652930424744061
DFE Rewards standard dev 0.02787100788074129
Returns Mean: 4.2031407
Returns standard dev 2.614051
Advantages Mean: -0.8860136
Advantages standard dev 2.6140513
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])
mean tensor([-0.0232,  0.1783, -0.1511,  0.0099, -0.0061, -0.0129,  0.0058])
Average return: 0.9853798112582373
DFE Rewards Mea

 94%|█████████▍| 92/98 [00:05<00:00, 15.97it/s]

mean tensor([-0.0276,  0.2196, -0.1514,  0.0139, -0.0085,  0.0036, -0.0059])
Average return: 0.9948039415167372
DFE Rewards Mean: 0.9948039415167372
DFE Rewards standard dev 0.011920789635665749
Returns Mean: 7.633149
Returns standard dev 4.1166644
Advantages Mean: 3.0319579
Advantages standard dev 4.1166644
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 96%|█████████▌| 94/98 [00:06<00:00, 16.07it/s]

mean tensor([-0.0262,  0.2659, -0.1910,  0.0336, -0.0491, -0.0073, -0.0109])
Average return: 0.9637583936711692
DFE Rewards Mean: 0.9637583936711692
DFE Rewards standard dev 0.03909225686681533
Returns Mean: 4.8177924
Returns standard dev 3.4209752
Advantages Mean: -0.3885474
Advantages standard dev 3.4209747
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0162,  0.1496, -0.1117, -0.0051, -0.0103, -0.0476, -0.0253])
Average return: 0.9772725502685853
DFE Rewards Mean: 0.9772725502685853
DFE Rewards standard dev 0.029231535264474428
Returns Mean: 5.5821176
Returns standard dev 3.6857126
Advantages Mean: 1.4622812
Advantages standard dev 3.6857123
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0425,  0.2606, -0.2167,  0.0401, -0.0926, -0.0451, -0.0448])
Average return: 0.9580240237703783
DFE Rewards Mean: 

 98%|█████████▊| 96/98 [00:06<00:00, 16.01it/s]

mean tensor([-0.0289,  0.2644, -0.1940,  0.0353, -0.0489, -0.0439, -0.0337])
Average return: 0.97668000303036
DFE Rewards Mean: 0.97668000303036
DFE Rewards standard dev 0.024246254177542555
Returns Mean: 5.18454
Returns standard dev 3.378544
Advantages Mean: 0.35612026
Advantages standard dev 3.3785443
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


100%|██████████| 98/98 [00:06<00:00, 15.49it/s]
[I 2023-12-22 16:25:50,813] Trial 0 finished with value: 0.9652930424744061 and parameters: {'N_UPDATES': 98, 'N_EPOCHS': 8, 'MINIBATCH_SIZE': 16, 'BATCHSIZE_MULTIPLIER': 7, 'LR': 0.0004971391510729455, 'GAMMA': 0.960308356969359, 'GAE_LAMBDA': 0.9655529081499541, 'ENT_COEF': 5.899776130038774e-05, 'V_COEF': 0.46380666504150386, 'GRADIENT_CLIP': 0.5719427526186973, 'CLIP_VALUE_COEF': 0.20564201148973127, 'CLIP_RATIO': 0.2907789044909578}. Best is trial 0 with value: 0.9652930424744061.


mean tensor([-0.0518,  0.3676, -0.2595,  0.0028, -0.1014, -0.0200, -0.0230])
Average return: 0.9561751129573902
DFE Rewards Mean: 0.9561751129573902
DFE Rewards standard dev 0.03708354388885389
Returns Mean: 4.114571
Returns standard dev 2.7217057
Advantages Mean: -1.3467792
Advantages standard dev 2.7217057
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0315,  0.3081, -0.2433, -0.0215, -0.0877, -0.0384, -0.0129])
Average return: 0.9607451466713611
DFE Rewards Mean: 0.9607451466713611
DFE Rewards standard dev 0.04075745517709218
Returns Mean: 4.618431
Returns standard dev 3.3313808
Advantages Mean: -0.3486392
Advantages standard dev 3.331381
Fidelity History: []
2023-12-22 16:25:50 INFO ---------------- FINISHED HPO ----------------
2023-12-22 16:25:50 INFO HPO completed in 6.4 seconds.
2023-12-22 16:25:50 INFO Best trial:
2023-12-22 16:25:50 INFO -------------------------
2023-12-22 16:25:50 I

In [5]:
# Show the hyperparameter names the optimizer holds (displayed as the cell output).
optimizer.hyperparams

['N_UPDATES',
 'N_EPOCHS',
 'MINIBATCH_SIZE',
 'BATCHSIZE_MULTIPLIER',
 'LR',
 'GAMMA',
 'GAE_LAMBDA',
 'ENT_COEF',
 'V_COEF',
 'GRADIENT_CLIP',
 'CLIP_VALUE_COEF',
 'CLIP_RATIO',
 'BATCHSIZE']

In [6]:
# Show the number of HPO trials the optimizer was configured with (num_hpo_trials passed at construction).
optimizer.num_hpo_trials

1

In [7]:
# Show the best configuration found: its average return and the hyperparameters that produced it.
optimizer.best_hpo_configuration

{'best_avg_return': 0.9652930424744061,
 'best_hyperparams': {'N_UPDATES': 98,
  'N_EPOCHS': 8,
  'MINIBATCH_SIZE': 16,
  'BATCHSIZE_MULTIPLIER': 7,
  'LR': 0.0004971391510729455,
  'GAMMA': 0.960308356969359,
  'GAE_LAMBDA': 0.9655529081499541,
  'ENT_COEF': 5.899776130038774e-05,
  'V_COEF': 0.46380666504150386,
  'GRADIENT_CLIP': 0.5719427526186973,
  'CLIP_VALUE_COEF': 0.20564201148973127,
  'CLIP_RATIO': 0.2907789044909578}}