## CX Calibration with HPO

#### Imports

In [1]:
import sys
import os
# Tell the Intel OpenMP runtime to keep going when more than one copy of it is
# loaded into the process instead of aborting. It has to be set before the
# numerical libraries are imported, which is why it sits at the very top.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

# Root of the Quantum_Optimal_Control checkout, added to sys.path so the
# project modules imported below can be found.
# The location can be overridden per machine through the
# QUANTUM_OPTIMAL_CONTROL_PATH environment variable; when the variable is not
# set, the original author's local path is used, so existing setups keep working.
module_path = os.path.abspath(
    os.environ.get(
        "QUANTUM_OPTIMAL_CONTROL_PATH",
        '/Users/lukasvoss/Documents/Master Wirtschaftsphysik/Masterarbeit Yale-NUS CQT/Quantum_Optimal_Control',
    )
)
# Guard against duplicate entries when the cell is re-run in the same kernel.
if module_path not in sys.path:
    sys.path.append(module_path)

from template_configurations import gate_q_env_config
import logging
# Root-logger configuration: only WARNING and above are emitted, timestamps are
# formatted as "YYYY-MM-DD HH:MM:SS", and records go to stdout so they show up
# in the cell output.
log_settings = {
    "level": logging.WARNING,
    # The level label is the fixed text "INFO", not the record's real level.
    # NOTE(review): every record is therefore labelled INFO regardless of its
    # severity; "%(levelname)s" would print the actual level - confirm whether
    # the fixed label is intended before changing it.
    "format": "%(asctime)s INFO %(message)s",
    "datefmt": "%Y-%m-%d %H:%M:%S",
    "stream": sys.stdout,
}
logging.basicConfig(**log_settings)



Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done for qubit 1 done.
All single qubit calibrations are done
Updated Instruction Schedule Map <InstructionScheduleMap(1Q instructions:
  q0: {'h', 'reset', 'rz', 'x', 'id', 'measure', 'delay', 'z', 't', 'sx', 's', 'sdg', 'tdg'}
  q1: {'h', 'reset', 'rz', 'x', 'id', 'measure', 'delay', 'z', 't', 'sx', 's', 'sdg', 'tdg'}
Multi qubit instructions:
  (0, 1): {'cr45p', 'ecr', 'cr45m'}
  (1, 0): {'cr45p', 'ecr', 'cr45m'}
)>


Which gate is to be calibrated?

In [2]:
# Display the calibration target stored in the imported configuration
# (the output below shows the qubit register and the gate instruction).
gate_q_env_config.target

{'register': [0, 1],
 'gate': Instruction(name='cx', num_qubits=2, num_clbits=0, params=[])}

### Perform HPO

In [3]:
from hyperparameter_optimization import HyperparameterOptimizer

Set the paths to the configuration files for the RL agent and the HPO, and the location where the HPO results are stored

In [4]:
# Location the HPO results are written to, given as a relative path.
save_results_path = 'hpo_results'

# All configuration files are located relative to the current working directory.
current_dir = os.getcwd()
parent_dir, _ = os.path.split(current_dir)

# The agent configuration sits one level above the working directory ...
path_agent_config = os.path.join(parent_dir, 'agent_config.yaml')
# ... while the HPO configuration is in the local `config_yamls` folder.
path_hpo_config = os.path.join(current_dir, 'config_yamls', 'hpo_config.yaml')

In [5]:
from quantumenvironment import QuantumEnvironment

In [6]:
# Build the quantum environment from the gate configuration imported at the top
# of the notebook; this instance is handed to the hyperparameter optimizer below.
q_env = QuantumEnvironment(gate_q_env_config)

SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


In [7]:
# Collect the optimizer settings in one place: the environment to train on,
# the two configuration files, the results location, and the number of trials.
hpo_kwargs = dict(
    q_env=q_env,
    path_agent_config=path_agent_config,
    path_hpo_config=path_hpo_config,
    save_results_path=save_results_path,
    log_progress=True,  # presumably enables the progress/log output seen below - confirm
    num_hpo_trials=2,   # small trial count; the log below reports trials 0 and 1
)
optimizer = HyperparameterOptimizer(**hpo_kwargs)

# Run the hyperparameter search; kept as the last expression of the cell so any
# returned value is displayed exactly as before.
optimizer.optimize_hyperparameters()

 63%|██████▎   | 50/79 [00:19<00:10,  2.72it/s]

Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 65%|██████▍   | 51/79 [00:20<00:10,  2.72it/s]

mean tensor([-0.0948, -0.2298, -0.0084, -0.1805, -0.3260, -0.1416,  0.2550])
Average return: 0.08623351852344785
DFE Rewards Mean: 0.08623351852344785
DFE Rewards standard dev 0.06441803121150665
Returns Mean: 0.093155846
Returns standard dev 0.0733189
Advantages Mean: -0.1689402
Advantages standard dev 0.0733189
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 66%|██████▌   | 52/79 [00:20<00:10,  2.67it/s]

mean tensor([-0.1023, -0.2310,  0.0065, -0.1664, -0.4040, -0.2034,  0.3123])
Average return: 0.05800285599974268
DFE Rewards Mean: 0.05800285599974268
DFE Rewards standard dev 0.05554726183459821
Returns Mean: 0.06217348
Returns standard dev 0.06084631
Advantages Mean: -0.04934904
Advantages standard dev 0.06084631
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 67%|██████▋   | 53/79 [00:21<00:10,  2.56it/s]

mean tensor([-0.0932, -0.2353, -0.0070, -0.2011, -0.4341, -0.2045,  0.4961])
Average return: 0.39768813900106026
DFE Rewards Mean: 0.39768813900106026
DFE Rewards standard dev 0.08450706213097618
Returns Mean: 0.51696765
Returns standard dev 0.14221147
Advantages Mean: 0.43783465
Advantages standard dev 0.14221147
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 68%|██████▊   | 54/79 [00:21<00:10,  2.50it/s]

mean tensor([-0.1019, -0.2623, -0.0251, -0.1919, -0.4718, -0.2329,  0.5001])
Average return: 0.8375720157089879
DFE Rewards Mean: 0.8375720157089879
DFE Rewards standard dev 0.09061943637923894
Returns Mean: 2.078568
Returns standard dev 1.1664224
Advantages Mean: 1.7465966
Advantages standard dev 1.1664224
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 70%|██████▉   | 55/79 [00:21<00:09,  2.55it/s]

mean tensor([-0.1087, -0.2959, -0.0427, -0.1850, -0.3649, -0.1912,  0.4816])
Average return: 0.9373029819112022
DFE Rewards Mean: 0.9373029819112022
DFE Rewards standard dev 0.04988677481720689
Returns Mean: 3.719474
Returns standard dev 2.822083
Advantages Mean: 2.8201447
Advantages standard dev 2.822083
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 71%|███████   | 56/79 [00:22<00:10,  2.14it/s]

mean tensor([-0.0828, -0.3200, -0.0443, -0.2015, -0.3851, -0.2001,  0.3792])
Average return: 0.383624533498902
DFE Rewards Mean: 0.383624533498902
DFE Rewards standard dev 0.08134916160324847
Returns Mean: 0.49282607
Returns standard dev 0.134744
Advantages Mean: -1.0652945
Advantages standard dev 0.13474402
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 72%|███████▏  | 57/79 [00:22<00:09,  2.27it/s]

mean tensor([-0.0952, -0.3272, -0.0540, -0.1939, -0.4235, -0.2319,  0.3719])
Average return: 0.2786446024232412
DFE Rewards Mean: 0.2786446024232412
DFE Rewards standard dev 0.08892953424046346
Returns Mean: 0.33430904
Returns standard dev 0.12427524
Advantages Mean: -0.8127969
Advantages standard dev 0.12427524
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 73%|███████▎  | 58/79 [00:23<00:09,  2.30it/s]

mean tensor([-0.1128, -0.3571, -0.0767, -0.1812, -0.4369, -0.2184,  0.3902])
Average return: 0.36275845458526507
DFE Rewards Mean: 0.36275845458526507
DFE Rewards standard dev 0.08762648820088287
Returns Mean: 0.46057278
Returns standard dev 0.14363594
Advantages Mean: -0.25637645
Advantages standard dev 0.14363594
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 75%|███████▍  | 59/79 [00:23<00:08,  2.45it/s]

mean tensor([-0.1202, -0.3845, -0.0896, -0.1966, -0.4516, -0.2147,  0.4194])
Average return: 0.8502170273786185
DFE Rewards Mean: 0.8502170273786185
DFE Rewards standard dev 0.08513209203386292
Returns Mean: 2.1959653
Returns standard dev 1.3464804
Advantages Mean: 1.7147003
Advantages standard dev 1.3464804
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 76%|███████▌  | 60/79 [00:24<00:07,  2.58it/s]

mean tensor([-0.1225, -0.3743, -0.0843, -0.2022, -0.3569, -0.1366,  0.4269])
Average return: 0.9481998649034642
DFE Rewards Mean: 0.9481998649034642
DFE Rewards standard dev 0.03822152215483758
Returns Mean: 3.8225887
Returns standard dev 2.7135618
Advantages Mean: 2.835403
Advantages standard dev 2.7135618
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 77%|███████▋  | 61/79 [00:24<00:06,  2.69it/s]

mean tensor([-0.0992, -0.3711, -0.0736, -0.2004, -0.3713, -0.1334,  0.3534])
Average return: 0.8991278797744691
DFE Rewards Mean: 0.8991278797744691
DFE Rewards standard dev 0.06917235854805365
Returns Mean: 2.846194
Returns standard dev 2.0191085
Advantages Mean: 1.135307
Advantages standard dev 2.0191085
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 78%|███████▊  | 62/79 [00:24<00:06,  2.75it/s]

mean tensor([-0.0936, -0.3609, -0.0672, -0.1889, -0.2771, -0.0329,  0.3431])
Average return: 0.3323121401206835
DFE Rewards Mean: 0.3323121401206835
DFE Rewards standard dev 0.08182868836232005
Returns Mean: 0.41157228
Returns standard dev 0.124331795
Advantages Mean: -1.5045376
Advantages standard dev 0.124331795
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 80%|███████▉  | 63/79 [00:25<00:05,  2.82it/s]

mean tensor([-0.1034, -0.3752, -0.0890, -0.2057, -0.2732, -0.0221,  0.3591])
Average return: 0.06780351082996652
DFE Rewards Mean: 0.06780351082996652
DFE Rewards standard dev 0.05100600211111313
Returns Mean: 0.07239615
Returns standard dev 0.0553662
Advantages Mean: -1.4746977
Advantages standard dev 0.0553662
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 81%|████████  | 64/79 [00:25<00:05,  2.87it/s]

mean tensor([-0.1001, -0.3758, -0.0832, -0.1973, -0.3077, -0.0580,  0.4001])
Average return: 0.9513583294321574
DFE Rewards Mean: 0.9513583294321574
DFE Rewards standard dev 0.03911905538117926
Returns Mean: 3.8985333
Returns standard dev 2.6622965
Advantages Mean: 2.7577007
Advantages standard dev 2.6622965
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 82%|████████▏ | 65/79 [00:25<00:04,  2.91it/s]

mean tensor([-0.1023, -0.3862, -0.0696, -0.2079, -0.3198, -0.0620,  0.3351])
Average return: 0.9633532783766174
DFE Rewards Mean: 0.9633532783766174
DFE Rewards standard dev 0.0332503944582041
Returns Mean: 4.487298
Returns standard dev 3.1376514
Advantages Mean: 2.9516978
Advantages standard dev 3.1376514
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 84%|████████▎ | 66/79 [00:26<00:04,  2.62it/s]

mean tensor([-0.0984, -0.3893, -0.0513, -0.2066, -0.3290, -0.0517,  0.2814])
Average return: 0.9693386881139988
DFE Rewards Mean: 0.9693386881139988
DFE Rewards standard dev 0.027018721018084776
Returns Mean: 4.5735273
Returns standard dev 2.9614894
Advantages Mean: 2.6136625
Advantages standard dev 2.9614894
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 85%|████████▍ | 67/79 [00:26<00:04,  2.66it/s]

mean tensor([-0.0942, -0.3883, -0.0595, -0.1893, -0.3425, -0.0463,  0.2510])
Average return: 0.32811343383704816
DFE Rewards Mean: 0.32811343383704816
DFE Rewards standard dev 0.0815351018098454
Returns Mean: 0.40519157
Returns standard dev 0.12359081
Advantages Mean: -1.9468321
Advantages standard dev 0.123590805
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 86%|████████▌ | 68/79 [00:26<00:04,  2.72it/s]

mean tensor([-0.0938, -0.3986, -0.0704, -0.1981, -0.3485, -0.0413,  0.2574])
Average return: 0.9161925709025474
DFE Rewards Mean: 0.9161925709025474
DFE Rewards standard dev 0.06512675330417438
Returns Mean: 3.2025483
Returns standard dev 2.3372078
Advantages Mean: 1.2742395
Advantages standard dev 2.3372078
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 87%|████████▋ | 69/79 [00:27<00:03,  2.71it/s]

mean tensor([-0.0968, -0.3818, -0.0593, -0.1870, -0.2576,  0.0022,  0.2744])
Average return: 0.04024898729995725
DFE Rewards Mean: 0.04024898729995725
DFE Rewards standard dev 0.035792405382589466
Returns Mean: 0.042865146
Returns standard dev 0.036478106
Advantages Mean: -2.145287
Advantages standard dev 0.036478102
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 89%|████████▊ | 70/79 [00:27<00:03,  2.72it/s]

mean tensor([-0.0933, -0.3790, -0.0632, -0.2099, -0.2722,  0.0049,  0.3229])
Average return: 0.9650047768819799
DFE Rewards Mean: 0.9650047768819799
DFE Rewards standard dev 0.030598658332066504
Returns Mean: 4.5760055
Returns standard dev 3.2238252
Advantages Mean: 2.8140578
Advantages standard dev 3.2238252
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 90%|████████▉ | 71/79 [00:27<00:02,  2.68it/s]

mean tensor([-0.0898, -0.3966, -0.0852, -0.1836, -0.2865,  0.0143,  0.2763])
Average return: 0.05363451491880631
DFE Rewards Mean: 0.05363451491880631
DFE Rewards standard dev 0.04933265930247869
Returns Mean: 0.0575333
Returns standard dev 0.05318302
Advantages Mean: -2.1353564
Advantages standard dev 0.05318302
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 91%|█████████ | 72/79 [00:28<00:02,  2.35it/s]

mean tensor([-0.0920, -0.4105, -0.0777, -0.1872, -0.3422, -0.0341,  0.3128])
Average return: 0.9686519548987041
DFE Rewards Mean: 0.9686519548987041
DFE Rewards standard dev 0.02881826190492043
Returns Mean: 4.6925855
Returns standard dev 3.1818798
Advantages Mean: 2.887838
Advantages standard dev 3.1818798
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 92%|█████████▏| 73/79 [00:28<00:02,  2.39it/s]

mean tensor([-0.0806, -0.4188, -0.0785, -0.1833, -0.3500, -0.0473,  0.2702])
Average return: 0.37664885672370224
DFE Rewards Mean: 0.37664885672370224
DFE Rewards standard dev 0.09132868379310649
Returns Mean: 0.48360616
Returns standard dev 0.14928524
Advantages Mean: -1.6886717
Advantages standard dev 0.14928523
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 94%|█████████▎| 74/79 [00:29<00:02,  2.46it/s]

mean tensor([-0.0908, -0.4171, -0.0903, -0.1810, -0.3972, -0.0887,  0.2828])
Average return: 0.38514264770160195
DFE Rewards Mean: 0.38514264770160195
DFE Rewards standard dev 0.08315909293574324
Returns Mean: 0.49564862
Returns standard dev 0.13706267
Advantages Mean: -1.2036583
Advantages standard dev 0.13706267
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 95%|█████████▍| 75/79 [00:29<00:01,  2.51it/s]

mean tensor([-0.0904, -0.4033, -0.0808, -0.1848, -0.4504, -0.1336,  0.2790])
Average return: 0.26318830258799336
DFE Rewards Mean: 0.26318830258799336
DFE Rewards standard dev 0.08143673819191638
Returns Mean: 0.31171352
Returns standard dev 0.11325153
Advantages Mean: -0.7978136
Advantages standard dev 0.11325153
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 96%|█████████▌| 76/79 [00:30<00:01,  2.54it/s]

mean tensor([-0.0856, -0.4090, -0.0937, -0.1718, -0.4654, -0.1129,  0.2895])
Average return: 0.8926596021963629
DFE Rewards Mean: 0.8926596021963629
DFE Rewards standard dev 0.08537340558702876
Returns Mean: 2.8512533
Returns standard dev 2.011879
Advantages Mean: 2.1793783
Advantages standard dev 2.011879
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 97%|█████████▋| 77/79 [00:30<00:00,  2.04it/s]

mean tensor([-0.0850, -0.4098, -0.1024, -0.1654, -0.3769, -0.0059,  0.2836])
Average return: 0.9262095083381088
DFE Rewards Mean: 0.9262095083381088
DFE Rewards standard dev 0.06350473503962621
Returns Mean: 3.5300941
Returns standard dev 2.6830754
Advantages Mean: 2.5256863
Advantages standard dev 2.6830752
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 99%|█████████▊| 78/79 [00:31<00:00,  2.17it/s]

mean tensor([-0.0802, -0.4180, -0.0972, -0.1407, -0.2925,  0.0854,  0.2452])
Average return: 0.27666450999104225
DFE Rewards Mean: 0.27666450999104225
DFE Rewards standard dev 0.08494051378056168
Returns Mean: 0.33096328
Returns standard dev 0.12002354
Advantages Mean: -1.0688443
Advantages standard dev 0.12002354
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


100%|██████████| 79/79 [00:31<00:00,  2.50it/s]
[I 2024-01-25 15:25:12,002] Trial 1 finished with value: 0.37664885672370224 and parameters: {'N_UPDATES': 79, 'N_EPOCHS': 25, 'MINIBATCH_SIZE': 128, 'BATCHSIZE_MULTIPLIER': 10, 'LR': 0.014944055379446496, 'GAMMA': 0.9545702557404366, 'GAE_LAMBDA': 0.9097758555679725, 'ENT_COEF': 0.0003943546155431256, 'V_COEF': 0.4888562367663966, 'GRADIENT_CLIP': 0.5562320347644294, 'CLIP_VALUE_COEF': 0.21746508094432535, 'CLIP_RATIO': 0.2588943918914861}. Best is trial 0 with value: 0.8380874939958372.


mean tensor([-0.0749, -0.4392, -0.1310, -0.1119, -0.3032,  0.0781,  0.2573])
Average return: 0.9305399790039847
DFE Rewards Mean: 0.9305399790039847
DFE Rewards standard dev 0.05927282071532869
Returns Mean: 3.6229293
Returns standard dev 2.7375402
Advantages Mean: 2.6236868
Advantages standard dev 2.7375402
Fidelity History: []
2024-01-25 15:25:12 INFO ---------------- FINISHED HPO ----------------
2024-01-25 15:25:12 INFO HPO completed in 39.71 seconds.
2024-01-25 15:25:12 INFO Best trial:
2024-01-25 15:25:12 INFO -------------------------
2024-01-25 15:25:12 INFO   Value: 0.8380874939958372
2024-01-25 15:25:12 INFO   Parameters: 
2024-01-25 15:25:12 INFO     N_UPDATES: 59
2024-01-25 15:25:12 INFO     N_EPOCHS: 19
2024-01-25 15:25:12 INFO     MINIBATCH_SIZE: 10
2024-01-25 15:25:12 INFO     BATCHSIZE_MULTIPLIER: 8
2024-01-25 15:25:12 INFO     LR: 0.00025200736268065753
2024-01-25 15:25:12 INFO     GAMMA: 0.9598486994011194
2024-01-25 15:25:12 INFO     GAE_LAMBDA: 0.9023413983152173
20

#### Quick Summary of HPO Task

In [8]:
# Gate and qubit register the HPO run was performed for.
optimizer.target_gate

{'target_gate': Instruction(name='cx', num_qubits=2, num_clbits=0, params=[]),
 'target_register': [0, 1]}

In [9]:
# Names of the hyperparameters known to the optimizer (shown as a list below).
optimizer.hyperparams

['N_UPDATES',
 'N_EPOCHS',
 'MINIBATCH_SIZE',
 'BATCHSIZE_MULTIPLIER',
 'LR',
 'GAMMA',
 'GAE_LAMBDA',
 'ENT_COEF',
 'V_COEF',
 'GRADIENT_CLIP',
 'CLIP_VALUE_COEF',
 'CLIP_RATIO',
 'BATCHSIZE']

In [10]:
# Number of HPO trials, as passed to the optimizer's constructor above.
optimizer.num_hpo_trials

2

In [11]:
# Best result of the search: the best average reward and the hyperparameter
# values that produced it.
optimizer.best_hpo_configuration

{'best_avg_reward': 0.8380874939958372,
 'best_hyperparams': {'N_UPDATES': 59,
  'N_EPOCHS': 19,
  'MINIBATCH_SIZE': 10,
  'BATCHSIZE_MULTIPLIER': 8,
  'LR': 0.00025200736268065753,
  'GAMMA': 0.9598486994011194,
  'GAE_LAMBDA': 0.9023413983152173,
  'ENT_COEF': 0.00013578916457065266,
  'V_COEF': 0.5303366609068325,
  'GRADIENT_CLIP': 0.5585457404521005,
  'CLIP_VALUE_COEF': 0.17844644710924704,
  'CLIP_RATIO': 0.1180042374151018}}