### CX Gate Calibration with Hyperparameter Optimization (HPO) under the New Code Architecture / Workflow (Dec 2023)

In [1]:
import sys
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
import time
import yaml
import pickle
import optuna
# Location of the local Quantum_Optimal_Control checkout that provides the project
# modules imported below. Set the QUANTUM_OPTIMAL_CONTROL_PATH environment variable to
# point at your own checkout; the original hard-coded path is kept as the fallback so
# existing setups behave exactly as before.
module_path = os.path.abspath(
    os.environ.get(
        "QUANTUM_OPTIMAL_CONTROL_PATH",
        '/Users/lukasvoss/Documents/Master Wirtschaftsphysik/Masterarbeit Yale-NUS CQT/Quantum_Optimal_Control',
    )
)
# Append only once so re-running this cell does not grow sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

from quantumenvironment import QuantumEnvironment
from agent import Agent
from gate_level_abstraction import gate_q_env_config
from helper_functions import load_agent_from_yaml_file, create_agent_config
from ppo import make_train_ppo
from qconfig import QEnvConfig

import logging
# Root logger setup: records at WARNING level and above are emitted, and they are
# written to stdout rather than the default stderr stream.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.WARNING,
    datefmt="%Y-%m-%d %H:%M:%S",
    # NOTE: "INFO" is a literal part of the format string, so every emitted record is
    # labelled INFO regardless of the level it was actually logged at.
    format="%(asctime)s INFO %(message)s",
)



Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...


  par.stderr = np.sqrt(self.result.covar[ivar, ivar])
  (par.stderr * np.sqrt(self.result.covar[jvar, jvar])))


Drag experiments done for qubit 1 done.
All single qubit calibrations are done
Updated Instruction Schedule Map <InstructionScheduleMap(1Q instructions:
  q0: {'delay', 'tdg', 'reset', 'measure', 'z', 's', 't', 'rz', 'sdg', 'h', 'x', 'id', 'sx'}
  q1: {'delay', 'tdg', 'reset', 'measure', 'z', 's', 't', 'rz', 'sdg', 'h', 'x', 'id', 'sx'}
Multi qubit instructions:
  (0, 1): {'cr45m', 'cr45p', 'ecr'}
  (1, 0): {'cr45m', 'cr45p', 'ecr'}
)>
Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done for qubit 1 done.
All single qubit calibrations are done
Updated Instruction Schedule Map <InstructionScheduleMap(1Q instructions:
  q0: {'delay', 'tdg', 'reset', 'measure', 'z', 's', 't', 'rz', 'sdg', 'h', 'x', 'id', 'sx'}
  q1: {'delay', 'tdg', 'reset', 'measure', 'z'

In [2]:
# Show the calibration target stored on the environment config (gate and qubit register).
gate_q_env_config.target

{'register': [0, 1],
 'gate': Instruction(name='cx', num_qubits=2, num_clbits=0, params=[])}

### Perform HPO

In [3]:
from hyperparameter_optimization import HyperparameterOptimizer

In [4]:
# Agent configuration template inside the Quantum_Optimal_Control checkout. It is
# resolved relative to `module_path` (defined in the first cell) instead of repeating
# the absolute, user-specific checkout path a second time.
path_agent_config = os.path.join(module_path, 'template_configurations', 'agent_config.yaml')
# Relative location passed to the optimizer for storing HPO results.
# NOTE(review): presumably resolved against the current working directory — confirm
# in HyperparameterOptimizer.
save_results_path = 'hpo_results'

In [5]:
# Build the optimizer from the gate-level environment config, the agent YAML template
# and the results location, with progress logging enabled and one HPO trial requested;
# then start the hyperparameter search.
optimizer = HyperparameterOptimizer(
    gate_q_env_config,
    path_agent_config,
    save_results_path,
    log_progress=True,
    num_hpo_trials=1,
)
optimizer.optimize_hyperparameters()

SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])
2023-12-22 16:41:09 INFO num_HPO_trials: 1
2023-12-22 16:41:09 INFO ---------------- STARTING HPO ----------------


[I 2023-12-22 16:41:09,270] A new study created in memory with name: no-name-e699ce32-5949-4df1-bc73-3825f88a543d


SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


  0%|          | 0/41 [00:00<?, ?it/s]

SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


  2%|▏         | 1/41 [00:00<00:05,  7.60it/s]

mean tensor([ 0.0247, -0.0340, -0.0213, -0.0533, -0.0879,  0.1159, -0.0196])
Average return: 0.3339224082435516
DFE Rewards Mean: 0.3339224082435516
DFE Rewards standard dev 0.2820084946330522
Returns Mean: 0.5646662
Returns standard dev 0.6733907
Advantages Mean: 0.4985531
Advantages standard dev 0.67339075
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


  5%|▍         | 2/41 [00:00<00:04,  8.83it/s]

mean tensor([ 0.0014, -0.0245, -0.0110, -0.0491, -0.0911,  0.1005, -0.0240])
Average return: 0.3602802976544238
DFE Rewards Mean: 0.3602802976544238
DFE Rewards standard dev 0.29454900857501504
Returns Mean: 0.6682756
Returns standard dev 0.9827856
Advantages Mean: 0.43879142
Advantages standard dev 0.9827856
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


  7%|▋         | 3/41 [00:00<00:04,  9.19it/s]

mean tensor([ 0.0161, -0.0265,  0.0014, -0.0603, -0.0738,  0.1049, -0.0450])
Average return: 0.3855861596691957
DFE Rewards Mean: 0.3855861596691957
DFE Rewards standard dev 0.2887023807007702
Returns Mean: 0.69132364
Returns standard dev 0.9227202
Advantages Mean: 0.2648955
Advantages standard dev 0.9227202
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 10%|▉         | 4/41 [00:00<00:04,  8.88it/s]

mean tensor([ 0.0650,  0.0109, -0.0047, -0.0424, -0.0620,  0.1495, -0.1089])
Average return: 0.2524653036244355
DFE Rewards Mean: 0.2524653036244355
DFE Rewards standard dev 0.25254331449013545
Returns Mean: 0.40366212
Returns standard dev 0.60796106
Advantages Mean: -0.524246
Advantages standard dev 0.60796106
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 15%|█▍        | 6/41 [00:00<00:03,  9.50it/s]

mean tensor([ 0.0527,  0.0165,  0.0083, -0.0553, -0.0336,  0.1239, -0.0955])
Average return: 0.27463804435630323
DFE Rewards Mean: 0.27463804435630323
DFE Rewards standard dev 0.20418767352390899
Returns Mean: 0.37853697
Returns standard dev 0.37059927
Advantages Mean: -0.29488504
Advantages standard dev 0.37059927
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0022, -0.0116,  0.0265, -0.0152, -0.0992,  0.0994, -0.0395])
Average return: 0.4209322434841534
DFE Rewards Mean: 0.4209322434841534
DFE Rewards standard dev 0.2883182605419291
Returns Mean: 0.7999723
Returns standard dev 1.1327727
Advantages Mean: 0.48532778
Advantages standard dev 1.1327727
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])
mean tensor([-0.0050,  0.0047,  0.0350, -0.0020, -0.0729,  0.0835, -0.0390])


 17%|█▋        | 7/41 [00:00<00:03,  9.56it/s]

Average return: 0.4117215232756327
DFE Rewards Mean: 0.4117215232756327
DFE Rewards standard dev 0.2873520185614401
Returns Mean: 0.8291191
Returns standard dev 1.4171667
Advantages Mean: 0.3597991
Advantages standard dev 1.4171666
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])
mean tensor([ 0.0398,  0.0219,  0.0516, -0.0076, -0.0613,  0.1081, -0.0742])
Average return: 0.2638792405565292
DFE Rewards Mean: 0.2638792405565292
DFE Rewards standard dev 0.2044990698032077
Returns Mean: 0.36393198
Returns standard dev 0.37167045
Advantages Mean: -0.51127356
Advantages standard dev 0.37167045
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 22%|██▏       | 9/41 [00:00<00:03,  9.91it/s]

mean tensor([-0.0037,  0.0208,  0.0444, -0.0031, -0.0460,  0.1155, -0.0287])
Average return: 0.4361532028594158
DFE Rewards Mean: 0.4361532028594158
DFE Rewards standard dev 0.2906484975726572
Returns Mean: 0.82856596
Returns standard dev 1.0242088
Advantages Mean: 0.28166384
Advantages standard dev 1.0242087
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 24%|██▍       | 10/41 [00:01<00:03,  9.89it/s]

mean tensor([ 0.0030,  0.0223,  0.0322,  0.0025, -0.0552,  0.1713, -0.0276])
Average return: 0.2726550920359473
DFE Rewards Mean: 0.2726550920359473
DFE Rewards standard dev 0.2182292139910757
Returns Mean: 0.38273585
Returns standard dev 0.38383707
Advantages Mean: -0.49630082
Advantages standard dev 0.38383707
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 27%|██▋       | 11/41 [00:01<00:03,  9.89it/s]

mean tensor([ 0.0193,  0.0361,  0.0395, -0.0206, -0.0707,  0.2072, -0.0564])
Average return: 0.31290401431212916
DFE Rewards Mean: 0.31290401431212916
DFE Rewards standard dev 0.28129075326501507
Returns Mean: 0.543127
Returns standard dev 0.8645601
Advantages Mean: -0.1318836
Advantages standard dev 0.8645602
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0078,  0.0272,  0.0327, -0.0016, -0.0537,  0.1440, -0.0090])
Average return: 0.45912710331440765
DFE Rewards Mean: 0.45912710331440765
DFE Rewards standard dev 0.28050254264121427
Returns Mean: 0.8546663
Returns standard dev 0.97500587
Advantages Mean: 0.3313674
Advantages standard dev 0.97500587
Fidelity History: []


 32%|███▏      | 13/41 [00:01<00:02, 10.18it/s]

SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0042,  0.0244,  0.0320, -0.0244, -0.0801,  0.1584, -0.0091])
Average return: 0.46016097166847497
DFE Rewards Mean: 0.46016097166847497
DFE Rewards standard dev 0.2932400497101839
Returns Mean: 0.9097718
Returns standard dev 1.1216085
Advantages Mean: 0.2159741
Advantages standard dev 1.1216085
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0115,  0.0243,  0.0506, -0.0390, -0.0728,  0.1710, -0.0056])
Average return: 0.4874513319884176
DFE Rewards Mean: 0.4874513319884176
DFE Rewards standard dev 0.2713924776928725
Returns Mean: 0.91289306
Returns standard dev 0.8894902
Advantages Mean: 0.065156065
Advantages standard dev 0.8894902
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


 37%|███▋      | 15/41 [00:01<00:02, 10.13it/s]

mean tensor([ 0.0133,  0.0110,  0.0666, -0.0240, -0.0883,  0.1566,  0.0216])
Average return: 0.5076138349365723
DFE Rewards Mean: 0.5076138349365723
DFE Rewards standard dev 0.29858357428907656
Returns Mean: 1.1183946
Returns standard dev 1.5091647
Advantages Mean: 0.21521372
Advantages standard dev 1.5091647
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([ 0.0483, -0.0097,  0.0517, -0.0237, -0.0847,  0.1766,  0.0098])
Average return: 0.27554105658220723
DFE Rewards Mean: 0.27554105658220723
DFE Rewards standard dev 0.2298478271224465
Returns Mean: 0.42850107
Returns standard dev 0.7702776
Advantages Mean: -0.7463306
Advantages standard dev 0.7702777
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 41%|████▏     | 17/41 [00:01<00:02, 10.02it/s]

mean tensor([ 0.0133, -0.0195,  0.0452, -0.0029, -0.0889,  0.1192,  0.0146])
Average return: 0.29148729541757207
DFE Rewards Mean: 0.29148729541757207
DFE Rewards standard dev 0.22925144009536813
Returns Mean: 0.44495815
Returns standard dev 0.7398499
Advantages Mean: -0.4761432
Advantages standard dev 0.7398499
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([ 0.0088, -0.0605,  0.0523,  0.0108, -0.0889,  0.1394,  0.0016])
Average return: 0.2985987739503843
DFE Rewards Mean: 0.2985987739503843
DFE Rewards standard dev 0.22071919992157918
Returns Mean: 0.42363575
Returns standard dev 0.39867198
Advantages Mean: -0.30116928
Advantages standard dev 0.39867198
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0222, -0.0441,  0.0561, -0.0043, -0.0973,  0.0906, -0.0023])
Average return: 0.5195697533273482
DFE Rew

 46%|████▋     | 19/41 [00:01<00:02, 10.02it/s]

Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0156, -0.0557,  0.0523,  0.0026, -0.0756,  0.0807, -0.0011])
Average return: 0.591941916388977
DFE Rewards Mean: 0.591941916388977
DFE Rewards standard dev 0.2741715734560321
Returns Mean: 1.2710576
Returns standard dev 1.1778587
Advantages Mean: 0.51560605
Advantages standard dev 1.1778587
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])
mean tensor([-0.0161, -0.0638,  0.0262,  0.0160, -0.0711,  0.0623, -0.0166])
Average return: 0.5771595084983969
DFE Rewards Mean: 0.5771595084983969
DFE Rewards standard dev 0.28699173843581116
Returns Mean: 1.3947597
Returns standard dev 1.9024173
Advantages Mean: 0.43726888
Advantages standard dev 1.9024171
Fidelity History:

 54%|█████▎    | 22/41 [00:02<00:01,  9.95it/s]

 []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([ 0.0003, -0.0852,  0.0355,  0.0133, -0.0414,  0.0667, -0.0059])
Average return: 0.33190051847810254
DFE Rewards Mean: 0.33190051847810254
DFE Rewards standard dev 0.2522950386745914
Returns Mean: 0.51570326
Returns standard dev 0.53994846
Advantages Mean: -0.7283405
Advantages standard dev 0.53994846
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 59%|█████▊    | 24/41 [00:02<00:01,  9.89it/s]

mean tensor([-0.0109, -0.0775,  0.0422,  0.0158, -0.0558,  0.0278, -0.0002])
Average return: 0.3891474994194509
DFE Rewards Mean: 0.3891474994194509
DFE Rewards standard dev 0.25714357523539805
Returns Mean: 0.62113875
Returns standard dev 0.56538945
Advantages Mean: -0.3808325
Advantages standard dev 0.56538945
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([-0.0093, -0.0973,  0.0510,  0.0131, -0.0700,  0.0165, -0.0007])
Average return: 0.4416994688470218
DFE Rewards Mean: 0.4416994688470218
DFE Rewards standard dev 0.26057048362032775
Returns Mean: 0.7325058
Returns standard dev 0.6072578
Advantages Mean: 0.0034745217
Advantages standard dev 0.6072578
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 66%|██████▌   | 27/41 [00:02<00:01, 10.10it/s]

mean tensor([-0.0429, -0.0426,  0.0542,  0.0324, -0.0693,  0.0238,  0.0098])
Average return: 0.6641562552308105
DFE Rewards Mean: 0.6641562552308105
DFE Rewards standard dev 0.2513513555232911
Returns Mean: 1.6446253
Returns standard dev 1.8172297
Advantages Mean: 0.8947687
Advantages standard dev 1.8172297
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0429, -0.0404,  0.0843,  0.0476, -0.0463,  0.0286,  0.0259])
Average return: 0.7282517229311736
DFE Rewards Mean: 0.7282517229311736
DFE Rewards standard dev 0.21238632918853886
Returns Mean: 1.8472517
Returns standard dev 1.7851492
Advantages Mean: 0.88079286
Advantages standard dev 1.7851492
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0104, -0.0513,  0.1105,  0.0370,  0.0135,  0.0426,  0.0256])
Average return: 0.46015844828361174
DFE Rewards Mean: 0

 71%|███████   | 29/41 [00:02<00:01, 10.30it/s]

mean tensor([-0.0359, -0.0421,  0.0801,  0.0610,  0.0218,  0.0388,  0.0319])
Average return: 0.7231254505071578
DFE Rewards Mean: 0.7231254505071578
DFE Rewards standard dev 0.22653233709468862
Returns Mean: 1.8635682
Returns standard dev 1.7932072
Advantages Mean: 0.811536
Advantages standard dev 1.7932072
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0065, -0.0445,  0.0728,  0.0837,  0.0110,  0.0497,  0.0607])
Average return: 0.5396874183745642
DFE Rewards Mean: 0.5396874183745642
DFE Rewards standard dev 0.2686569355647852
Returns Mean: 1.0041006
Returns standard dev 0.7587148
Advantages Mean: -0.26367408
Advantages standard dev 0.7587148
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0147, -0.0681,  0.0933,  0.0592,  0.0043,  0.0692,  0.0646])
Average return: 0.6292848695325148
DFE Rewards Mean: 0.

 80%|████████  | 33/41 [00:03<00:00, 10.30it/s]

mean tensor([-0.0178, -0.0898,  0.0810,  0.0610, -0.0055,  0.0590,  0.0679])
Average return: 0.6032268054238098
DFE Rewards Mean: 0.6032268054238098
DFE Rewards standard dev 0.24655420643506398
Returns Mean: 1.228685
Returns standard dev 1.1588945
Advantages Mean: 0.023152461
Advantages standard dev 1.1588945
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])
mean tensor([-0.0305, -0.0557,  0.0745,  0.0497, -0.0132,  0.0260,  0.0266])
Average return: 0.8192968623728628
DFE Rewards Mean: 0.8192968623728628
DFE Rewards standard dev 0.15576607863080136
Returns Mean: 2.3489258
Returns standard dev 1.9294285
Advantages Mean: 1.1288345
Advantages standard dev 1.9294285
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0152, -0.0580,  0.0534,  0.0500, -0.0080,  0.0275,  0.0364])
Average return: 0.8095557154977432
DFE Rewards Mea

 85%|████████▌ | 35/41 [00:03<00:00, 10.19it/s]

mean tensor([-0.0175, -0.0521,  0.0548,  0.0409,  0.0008,  0.0076,  0.0234])
Average return: 0.8965599802734157
DFE Rewards Mean: 0.8965599802734157
DFE Rewards standard dev 0.08901552195752253
Returns Mean: 3.1307304
Returns standard dev 2.5322015
Advantages Mean: 1.4637333
Advantages standard dev 2.5322015
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0225, -0.0798,  0.0613,  0.0650,  0.0215, -0.0050, -0.0064])
Average return: 0.8044381662642647
DFE Rewards Mean: 0.8044381662642647
DFE Rewards standard dev 0.20415453368402267
Returns Mean: 2.644345
Returns standard dev 2.5238638
Advantages Mean: 0.73219174
Advantages standard dev 2.5238638
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])
mean tensor([-0.0198, -0.0619,  0.0568,  0.0964,  0.0344,  0.0094, -0.0173])
Average return: 0.8517239167530126
DFE Rewards Mean

 95%|█████████▌| 39/41 [00:03<00:00, 10.32it/s]

mean tensor([ 0.0340, -0.1205,  0.0675,  0.0746,  0.0357,  0.0004, -0.0500])
Average return: 0.8592462341786888
DFE Rewards Mean: 0.8592462341786888
DFE Rewards standard dev 0.12058967184679621
Returns Mean: 2.624636
Returns standard dev 2.0748253
Advantages Mean: 0.2185318
Advantages standard dev 2.0748253
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0121, -0.0726,  0.0542,  0.0559,  0.0344, -0.0128, -0.0204])
Average return: 0.919771507547959
DFE Rewards Mean: 0.919771507547959
DFE Rewards standard dev 0.08371806696945931
Returns Mean: 3.4585795
Returns standard dev 2.4844203
Advantages Mean: 1.186446
Advantages standard dev 2.48442
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0161, -0.0548,  0.0747,  0.0179,  0.0162, -0.0195, -0.0305])
Average return: 0.9705688684334822
DFE Rewards Mean: 0.970568

100%|██████████| 41/41 [00:04<00:00, 10.01it/s]
[I 2023-12-22 16:41:13,446] Trial 0 finished with value: 0.919771507547959 and parameters: {'N_UPDATES': 41, 'N_EPOCHS': 8, 'MINIBATCH_SIZE': 96, 'BATCHSIZE_MULTIPLIER': 5, 'LR': 0.004328386484390127, 'GAMMA': 0.982912259126364, 'GAE_LAMBDA': 0.9509902702390876, 'ENT_COEF': 0.0005003431067605046, 'V_COEF': 0.37482712101861676, 'GRADIENT_CLIP': 0.3118868436386038, 'CLIP_VALUE_COEF': 0.18832393522435506, 'CLIP_RATIO': 0.11896196318308204}. Best is trial 0 with value: 0.919771507547959.


mean tensor([ 0.0221, -0.0907,  0.0814,  0.0091,  0.0283, -0.0079, -0.0325])
Average return: 0.9840382269285838
DFE Rewards Mean: 0.9840382269285838
DFE Rewards standard dev 0.02022401408049091
Returns Mean: 6.1708503
Returns standard dev 3.892446
Advantages Mean: 3.0529683
Advantages standard dev 3.8924463
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0313, -0.1003,  0.0890,  0.0042,  0.0279, -0.0144, -0.0613])
Average return: 0.937077008492953
DFE Rewards Mean: 0.937077008492953
DFE Rewards standard dev 0.07634692418079787
Returns Mean: 4.382648
Returns standard dev 3.418599
Advantages Mean: 0.71017724
Advantages standard dev 3.4185987
Fidelity History: []
2023-12-22 16:41:13 INFO ---------------- FINISHED HPO ----------------
2023-12-22 16:41:13 INFO HPO completed in 4.18 seconds.
2023-12-22 16:41:13 INFO Best trial:
2023-12-22 16:41:13 INFO -------------------------
2023-12-22 16:41:13 INF

In [10]:
# Show the target gate and target register as stored on the optimizer.
optimizer.target_gate

{'target_gate': Instruction(name='cx', num_qubits=2, num_clbits=0, params=[]),
 'target_register': [0, 1]}

In [6]:
# List the hyperparameter names exposed by the optimizer.
optimizer.hyperparams

['N_UPDATES',
 'N_EPOCHS',
 'MINIBATCH_SIZE',
 'BATCHSIZE_MULTIPLIER',
 'LR',
 'GAMMA',
 'GAE_LAMBDA',
 'ENT_COEF',
 'V_COEF',
 'GRADIENT_CLIP',
 'CLIP_VALUE_COEF',
 'CLIP_RATIO',
 'BATCHSIZE']

In [7]:
# Number of HPO trials the optimizer was configured with.
optimizer.num_hpo_trials

1

In [8]:
# Show the best average return found and the hyperparameters that produced it.
optimizer.best_hpo_configuration

{'best_avg_return': 0.919771507547959,
 'best_hyperparams': {'N_UPDATES': 41,
  'N_EPOCHS': 8,
  'MINIBATCH_SIZE': 96,
  'BATCHSIZE_MULTIPLIER': 5,
  'LR': 0.004328386484390127,
  'GAMMA': 0.982912259126364,
  'GAE_LAMBDA': 0.9509902702390876,
  'ENT_COEF': 0.0005003431067605046,
  'V_COEF': 0.37482712101861676,
  'GRADIENT_CLIP': 0.3118868436386038,
  'CLIP_VALUE_COEF': 0.18832393522435506,
  'CLIP_RATIO': 0.11896196318308204}}