### CX gate calibration with hyperparameter optimization (HPO) under the new code architecture / workflow

In [1]:
import sys
import os
# NOTE(review): presumably a workaround for the OpenMP "duplicate runtime" abort that can occur
# when several imported libraries each bundle their own OpenMP copy. It is set here, above the
# remaining imports, so it is already in the environment when they load — confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
import time
import yaml
import pickle
import optuna
# Put the project checkout on sys.path so the project-local imports below resolve.
# The location defaults to the original author's local checkout; set the QOC_MODULE_PATH
# environment variable to point at your own clone instead of editing this cell.
module_path = os.path.abspath(
    os.environ.get(
        "QOC_MODULE_PATH",
        '/Users/lukasvoss/Documents/Master Wirtschaftsphysik/Masterarbeit Yale-NUS CQT/Quantum_Optimal_Control',
    )
)
# Guard keeps sys.path free of duplicate entries when the cell is re-run.
if module_path not in sys.path:
    sys.path.append(module_path)

from quantumenvironment import QuantumEnvironment
from agent import Agent
from template_configurations import gate_q_env_config
from helper_functions import load_agent_from_yaml_file, create_agent_config
from ppo import make_train_ppo
from qconfig import QEnvConfig

import logging

# Root-logger setup: records at WARNING and above are written to stdout so they show up
# inline with the cell output. Note that the format string prints the literal text "INFO"
# for every record, whatever its actual level.
_LOGGING_KWARGS = {
    "stream": sys.stdout,
    "datefmt": "%Y-%m-%d %H:%M:%S",
    "format": "%(asctime)s INFO %(message)s",  # hardcoded INFO level
    "level": logging.WARNING,
}
logging.basicConfig(**_LOGGING_KWARGS)



Starting Rabi experiment for qubit 0...
Rabi experiment for qubit 0 done.
Starting Drag experiment for qubit 0...
Drag experiments done for qubit 0 done.
Starting Rabi experiment for qubit 1...
Rabi experiment for qubit 1 done.
Starting Drag experiment for qubit 1...
Drag experiments done for qubit 1 done.
All single qubit calibrations are done
Updated Instruction Schedule Map <InstructionScheduleMap(1Q instructions:
  q0: {'measure', 's', 'h', 'rz', 'sdg', 'tdg', 'x', 'delay', 'z', 't', 'sx', 'reset', 'id'}
  q1: {'measure', 's', 'h', 'rz', 'sdg', 'tdg', 'x', 'delay', 'z', 't', 'sx', 'reset', 'id'}
Multi qubit instructions:
  (0, 1): {'ecr', 'cr45m', 'cr45p'}
  (1, 0): {'ecr', 'cr45m', 'cr45p'}
)>


In [2]:
# Display the calibration target held by the imported environment configuration
# (the output below shows its 'register' and 'gate' entries).
gate_q_env_config.target

{'register': [0, 1],
 'gate': Instruction(name='cx', num_qubits=2, num_clbits=0, params=[])}

### Perform HPO

In [3]:
from hyperparameter_optimization import HyperparameterOptimizer

In [4]:
# Location handed to HyperparameterOptimizer for storing HPO results. This is a relative
# path — presumably resolved against the notebook's working directory; confirm.
save_results_path: str = 'hpo_results'

# YAML file with the agent configuration template.
# NOTE(review): absolute, machine-specific path — adjust to your own checkout before running.
path_agent_config: str = '/Users/lukasvoss/Documents/Master Wirtschaftsphysik/Masterarbeit Yale-NUS CQT/Quantum_Optimal_Control/template_configurations/agent_config.yaml'

In [5]:
# Build the optimizer from the environment config, the agent YAML path and the results
# location, then run the search. A single trial is requested here (num_hpo_trials=1),
# with progress logging switched on.
optimizer = HyperparameterOptimizer(
    gate_q_env_config,
    path_agent_config,
    save_results_path,
    num_hpo_trials=1,
    log_progress=True,
)
optimizer.optimize_hyperparameters()

SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])
2023-12-23 00:07:55 INFO num_HPO_trials: 1
2023-12-23 00:07:55 INFO ---------------- STARTING HPO ----------------


[I 2023-12-23 00:07:55,582] A new study created in memory with name: no-name-2d779743-c596-4a0e-af89-4e91ec314f35


SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j,  0.25+0.j, -0.25+0.j])


  0%|          | 0/20 [00:00<?, ?it/s]

SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


  5%|▌         | 1/20 [00:00<00:04,  4.59it/s]

mean tensor([-0.0720, -0.0462,  0.0523,  0.0769,  0.0618, -0.0354,  0.0755])
Average return: 0.3104491438766685
DFE Rewards Mean: 0.3104491438766685
DFE Rewards standard dev 0.28816555159621515
Returns Mean: 0.59442985
Returns standard dev 1.145644
Advantages Mean: 0.4529802
Advantages standard dev 1.145644
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 10%|█         | 2/20 [00:00<00:02,  6.11it/s]

mean tensor([ 0.0013, -0.0870,  0.0947,  0.1635,  0.0720, -0.0706,  0.1047])
Average return: 0.2662215904470924
DFE Rewards Mean: 0.2662215904470924
DFE Rewards standard dev 0.2623883453811674
Returns Mean: 0.42341322
Returns standard dev 0.5468582
Advantages Mean: -0.38391724
Advantages standard dev 0.5468582
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 20%|██        | 4/20 [00:00<00:03,  4.93it/s]

mean tensor([ 0.0333, -0.0668,  0.0775,  0.1809,  0.1041, -0.0193,  0.0908])
Average return: 0.27224696796368364
DFE Rewards Mean: 0.27224696796368364
DFE Rewards standard dev 0.2017948900554707
Returns Mean: 0.3709127
Returns standard dev 0.3511727
Advantages Mean: -0.2499692
Advantages standard dev 0.3511727
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0156, -0.0334,  0.0643,  0.0893,  0.0365,  0.0057,  0.0849])
Average return: 0.4452976661287527
DFE Rewards Mean: 0.4452976661287527
DFE Rewards standard dev 0.2926338239280477
Returns Mean: 0.8984009
Returns standard dev 1.3354753
Advantages Mean: 0.61992556
Advantages standard dev 1.3354753
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YZ', 'ZX'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])


 30%|███       | 6/20 [00:01<00:02,  6.44it/s]

mean tensor([ 0.0498, -0.0958,  0.0137,  0.0644,  0.0628, -0.0478,  0.0633])
Average return: 0.2555766508430881
DFE Rewards Mean: 0.2555766508430881
DFE Rewards standard dev 0.19677312688283952
Returns Mean: 0.34120235
Returns standard dev 0.31305873
Advantages Mean: -0.13664764
Advantages standard dev 0.31305873
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])
mean tensor([ 0.0532, -0.0616,  0.0488,  0.0643,  0.0561, -0.0512,  0.0289])
Average return: 0.2716838495797264
DFE Rewards Mean: 0.2716838495797264
DFE Rewards standard dev 0.19843042555596963
Returns Mean: 0.36597183
Returns standard dev 0.32589594
Advantages Mean: -0.016362699
Advantages standard dev 0.32589594
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 40%|████      | 8/20 [00:01<00:01,  7.38it/s]

mean tensor([ 0.0442, -0.0059,  0.0713,  0.0393,  0.0478, -0.0478, -0.0046])
Average return: 0.26833691064147375
DFE Rewards Mean: 0.26833691064147375
DFE Rewards standard dev 0.2070548044082298
Returns Mean: 0.37216944
Returns standard dev 0.38132143
Advantages Mean: -0.022031147
Advantages standard dev 0.38132143
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0350, -0.0235,  0.0734,  0.0300,  0.0363, -0.0657, -0.0142])
Average return: 0.33245623955879394
DFE Rewards Mean: 0.33245623955879394
DFE Rewards standard dev 0.22242360609612302
Returns Mean: 0.4819039
Returns standard dev 0.43438798
Advantages Mean: 0.11889658
Advantages standard dev 0.43438798
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'XI', 'XZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 50%|█████     | 10/20 [00:01<00:01,  7.64it/s]

mean tensor([ 0.0329, -0.0519,  0.0588,  0.0339,  0.0046, -0.0710,  0.0109])
Average return: 0.5400920445060734
DFE Rewards Mean: 0.5400920445060734
DFE Rewards standard dev 0.2768538196597294
Returns Mean: 1.1780207
Returns standard dev 1.5760458
Advantages Mean: 0.69913507
Advantages standard dev 1.5760458
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([ 0.0149, -0.0491,  0.0614,  0.0505, -0.0293, -0.0393, -0.0144])
Average return: 0.3793708197580198
DFE Rewards Mean: 0.3793708197580198
DFE Rewards standard dev 0.24252749062683002
Returns Mean: 0.58623564
Returns standard dev 0.52225506
Advantages Mean: -0.16916993
Advantages standard dev 0.52225506
Fidelity History: []
SparsePauliOp(['II', 'XY', 'YX', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


 60%|██████    | 12/20 [00:01<00:01,  7.98it/s]

mean tensor([ 0.0047, -0.0579,  0.0825,  0.0083, -0.0328, -0.0510, -0.0177])
Average return: 0.46360570941655876
DFE Rewards Mean: 0.46360570941655876
DFE Rewards standard dev 0.2461524796734564
Returns Mean: 0.79442066
Returns standard dev 0.8623529
Advantages Mean: 0.19713157
Advantages standard dev 0.8623529
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0494, -0.0627,  0.1031,  0.0264, -0.0404, -0.0448, -0.0080])
Average return: 0.7047862182432507
DFE Rewards Mean: 0.7047862182432507
DFE Rewards standard dev 0.21988725167560189
Returns Mean: 1.6958557
Returns standard dev 1.5257628
Advantages Mean: 0.91592675
Advantages standard dev 1.5257628
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j,  0.25+0.j])


 70%|███████   | 14/20 [00:02<00:00,  7.86it/s]

mean tensor([-0.0095, -0.0803,  0.0663, -0.0173,  0.0036, -0.0783, -0.0240])
Average return: 0.5953516359304483
DFE Rewards Mean: 0.5953516359304483
DFE Rewards standard dev 0.22913334083701428
Returns Mean: 1.1300436
Returns standard dev 0.91213477
Advantages Mean: 0.13261017
Advantages standard dev 0.91213477
Fidelity History: []
SparsePauliOp(['II', 'IY', 'XI', 'XY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([-0.0226, -0.0894,  0.0502, -0.0115,  0.0029, -0.0709, -0.0284])
Average return: 0.5032446649168294
DFE Rewards Mean: 0.5032446649168294
DFE Rewards standard dev 0.2650552155216209
Returns Mean: 0.93935686
Returns standard dev 0.9745769
Advantages Mean: -0.11496029
Advantages standard dev 0.9745769
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])


 80%|████████  | 16/20 [00:02<00:00,  8.15it/s]

mean tensor([-0.0439, -0.0644,  0.0736, -0.0367,  0.0219, -0.0271, -0.0334])
Average return: 0.8052078788682994
DFE Rewards Mean: 0.8052078788682994
DFE Rewards standard dev 0.15083294014973123
Returns Mean: 2.1825287
Returns standard dev 1.7994984
Advantages Mean: 1.1834878
Advantages standard dev 1.7994984
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'ZI', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j, -0.25+0.j, -0.25+0.j])
mean tensor([-0.0088, -0.1112,  0.0454, -0.0058,  0.0144, -0.0463, -0.0321])
Average return: 0.8866089302687679
DFE Rewards Mean: 0.8866089302687679
DFE Rewards standard dev 0.09966202542900064
Returns Mean: 2.8926032
Returns standard dev 2.1338577
Advantages Mean: 1.5777336
Advantages standard dev 2.1338577
Fidelity History: []
SparsePauliOp(['II', 'IZ', 'YI', 'YZ'],
              coeffs=[ 0.25+0.j, -0.25+0.j, -0.25+0.j,  0.25+0.j])


 90%|█████████ | 18/20 [00:02<00:00,  8.19it/s]

mean tensor([-0.0231, -0.1252,  0.0359, -0.0061,  0.0157, -0.0247, -0.0108])
Average return: 0.8137680418239258
DFE Rewards Mean: 0.8137680418239258
DFE Rewards standard dev 0.15781369235142798
Returns Mean: 2.340062
Returns standard dev 2.0948775
Advantages Mean: 0.66861117
Advantages standard dev 2.0948775
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YZ', 'ZY'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])
mean tensor([ 0.0209, -0.1300,  0.0588, -0.0009, -0.0147,  0.0247,  0.0137])
Average return: 0.7941403483286485
DFE Rewards Mean: 0.7941403483286485
DFE Rewards standard dev 0.1827672208751961
Returns Mean: 2.1626546
Returns standard dev 1.7580746
Advantages Mean: 0.20545149
Advantages standard dev 1.7580748
Fidelity History: []
SparsePauliOp(['II', 'IX', 'XI', 'XX'],
              coeffs=[0.25+0.j, 0.25+0.j, 0.25+0.j, 0.25+0.j])


100%|██████████| 20/20 [00:02<00:00,  7.19it/s]
[I 2023-12-23 00:07:58,477] Trial 0 finished with value: 0.8617435751783177 and parameters: {'N_UPDATES': 20, 'N_EPOCHS': 14, 'MINIBATCH_SIZE': 96, 'BATCHSIZE_MULTIPLIER': 5, 'LR': 0.006767677946519087, 'GAMMA': 0.9561771362827869, 'GAE_LAMBDA': 0.9589510748087021, 'ENT_COEF': 0.000694685088419939, 'V_COEF': 0.2570973727277017, 'GRADIENT_CLIP': 0.8409877787848272, 'CLIP_VALUE_COEF': 0.298087314059173, 'CLIP_RATIO': 0.2736523661628351}. Best is trial 0 with value: 0.8617435751783177.


mean tensor([ 0.0139, -0.0899,  0.1011, -0.0082, -0.0086,  0.0160, -0.0036])
Average return: 0.8617435751783177
DFE Rewards Mean: 0.8617435751783177
DFE Rewards standard dev 0.13803915070443884
Returns Mean: 2.8858993
Returns standard dev 2.480107
Advantages Mean: 0.7201486
Advantages standard dev 2.480107
Fidelity History: []
SparsePauliOp(['II', 'XX', 'YY', 'ZZ'],
              coeffs=[ 0.25+0.j,  0.25+0.j,  0.25+0.j, -0.25+0.j])
mean tensor([ 0.0081, -0.1095,  0.0757, -0.0053,  0.0068,  0.0066, -0.0021])
Average return: 0.914288077972121
DFE Rewards Mean: 0.914288077972121
DFE Rewards standard dev 0.08129016255054988
Returns Mean: 3.3697195
Returns standard dev 2.5820243
Advantages Mean: 0.88649833
Advantages standard dev 2.5820243
Fidelity History: []
2023-12-23 00:07:58 INFO ---------------- FINISHED HPO ----------------
2023-12-23 00:07:58 INFO HPO completed in 2.9 seconds.
2023-12-23 00:07:58 INFO Best trial:
2023-12-23 00:07:58 INFO -------------------------
2023-12-23 00:07:58

In [6]:
# Display the target the optimizer is calibrating
# (the output below shows its 'target_gate' and 'target_register' entries).
optimizer.target_gate

{'target_gate': Instruction(name='cx', num_qubits=2, num_clbits=0, params=[]),
 'target_register': [0, 1]}

In [7]:
# Display the hyperparameter names known to the optimizer.
# NOTE(review): the list ends with 'BATCHSIZE', which is not among the parameters reported
# for trial 0 in the HPO log — presumably it is derived from MINIBATCH_SIZE and
# BATCHSIZE_MULTIPLIER rather than sampled; confirm.
optimizer.hyperparams

['N_UPDATES',
 'N_EPOCHS',
 'MINIBATCH_SIZE',
 'BATCHSIZE_MULTIPLIER',
 'LR',
 'GAMMA',
 'GAE_LAMBDA',
 'ENT_COEF',
 'V_COEF',
 'GRADIENT_CLIP',
 'CLIP_VALUE_COEF',
 'CLIP_RATIO',
 'BATCHSIZE']

In [8]:
# Display the number of HPO trials the optimizer holds (1 was passed to the constructor above).
optimizer.num_hpo_trials

1

In [9]:
# Display the best configuration found by the search
# (the output below shows 'best_avg_return' and the matching 'best_hyperparams').
optimizer.best_hpo_configuration

{'best_avg_return': 0.8617435751783177,
 'best_hyperparams': {'N_UPDATES': 20,
  'N_EPOCHS': 14,
  'MINIBATCH_SIZE': 96,
  'BATCHSIZE_MULTIPLIER': 5,
  'LR': 0.006767677946519087,
  'GAMMA': 0.9561771362827869,
  'GAE_LAMBDA': 0.9589510748087021,
  'ENT_COEF': 0.000694685088419939,
  'V_COEF': 0.2570973727277017,
  'GRADIENT_CLIP': 0.8409877787848272,
  'CLIP_VALUE_COEF': 0.298087314059173,
  'CLIP_RATIO': 0.2736523661628351}}