In [None]:
%matplotlib inline
import sys
import os
import numpy as np
import matplotlib.pyplot as plt

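# Workaround: uncomment the next line if the kernel aborts with an OpenMP
# "duplicate library" error (e.g. "OMP: Error #15"), which can happen when
# several installed packages each bundle their own OpenMP runtime.
# os.environ["KMP_DUPLICATE_LIB_OK"] = "True"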
from q_env_config import (
    q_env_config as gate_q_env_config,
    circuit_context,
)
from rl_qoc import QuantumEnvironment, ContextAwareQuantumEnvironment
from gymnasium.wrappers import RescaleAction, ClipAction

from rl_qoc.ppo_config import (
    TotalUpdates,
    HardwareRuntime,
    TrainingConfig,
    TrainFunctionSettings,
)

# Draw the circuit context imported from q_env_config with the "mpl" drawer;
# as the last expression of the cell, the returned object is displayed inline.
circuit_context.draw("mpl")

In [None]:
# Switch between the two environment flavours:
#   True  -> ContextAwareQuantumEnvironment, built with the circuit context
#   False -> plain QuantumEnvironment built from the gate configuration only
use_context = False

q_env = (
    ContextAwareQuantumEnvironment(
        gate_q_env_config, circuit_context, training_steps_per_gate=250
    )
    if use_context
    else QuantumEnvironment(gate_q_env_config)
)

# Wrap the environment for the agent: ClipAction is applied first, then
# RescaleAction with bounds -1.0 and 1.0 is applied on top of it.
rescaled_env = RescaleAction(
    ClipAction(q_env),
    -1.0,
    1.0,
)

In [None]:
# Draw the first circuit stored on the environment, using the "mpl" drawer
# with the "iqp" style.
q_env.circuits[0].draw("mpl", style="iqp")

In [None]:
# The circuit context is only drawn when the context-aware environment was
# selected; otherwise `fig` is None and the cell shows no output.
fig = q_env.circuit_context.draw("mpl") if use_context else None
fig

In [None]:
from rl_qoc.helper_functions import load_from_yaml_file
from pathlib import Path

# Name of the agent configuration file; edit this (and the location below)
# to point at a different configuration.
file_name = "agent_config.yaml"

# The file is looked up in the "template_configurations" folder located three
# directory levels above the current working directory, so the result depends
# on where the notebook kernel was started.
file_location = Path.cwd().parent.parent.parent.joinpath(
    "template_configurations", file_name
)

agent_config = load_from_yaml_file(file_location)

In [None]:
from rl_qoc import CustomPPO

# Instantiate the PPO agent from the loaded agent configuration and the
# wrapped environment `rescaled_env` (not the bare `q_env`).
ppo_agent = CustomPPO(agent_config, rescaled_env)

In [None]:
# Inspect the environment's `layout` attribute
# (presumably the qubit layout in use — TODO confirm).
q_env.layout

In [None]:
# Training constraint handed to TrainingConfig: a TotalUpdates budget of 100.
total_updates = TotalUpdates(100)
# Alternative constraint, kept for reference (units of 300 not shown here):
# hardware_runtime = HardwareRuntime(300)

# NOTE(review): the meaning of the options below is defined by rl_qoc; the
# names suggest fidelity thresholds, a history window and a tolerance on the
# action standard deviation — confirm against the library documentation.
training_config = TrainingConfig(
    target_fidelities=[0.999, 0.9999],
    training_constraint=total_updates,
    anneal_learning_rate=False,
    lookback_window=10,
    std_actions_eps=1e-2,
)

# Settings forwarded to the train call: live plotting and debug printing are
# switched on, HPO mode is off, and history clearing is requested.
train_function_settings = TrainFunctionSettings(
    hpo_mode=False,
    clear_history=True,
    plot_real_time=True,
    print_debug=True,
    num_prints=1,
)

In [None]:
# Run training with the configuration objects built in the previous cell and
# keep the returned results for inspection below.
training_results = ppo_agent.train(
    training_config=training_config,
    train_function_settings=train_function_settings,
)

In [None]:
# Show the "env_ident_str" entry of the training results
# (presumably an identifier string for the environment — TODO confirm).
training_results["env_ident_str"]

In [None]:
# List every key available in the training results.
list(training_results.keys())

In [None]:
# Show the "action_history" entry of the training results
# (presumably the actions recorded during training — TODO confirm).
training_results["action_history"]

In [None]:
# Read `optimal_action` from the unwrapped (base) environment
# (presumably the best action found during training — TODO confirm).
q_env.unwrapped.optimal_action