In [None]:
# Cell 1: Imports and Configuration
# Imports necessary libraries for data loading (pickle), quantum simulation (PennyLane), 
# deep learning (Torch), and visualization (Matplotlib).

import pickle
import numpy as np
import torch
import matplotlib.pyplot as plt
import pennylane as qml
import import_ipynb

In [None]:
# Cell 2: Import Custom Environment
# Imports the specific model definition (Environment & Classifier) to reconstruct the trained model.
# Note: 'model_c_definition_rl' is used here as the base environment.
from model_c_definition_rl import opt_classifier
import model_c_definition_rl as qenv

In [None]:
# Cell 3: Device Check
# Verifies if CUDA (GPU) is available for accelerated inference.
# Returns True if a CUDA-enabled GPU is detected, otherwise False.
# The value is only shown as this cell's output; it is not assigned to a
# variable, so later cells cannot read it from here.
torch.cuda.is_available()

In [None]:
# Cell 4: Load Training Logs
# Loads the serialized training history and model checkpoints from disk.
# These files contain rewards, accuracy metrics, circuit structures (obs), and final weights.
#
# SECURITY: pickle.load can execute arbitrary code embedded in a file. Only run this
# cell on .pkl files that you produced yourself or otherwise trust.

def _load_pickle(filename):
    """Return the object stored in the pickle file `filename`.

    Args:
        filename: Path of the .pkl file, resolved relative to the working directory.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    with open(filename, 'rb') as handle:
        return pickle.load(handle)


reward_ep_list = _load_pickle('reward_ep_list_non.pkl')
reward_sum_ep_list = _load_pickle('reward_sum_ep_list_non.pkl')
obs_ep_list = _load_pickle('obs_ep_list_non.pkl')
outs_ep_list = _load_pickle('outs_ep_list_non.pkl')
figset_ep_list = _load_pickle('figset_ep_list_non.pkl')
final_ep_list = _load_pickle('final_ep_list_non.pkl')
# NOTE(review): the variable is called acc_train_ep but it is read from
# 'acc_val_last_ep_non.pkl' -- confirm whether this holds training or validation accuracy.
acc_train_ep = _load_pickle('acc_val_last_ep_non.pkl')
acc_val_max = _load_pickle('acc_val_max_ep_non.pkl')
cost_ep = _load_pickle('cost_ep_non.pkl')

In [None]:
# Cell 5: Cumulative Reward Visualization
# Scatter plot of the cumulative reward recorded for every episode, giving a view of
# the agent's total return over the course of training.

# Episode positions 0..N-1 for the horizontal axis.
# Kept as a module-level list named `x` because the next plotting cell reuses it.
x = list(range(len(reward_sum_ep_list)))

# Small (s=10), semi-transparent (alpha=0.4) markers keep overlapping points readable.
plt.scatter(x, list(reward_sum_ep_list), s=10, alpha=0.4)

# Write the figure to disk at 200 DPI.
plt.savefig('p1.png', dpi=200)

In [None]:
# Cell 6: Max Reward Visualization
# Visualizes the maximum reward achieved in each episode to track the agent's peak performance.
# Unlike cumulative reward, this metric highlights the best single-step decision or short-term success.

# Build the episode axis from the series that is actually plotted, so this cell does not
# rely on an `x` left behind by another cell (which was sized from a different list).
episodes = list(range(len(reward_ep_list)))

# Plot the maximum rewards as a scatter plot.
# label='RL' names the series for the legend drawn below.
plt.scatter(episodes, list(reward_ep_list), alpha=0.4, s=10, label='RL')

# A label is only rendered when a legend is requested explicitly.
plt.legend()

# Save the plot with high resolution (200 DPI) for paper inclusion
plt.savefig('p2.png', dpi=200)

In [None]:
# Cell 7: Accuracy Visualization
# Plots the maximum validation accuracy achieved in each episode to analyze learning stability.
# The x-axis represents the training episodes, and the y-axis represents the validation accuracy.

plt.plot(acc_val_max)
plt.xlabel('Episode')
plt.ylabel('Accuracy')

# Save BEFORE showing: once plt.show() has displayed the figure it may be released by the
# backend (the notebook inline backend closes it), and a later savefig() would then write
# a new, empty figure to 'acc.png'.
plt.savefig('acc.png')
plt.show()

In [None]:
# Cell 8: Cost Visualization
# Line plot of the stored cost (loss) values, one point per episode, used to check
# whether training converged.

# Draw the curve first, then annotate the axes.
plt.plot(cost_ep)
plt.ylabel('Cost')      # vertical axis: cost value
plt.xlabel('Episode')   # horizontal axis: training episode
plt.show()

In [None]:
# Cell 9: Best Episode Identification
# Locates the episode whose entry in acc_val_max is the largest.

# Highest value stored in acc_val_max.
best_val = max(acc_val_max)

# .index() returns the first position holding that value; adding 1 turns the
# 0-based position into a 1-based episode number.
# Note: despite its name, `best_acc` holds an episode number, not an accuracy.
best_acc = 1 + acc_val_max.index(best_val)

# Show the episode number followed by its accuracy.
print(best_acc, best_val)

In [None]:
# Cell 10: Circuit Reconstruction & Visualization (Automated Best Selection)
# Reconstructs and visualizes the quantum circuit structure (Ansatz) of the best-performing agent.
# This validates the topology discovered by the RL agent without manual index selection.

# Use the qubit count exposed by the environment module when it defines one;
# otherwise fall back to 6.
N_QUBITS = getattr(qenv, 'N_QUBITS', 6)

import numpy as np, pennylane as qml

def draw_from_snapshot(gs_prefix, w_rl):
    """
    Render the circuit defined by `qenv.circuit` with PennyLane's drawer.

    Args:
        gs_prefix: The gate sequence (actions) chosen by the RL agent; forwarded
            unchanged as the third argument of `qenv.circuit`.
        w_rl: Learned weights for the RL-appended gates. Must support
            `.detach().cpu().numpy()` (e.g. a torch tensor).

    Returns:
        The result of calling the `qml.draw` wrapper around `qenv.circuit`.
    """
    # Placeholder input: a float32 vector of length 2**N_QUBITS with 1.0 in slot 0
    # (the |0...0> state).
    basis_input = np.zeros(2**N_QUBITS, dtype=np.float32)
    basis_input[0] = 1.0

    render = qml.draw(qenv.circuit)
    weights_np = w_rl.detach().cpu().numpy()
    return render(weights_np, basis_input, gs_prefix)

# Use the best episode found in the previous cell.
# 'best_acc' is already the 1-based episode number; the list position is one less.
episode_num = best_acc
idx = episode_num - 1

accuracy_val = acc_val_max[idx]
print(f"Best Episode: {episode_num}, Accuracy: {accuracy_val:.4f}")

# Each entry of final_ep_list is a pair: the trained parameters and the gate sequence.
final, gs_prefix = final_ep_list[idx]
# The first two items of the parameter bundle are the RL gate weights and a bias;
# anything after them is collected into `_` and not used here.
w_rl, bias, *_ = final

# Print the diagram of the discovered Ansatz.
print(draw_from_snapshot(gs_prefix, w_rl))

# Print the learned parameters of the RL-added gates as a NumPy array.
print("Learned Parameters of RL-Added Gates:", w_rl.detach().cpu().numpy())