In [None]:
# Cell 1: Imports and Configuration
# Imports essential libraries for data serialization and visualization.

import pickle  # Used for loading experiment data

# PyTorch (Required for unpickling data containing Tensors)
import torch

# Matplotlib for plotting training results
import matplotlib.pyplot as plt

In [None]:
# Cell 2: Load Experiment Data
# Loads the pickled training logs for the 'Parity' task from the current
# working directory into four module-level names used by the cells below:
# reward_ep_list, reward_sum_ep_list, obs_ep_list and outs_ep_list.
#
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing.
# Only run this cell on .pkl files produced by a trusted source.


def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in binary mode and closed again before returning.
    Errors from `open` (e.g. a missing file) and from `pickle.load`
    propagate to the caller unchanged.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


reward_ep_list = _load_pickle('reward_ep_list_parity.pkl')
reward_sum_ep_list = _load_pickle('reward_sum_ep_list_parity.pkl')
obs_ep_list = _load_pickle('obs_ep_list_parity.pkl')
outs_ep_list = _load_pickle('outs_ep_list_parity.pkl')

In [None]:
# Cell 4: Cumulative Reward Visualization
# Scatter-plots every entry of reward_sum_ep_list against its 0-based
# position in the list and saves the figure to 'p1.png'.

# `x` stays a module-level list because the Cell 5 plot reads it as well.
x = list(range(len(reward_sum_ep_list)))

# Draw on a dedicated figure instead of pyplot's implicit "current figure".
# Outside the inline backend the implicit figure is shared between cells, so
# later scatter calls would otherwise be drawn on top of this one.
fig_cum, ax_cum = plt.subplots()
ax_cum.scatter(x, reward_sum_ep_list, alpha=0.3, s=10)
ax_cum.set_xlabel('Episode')
ax_cum.set_ylabel('Cumulative reward')
ax_cum.set_title('Cumulative reward per episode (Parity)')

# NOTE(review): dpi=1000 produces a very large image (Cell 5 saves at 200);
# kept as-is so the saved file's resolution does not change.
fig_cum.savefig('p1.png', dpi=1000)

In [None]:
# Cell 5: Episode Reward Visualization
# Scatter-plots every entry of reward_ep_list against its 0-based position
# in the list and saves the figure to 'p2.png'.

# NOTE(review): x_r / y_r look like the two endpoints of a horizontal
# reference line, but nothing in this cell draws them. Confirm whether a
# line plot of (x_r, y_r) was intended. They are kept unchanged in case
# other cells read them.
x_r = [0, 4999]
y_r = [13.978015640953432, 13.978015640953432]

# Build the x-axis from reward_ep_list itself rather than reusing `x` from
# Cell 4 (which is sized from reward_sum_ep_list). This removes the hidden
# dependency on Cell 4 and the shape error scatter raises if the two lists
# ever differ in length.
episode_idx = list(range(len(reward_ep_list)))

# Dedicated figure: with pyplot's implicit "current figure" this scatter can
# end up on the same axes as the Cell 4 plot when not using the inline backend.
fig_ep, ax_ep = plt.subplots()
ax_ep.scatter(episode_idx, reward_ep_list, alpha=0.3, s=10)
ax_ep.set_xlabel('Episode')
ax_ep.set_ylabel('Reward')
ax_ep.set_title('Reward per episode (Parity)')
fig_ep.savefig('p2.png', dpi=200)

In [None]:
# Cell 6: Best Reward Identification
# Finds the largest per-episode reward and shows the position at which it
# first occurs in reward_ep_list.

# Largest value in the per-episode reward log; Cells 7 and 8 read `m_val`.
m_val = max(reward_ep_list)

# list.index returns the first position holding that value. Leaving the name
# as the cell's final expression makes the notebook display it.
best_reward_position = reward_ep_list.index(m_val)
best_reward_position

In [None]:
# Cell 7: Best Episode Actions
# Prints, as a fixed-width text table, the optimization history (cost and
# accuracy per step) stored in outs_ep_list for the best-scoring episode.
# Note: This data represents the parameter tuning process (15 steps), NOT the gate sequence.

# Position of the first episode whose reward equals the maximum (`m_val`
# comes from Cell 6), and the records logged for that episode.
best_idx = reward_ep_list.index(m_val)
best_data = outs_ep_list[best_idx]

# Column labels follow the parity_definition.ipynb analysis.
rule = "-" * 50
header = f"{'Opt Step':<10} | {'Cost (Loss)':<15} | {'Accuracy':<10}"

# Title line, then the header framed by two horizontal rules.
print(f"Best Episode Index: {best_idx}", rule, header, rule, sep="\n")

# Each record unpacks into exactly three values: [Iteration, Cost, Accuracy].
for iteration, cost, acc in best_data:
    print(f"{int(iteration):<10} | {cost:<15.6f} | {acc:<10.4f}")

In [None]:
# Cell 8: Best Episode Observations
# Prints the entry of obs_ep_list recorded for the best performing episode,
# i.e. the observations (states) the agent encountered during that episode.

# First position in reward_ep_list whose reward equals the maximum `m_val`
# (computed in Cell 6).
best_episode_position = reward_ep_list.index(m_val)
best_episode_obs = obs_ep_list[best_episode_position]
print(best_episode_obs)