In [None]:
# Cell 1: Imports and Configuration 
# Imports essential libraries for data serialization and visualization.

import pickle  # Used for loading experiment data

# PyTorch (Required for unpickling data containing Tensors)
import torch

# Matplotlib for plotting training results
import matplotlib.pyplot as plt

In [None]:
# Cell 2: Device Configuration
# Selects the 'cuda' device when PyTorch reports an available GPU, otherwise falls back to 'cpu'.

cuda_ok = torch.cuda.is_available()
device = torch.device("cuda") if cuda_ok else torch.device("cpu")
print(f"Device set to: {device}")

In [None]:
# Cell 3: Load Experiment Data (Iris)
# Reads the four pickled per-episode logs for the 'Iris' task from the current working directory.
# NOTE: pickle.load can run arbitrary code stored in the file, so only load .pkl files you trust.


def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


reward_ep_list = _load_pickle('reward_ep_list_iris.pkl')
reward_sum_ep_list = _load_pickle('reward_sum_ep_list_iris.pkl')
obs_ep_list = _load_pickle('obs_ep_list_iris.pkl')
outs_ep_list = _load_pickle('outs_ep_list_iris.pkl')

In [None]:
# Cell 4: Cumulative Reward Visualization
# Scatter-plots every entry of reward_sum_ep_list against its episode index
# and saves the figure to 'p1.png'.

# Episode indices for the x-axis. Kept as the notebook-level name `x` so that
# any later cell referring to `x` still works.
x = list(range(len(reward_sum_ep_list)))

# float() accepts plain Python numbers as well as single-element tensors on any
# device (including ones that track gradients), so Matplotlib always receives
# ordinary floats.
reward_sum_values = [float(t) for t in reward_sum_ep_list]

# Draw on an explicit figure so the points never land on a figure that another
# cell left open.
fig_sum, ax_sum = plt.subplots()
ax_sum.scatter(x, reward_sum_values, alpha=0.3, s=10)
ax_sum.set_xlabel("Episode")
ax_sum.set_ylabel("Cumulative reward")
ax_sum.set_title("Iris: cumulative reward per episode")

# Save the plot as an image file
fig_sum.savefig('p1.png', dpi=200)

In [None]:
# Cell 5: Episode Reward Visualization
# Scatter-plots every entry of reward_ep_list against its episode index
# and saves the figure to 'p2.png'.

# Build the x-axis from the list that is actually plotted here instead of
# relying on an index list left behind by another cell.
episode_idx = list(range(len(reward_ep_list)))

# float() accepts plain Python numbers as well as single-element tensors on any
# device (including ones that track gradients), so Matplotlib always receives
# ordinary floats.
episode_rewards = [float(t) for t in reward_ep_list]

# Draw on an explicit figure so the points never land on a figure that another
# cell left open.
fig_ep, ax_ep = plt.subplots()
ax_ep.scatter(episode_idx, episode_rewards, alpha=0.3, s=10)
ax_ep.set_xlabel("Episode")
ax_ep.set_ylabel("Episode reward")
ax_ep.set_title("Iris: reward per episode")

# Save the plot as a high-resolution image
fig_ep.savefig('p2.png', dpi=200)

In [None]:
# Cell 6: Best Reward Identification (Iris)
# Finds the largest entry of reward_ep_list and shows the index of the first episode that reached it.

m_val = max(reward_ep_list)  # highest reward; later cells read this name
best_reward_idx = reward_ep_list.index(m_val)
best_reward_idx  # bare last expression, rendered as the cell's output

In [None]:
# Cell 7: Best Episode Optimization Log
# Prints the per-step records stored in outs_ep_list for the highest-reward
# episode as a fixed-width table (step, cost, train accuracy, test accuracy).


def _as_scalar(value):
    """Return `value.item()` for a torch.Tensor, otherwise `value` unchanged."""
    return value.item() if isinstance(value, torch.Tensor) else value


# Index of the best episode (m_val is the maximum reward found in Cell 6)
best_idx = reward_ep_list.index(m_val)
best_data = outs_ep_list[best_idx]

print(f"Best Episode Index: {best_idx}")
print("-" * 65)
print(f"{'Opt Step':<10} | {'Cost (Loss)':<15} | {'Train Acc':<12} | {'Test Acc':<12}")
print("-" * 65)

for step_data in best_data:
    # Fields are read by position: 0 = step, 1 = cost, 2 = train accuracy.
    it_val = int(_as_scalar(step_data[0]))
    cost_val = _as_scalar(step_data[1])
    train_acc = _as_scalar(step_data[2])

    # A record with only three fields has no test accuracy; 0.0 is printed
    # as a placeholder in that case.
    test_acc = _as_scalar(step_data[3]) if len(step_data) > 3 else 0.0

    print(f"{it_val:<10} | {cost_val:<15.6f} | {train_acc:<12.4f} | {test_acc:<12.4f}")
    

In [None]:
# Cell 8: Best Episode Observations
# Prints the entry of obs_ep_list recorded for the highest-reward episode.

best_obs_idx = reward_ep_list.index(m_val)  # m_val comes from Cell 6
best_obs = obs_ep_list[best_obs_idx]
print(best_obs)