In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import seaborn

In [None]:
def find_latest_file_in_subfolders(base_folder):
    """Return the last file (by sorted name) of every 'sub*' subfolder.

    Args:
        base_folder: Directory whose immediate children are scanned.

    Returns:
        A list of file paths, ordered by subfolder name. It holds one entry
        for each subdirectory of ``base_folder`` whose name starts with
        'sub' and that contains at least one regular file.
    """
    output = []
    for folder_name in sorted(os.listdir(base_folder)):
        folder_path = os.path.join(base_folder, folder_name)
        if not (folder_name.startswith('sub') and os.path.isdir(folder_path)):
            continue

        # Only regular files are candidates: a nested directory that happens
        # to sort last must not be returned as the "latest file".
        files = sorted(
            entry
            for entry in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, entry))
        )
        if files:
            # "Latest" means last in lexicographic file-name order; file
            # modification times are not consulted.
            output.append(os.path.join(folder_path, files[-1]))
    return output

In [None]:
# Gather, for every 'sub*' folder under the models directory, the file that
# sorts last by name. The path is relative to the notebook's working directory.
base_directory = "../results/models"
all_files = find_latest_file_in_subfolders(base_folder=base_directory)

In [None]:
def load_loss_and_rewards(path):
    """Load the loss and reward histories stored in a saved checkpoint.

    Args:
        path: File readable by ``torch.load`` that holds a mapping with the
            keys ``'loss'`` and ``'rewards'``.

    Returns:
        Tuple ``(loss, reward)`` of NumPy arrays.
    """
    # map_location='cpu' lets a checkpoint that was written on a GPU machine
    # be read on a CPU-only one.
    # NOTE: torch.load unpickles the file, so only load trusted checkpoints.
    checkpoint = torch.load(path, map_location='cpu')
    loss = np.array(checkpoint['loss'])
    # Elements exposing .item() (e.g. single-element tensors) are converted
    # to Python scalars; anything else (plain numbers) is kept as is.
    reward = np.array(
        [r.item() if hasattr(r, 'item') else r for r in checkpoint['rewards']]
    )
    return loss, reward

In [None]:
# Number of leading entries kept from every run so all rows share one length.
N_EPOCHS = 300

# One row per loaded file, one column per kept entry.
all_loss = np.zeros((len(all_files), N_EPOCHS))
all_reward = np.zeros((len(all_files), N_EPOCHS))

# enumerate() supplies the row index directly. Looking it up with
# all_files.index(file) rescans the list on every iteration and would send
# duplicate paths to the same row.
for row, file in enumerate(all_files):
    loss, reward = load_loss_and_rewards(file)
    all_loss[row] = loss[:N_EPOCHS]
    all_reward[row] = reward[:N_EPOCHS]

In [None]:
# Summarise `all_loss` / `all_reward` along axis 0 and plot, for each quantity,
# the mean curve with a shaded 95% confidence band. The figure is saved as PDF.

# Mean and standard deviation along axis 0 for the loss
mean_loss = np.mean(all_loss, axis=0)
std_loss = np.std(all_loss, axis=0)

# Normal-approximation 95% interval: 1.96 * std / sqrt(number of rows).
# NOTE(review): np.std defaults to ddof=0 (population estimate). With few rows,
# ddof=1 and a t-quantile would give a wider band; confirm which is intended.
confidence_interval_loss = 1.96 * std_loss / np.sqrt(all_loss.shape[0])

# Same statistics for the reward
mean_reward = np.mean(all_reward, axis=0)
std_reward = np.std(all_reward, axis=0)
confidence_interval_reward = 1.96 * std_reward / np.sqrt(all_reward.shape[0])

# Ticks at the first, middle and last position, labelled 1-based.
xticks = [0, len(mean_loss) // 2, len(mean_loss) - 1]
xtick_labels = [str(xtick + 1) for xtick in xticks]


def _plot_mean_with_ci(ax, mean, half_width, quantity, color, ticks, tick_labels):
    """Draw one panel: the mean curve plus a shaded mean +/- half_width band."""
    ax.plot(mean, label=f'Mean {quantity}', color=color)
    ax.fill_between(
        range(len(mean)),
        mean - half_width,
        mean + half_width,
        color=color,
        alpha=0.2,
        label='95% Confidence Interval',
    )
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(quantity, fontsize=12)
    ax.set_title(f'Mean {quantity} with 95% Confidence Interval', fontsize=14)
    ax.legend(fontsize=10)
    # Drop the top/right frame lines for a cleaner look.
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.set_xticks(ticks)
    ax.set_xticklabels(tick_labels)


# Two stacked panels: loss on top, reward below.
fig, axs = plt.subplots(2, 1, figsize=(7, 4))
_plot_mean_with_ci(axs[0], mean_loss, confidence_interval_loss, 'Loss', 'blue',
                   xticks, xtick_labels)
_plot_mean_with_ci(axs[1], mean_reward, confidence_interval_reward, 'Reward', 'green',
                   xticks, xtick_labels)

# Adjust layout, save and show the figure
plt.tight_layout()
figure_path = '../results/Imgs/group_level/mean_loss_reward_RL.pdf'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path)
plt.show()