In [2]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from pathlib import Path
from typing import Optional, Dict, List, Tuple, Union

In [3]:

def read_training_stats_h5(filepath: Union[str, Path]) -> Dict:
    """
    Read training statistics from an H5 file, assuming a structure similar to TrainingStats.

    Parameters:
    -----------
    filepath : str or Path
        Path to the h5 file containing training statistics

    Returns:
    --------
    Dict
        Dictionary containing all training statistics data
    """
    filepath = Path(filepath)
    stats_data = {}

    with h5py.File(filepath, 'r') as f:
        # Read main attributes if available
        if 'metadata' in f:
            stats_data['metadata'] = {
                key: f['metadata'].attrs[key] for key in f['metadata'].attrs}

        # Read rewards
        if 'rewards' in f:
            stats_data['rewards'] = {}
            for engine in f['rewards']:
                stats_data['rewards'][engine] = f['rewards'][engine][:]

        # Read episode durations
        if 'episode_durations' in f:
            stats_data['episode_durations'] = {}
            for engine in f['episode_durations']:
                stats_data['episode_durations'][engine] = f['episode_durations'][engine][:]

        # Read step times
        if 'step_times' in f:
            stats_data['step_times'] = {}
            for engine in f['step_times']:
                stats_data['step_times'][engine] = f['step_times'][engine][:]

        # Read episode steps
        if 'episode_steps' in f:
            stats_data['episode_steps'] = {}
            for engine in f['episode_steps']:
                stats_data['episode_steps'][engine] = f['episode_steps'][engine][:]

        # Read instance rewards
        if 'instance_rewards' in f:
            stats_data['instance_rewards'] = {}
            for instance in f['instance_rewards']:
                stats_data['instance_rewards'][instance] = f['instance_rewards'][instance][:]

        # Read TD errors if available
        if 'td_errors' in f:
            stats_data['td_errors'] = f['td_errors'][:]

    return stats_data


def plot_training_stats(
    stats_data: Dict,
    figsize: Tuple[int, int] = (15, 10),
    smoothing_window: int = 10,
    save_path: Optional[str] = None
) -> plt.Figure:
    """
    Plot training statistics from the dictionary returned by read_training_stats_h5.

    Parameters:
    -----------
    stats_data : Dict
        Dictionary containing training statistics data
    figsize : Tuple[int, int]
        Figure size (width, height) in inches
    smoothing_window : int
        Window size for moving average smoothing of metrics
    save_path : Optional[str]
        Path to save the plot. If None, plot won't be saved.

    Returns:
    --------
    plt.Figure
        Matplotlib figure containing the plots
    """
    # Create figure with subplots
    fig = plt.figure(figsize=figsize)

    # Define subplot grid - adjust based on available data
    grid = plt.GridSpec(3, 2, figure=fig)

    # Apply smoothing function
    def smooth(y, window):
        if len(y) < window:
            return y
        box = np.ones(window) / window
        y_smooth = np.convolve(y, box, mode='valid')
        # Pad the beginning to maintain same length
        padding = np.full(window-1, y_smooth[0])
        return np.concatenate([padding, y_smooth])

    # Plot rewards by engine type
    if 'rewards' in stats_data:
        ax_rewards = fig.add_subplot(grid[0, 0])
        for engine, rewards in stats_data['rewards'].items():
            episodes = np.arange(len(rewards))
            ax_rewards.plot(episodes, rewards, alpha=0.3,
                            label=f"{engine} (raw)")

            # Add smoothed version
            if len(rewards) >= smoothing_window:
                smoothed = smooth(rewards, smoothing_window)
                ax_rewards.plot(episodes, smoothed, linewidth=2,
                                label=f"{engine} ({smoothing_window}-ep avg)")

        ax_rewards.set_title('Rewards by Engine Type')
        ax_rewards.set_xlabel('Episode')
        ax_rewards.set_ylabel('Reward')
        ax_rewards.legend()
        ax_rewards.grid(True, linestyle='--', alpha=0.7)

    # Plot episode durations
    if 'episode_durations' in stats_data:
        ax_durations = fig.add_subplot(grid[0, 1])
        for engine, durations in stats_data['episode_durations'].items():
            episodes = np.arange(len(durations))
            ax_durations.plot(episodes, durations, alpha=0.3,
                              label=f"{engine} (raw)")

            # Add smoothed version
            if len(durations) >= smoothing_window:
                smoothed = smooth(durations, smoothing_window)
                ax_durations.plot(episodes, smoothed, linewidth=2,
                                  label=f"{engine} ({smoothing_window}-ep avg)")

        ax_durations.set_title('Episode Durations by Engine Type')
        ax_durations.set_xlabel('Episode')
        ax_durations.set_ylabel('Duration (seconds)')
        ax_durations.legend()
        ax_durations.grid(True, linestyle='--', alpha=0.7)

    # Plot episode steps
    if 'episode_steps' in stats_data:
        ax_steps = fig.add_subplot(grid[1, 0])
        for engine, steps in stats_data['episode_steps'].items():
            episodes = np.arange(len(steps))
            ax_steps.plot(episodes, steps, alpha=0.3, label=f"{engine} (raw)")

            # Add smoothed version
            if len(steps) >= smoothing_window:
                smoothed = smooth(steps, smoothing_window)
                ax_steps.plot(episodes, smoothed, linewidth=2,
                              label=f"{engine} ({smoothing_window}-ep avg)")

        ax_steps.set_title('Episode Steps by Engine Type')
        ax_steps.set_xlabel('Episode')
        ax_steps.set_ylabel('Steps')
        ax_steps.legend()
        ax_steps.grid(True, linestyle='--', alpha=0.7)

    # Plot TD errors if available
    if 'td_errors' in stats_data:
        ax_td = fig.add_subplot(grid[1, 1])
        td_errors = stats_data['td_errors']

        # Check if td_errors is a 1D or 2D array
        if len(td_errors.shape) == 1:
            # Plot as a single line
            episodes = np.arange(len(td_errors))
            ax_td.plot(episodes, td_errors, alpha=0.3, label='TD Error (raw)')

            # Add smoothed version
            if len(td_errors) >= smoothing_window:
                smoothed = smooth(td_errors, smoothing_window)
                ax_td.plot(episodes, smoothed, linewidth=2,
                           label=f'TD Error ({smoothing_window}-ep avg)')
        else:
            # Plot as a heatmap or scatter
            ax_td.scatter(np.repeat(np.arange(td_errors.shape[0]), td_errors.shape[1]),
                          td_errors.flatten(), alpha=0.1, s=3)

            # Add trend line if possible
            mean_errors = np.mean(td_errors, axis=1)
            ax_td.plot(np.arange(len(mean_errors)), mean_errors, 'r-',
                       linewidth=2, label='Mean TD Error')

        ax_td.set_title('TD Errors')
        ax_td.set_xlabel('Episode')
        ax_td.set_ylabel('TD Error')
        ax_td.legend()
        ax_td.grid(True, linestyle='--', alpha=0.7)

    # Plot step times as distribution
    if 'step_times' in stats_data:
        ax_step_times = fig.add_subplot(grid[2, 0])

        for engine, times in stats_data['step_times'].items():
            # Only include finite, non-zero values
            valid_times = [t for t in times if np.isfinite(t) and t > 0]
            if valid_times:
                # Convert to milliseconds for better readability
                times_ms = np.array(valid_times) * 1000
                ax_step_times.hist(times_ms, bins=30, alpha=0.6, label=engine)

        ax_step_times.set_title('Step Time Distribution')
        ax_step_times.set_xlabel('Step Time (ms)')
        ax_step_times.set_ylabel('Frequency')
        ax_step_times.legend()
        ax_step_times.grid(True, linestyle='--', alpha=0.7)

    # Plot engine comparison (rewards vs. durations)
    if 'rewards' in stats_data and 'episode_durations' in stats_data:
        ax_compare = fig.add_subplot(grid[2, 1])

        for engine in stats_data['rewards']:
            if engine in stats_data['episode_durations']:
                rewards = stats_data['rewards'][engine]
                durations = stats_data['episode_durations'][engine]

                # Ensure same length
                min_len = min(len(rewards), len(durations))
                rewards = rewards[:min_len]
                durations = durations[:min_len]

                ax_compare.scatter(durations, rewards, alpha=0.6, label=engine)

        ax_compare.set_title('Reward vs. Episode Duration')
        ax_compare.set_xlabel('Episode Duration (seconds)')
        ax_compare.set_ylabel('Reward')
        ax_compare.legend()
        ax_compare.grid(True, linestyle='--', alpha=0.7)

    plt.tight_layout()

    # Save figure if a path is provided
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    return fig


def plot_reward_comparison(
    stats_data: Dict,
    figsize: Tuple[int, int] = (12, 6),
    smoothing_window: int = 10,
    save_path: Optional[str] = None
) -> plt.Figure:
    """
    Plot a detailed comparison of rewards between different engine types.

    Parameters:
    -----------
    stats_data : Dict
        Dictionary containing training statistics data
    figsize : Tuple[int, int]
        Figure size (width, height) in inches
    smoothing_window : int
        Window size for moving average smoothing
    save_path : Optional[str]
        Path to save the plot. If None, plot won't be saved.

    Returns:
    --------
    plt.Figure
        Matplotlib figure containing the plots
    """
    if 'rewards' not in stats_data or not stats_data['rewards']:
        print("No reward data available for comparison")
        return None

    # Create figure with subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)

    # Apply smoothing function
    def smooth(y, window):
        if len(y) < window:
            return y
        box = np.ones(window) / window
        y_smooth = np.convolve(y, box, mode='valid')
        # Pad the beginning to maintain same length
        padding = np.full(window-1, y_smooth[0])
        return np.concatenate([padding, y_smooth])

    # Left plot: Smoothed rewards over time
    engines = list(stats_data['rewards'].keys())
    engine_colors = plt.cm.tab10(np.linspace(0, 1, len(engines)))

    for i, engine in enumerate(engines):
        rewards = stats_data['rewards'][engine]
        episodes = np.arange(len(rewards))

        # Plot raw data with low alpha
        ax1.plot(episodes, rewards, alpha=0.2, color=engine_colors[i])

        # Plot smoothed version
        if len(rewards) >= smoothing_window:
            smoothed = smooth(rewards, smoothing_window)
            ax1.plot(episodes, smoothed, linewidth=2,
                     color=engine_colors[i], label=engine)
        else:
            ax1.plot(episodes, rewards, linewidth=2,
                     color=engine_colors[i], label=engine)

    ax1.set_title('Reward Progress by Engine Type')
    ax1.set_xlabel('Episode')
    ax1.set_ylabel('Reward')
    ax1.legend()
    ax1.grid(True, linestyle='--', alpha=0.7)

    # Right plot: Box plot comparison
    reward_data = [stats_data['rewards'][engine] for engine in engines]
    ax2.boxplot(reward_data, labels=engines, patch_artist=True,
                boxprops=dict(facecolor='lightblue', color='blue'),
                flierprops=dict(marker='o', markerfacecolor='red', markersize=3,
                                markeredgecolor='black'))

    # Add strip plot for individual points
    for i, data in enumerate(reward_data):
        # Add jitter to x-position
        x = np.random.normal(i+1, 0.08, size=len(data))
        ax2.scatter(x, data, alpha=0.3, s=10, color=engine_colors[i])

    ax2.set_title('Reward Distribution by Engine Type')
    ax2.set_xlabel('Engine Type')
    ax2.set_ylabel('Reward')
    ax2.grid(True, linestyle='--', alpha=0.7)

    plt.tight_layout()

    # Save figure if a path is provided
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    return fig

In [None]:
h5_file = "logs/unknown_env__20250325_204445.h5"
fig, stats_data = read_and_visualize_training_stats_h5(h5_file)
plt.show()

IndentationError: unexpected indent (764120054.py, line 2)