In [None]:
import re
import json
import sys
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict

In [None]:
%matplotlib inline

In [None]:
def parse_metrics_from_log(filepath: Path) -> pd.DataFrame:
    """
    Parses a QUICHE log file, extracting all METRICS_LOG JSON blobs.

    Every line is searched for the literal marker ``METRICS_LOG `` followed by
    a ``{...}`` blob; anything before the marker (e.g. "Client: " or
    "Server: ") is ignored. The blob runs greedily to the last ``}`` on the
    line. Lines whose blob is not valid JSON are skipped with a warning on
    stderr.

    Args:
        filepath: Path (a plain string path is accepted too) of the log file.

    Returns:
        A pandas DataFrame with one row per parsed blob, or an empty
        DataFrame if the file cannot be read or contains no metrics.
    """
    # Accept str as well as Path; the warning below relies on `.name`.
    filepath = Path(filepath)

    # Regex to find the JSON blob in lines containing METRICS_LOG
    metrics_regex = re.compile(r'METRICS_LOG ({.*})')

    metrics_data = []

    try:
        # Decode explicitly and tolerate undecodable bytes: a single stray
        # byte in an unrelated log line must not abort the parsing of a file.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                match = metrics_regex.search(line)
                if not match:
                    continue
                try:
                    metrics_data.append(json.loads(match.group(1)))
                except json.JSONDecodeError:
                    print(f"Warning: Skipping malformed JSON in {filepath.name}", file=sys.stderr)
    except OSError as e:
        print(f"Error reading file {filepath}: {e}", file=sys.stderr)
        return pd.DataFrame()

    # An empty result is common (e.g. logs that emit no metrics), so it is
    # returned silently as an empty DataFrame.
    return pd.DataFrame(metrics_data)


def plot_summary_boxplot(
    data_by_algo: dict,
    y_label: str,
    output_filename: str,
    label_map: dict,
    scale_factor: float = 1.0
):
    """
    Generates, saves and shows a box plot with one box per algorithm.

    Boxes are ordered by display label. Algorithms with no usable samples
    are skipped with a warning; if nothing is left, no figure is created.
    Any failure while plotting is reported on stderr instead of being raised.

    Args:
        data_by_algo: Aggregated data (dict of {algo_key: [values]}).
        y_label: The label for the Y-axis.
        output_filename: The filename to save the plot as.
        label_map: Dictionary to map algorithm keys to display names; keys
            without an entry are shown as-is.
        scale_factor: A factor every sample is multiplied by before plotting.
    """

    academic_style_settings = {
        'font.family': 'serif',
        'font.serif': ['Times New Roman', 'DejaVu Serif'],
        'font.size': 18,
        'axes.titlesize': 18,
        'axes.labelsize': 26,
        'xtick.labelsize': 24,
        'ytick.labelsize': 24,
        'legend.fontsize': 18,
        'pdf.fonttype': 42,  # Embed TrueType (Type 42) fonts in PDF output
        'ps.fonttype': 42    # Embed TrueType (Type 42) fonts in PostScript output
    }

    # Edge colors, reused cyclically when there are more boxes than shades.
    edge_shades = ["#1b4965", "#2c7da0", "#5fa8d3", "#90befe"]

    fig = None
    try:
        # rc_context applies the settings only for the duration of this plot.
        with plt.rc_context(academic_style_settings):
            plot_data = []
            plot_labels = []

            # Sort by display label for a consistent plot order.
            sorted_items = sorted(data_by_algo.items(), key=lambda item: label_map.get(item[0], item[0]))

            for algo, values in sorted_items:
                label = label_map.get(algo, algo)
                samples = np.asarray(values, dtype=float)
                # NaN samples would turn the box statistics into NaN, so
                # they are dropped before plotting.
                samples = samples[~np.isnan(samples)]
                if samples.size == 0:
                    print(f"Warning: No data found for {algo}, skipping.", file=sys.stderr)
                    continue
                scaled_values = samples * scale_factor
                plot_data.append(scaled_values)
                plot_labels.append(label)
                print(f"Boxplot data for {algo} ({label}): {len(scaled_values)} samples.")

            if not plot_data:
                print(f"Error: No data to plot for {output_filename}", file=sys.stderr)
                return

            fig, ax = plt.subplots(figsize=(15, 7), dpi=300)

            # Tick labels are set explicitly below rather than through the
            # boxplot `labels=` keyword, which newer Matplotlib deprecates.
            box_plot = ax.boxplot(plot_data,
                                  patch_artist=True,  # Boxes are patches so edges can be styled
                                  showfliers=False)

            # Each box owns two whiskers and two caps, stored pairwise in
            # the flat lists returned by boxplot().
            for i, box in enumerate(box_plot['boxes']):
                shade = edge_shades[i % len(edge_shades)]
                box.set(facecolor="none", edgecolor=shade, linewidth=4)
                pair = slice(2 * i, 2 * i + 2)
                for line in box_plot['whiskers'][pair] + box_plot['caps'][pair]:
                    line.set(color=shade, linewidth=4)
                box_plot['medians'][i].set(color=shade, linewidth=2.5)

            # Boxes are drawn at positions 1..N.
            ax.set_xticks(np.arange(1, len(plot_labels) + 1))
            ax.set_xticklabels(plot_labels, ha='center')

            ax.set_ylabel(y_label)

            # Clean plot
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.grid(axis='y', linestyle='--', alpha=0.7)

            fig.tight_layout()

            try:
                fig.savefig(output_filename, dpi=300, bbox_inches='tight')
                print(f"Successfully generated plot: {output_filename}")
            except OSError as e:
                print(f"Error saving plot {output_filename}: {e}", file=sys.stderr)

            plt.show()

    except Exception as e:
        # Best-effort plotting: report the failure and let the caller carry
        # on with the remaining plots.
        print(f"Error generating plot {output_filename}: {e}", file=sys.stderr)
    finally:
        # Release this figure (and only this one) on success and on failure.
        if fig is not None:
            plt.close(fig)

def plot_summary_barplot(
    data_by_algo: dict,
    y_label: str,
    output_filename: str,
    label_map: dict,
    scale_factor: float = 1.0,
    y_limits: "tuple[float, float] | None" = (0.5, 1.3)
):
    """
    Generates, saves and shows a bar plot of the per-algorithm median values.

    Bars are ordered by display label. Algorithms with no usable samples are
    skipped with a warning; if nothing is left, no figure is created. Any
    failure while plotting is reported on stderr instead of being raised.

    Args:
        data_by_algo: Aggregated data (dict of {algo: [values]}).
        y_label: The label for the Y-axis.
        output_filename: The filename to save the plot as.
        label_map: Dictionary to map algorithm keys to display names; keys
            without an entry are shown as-is.
        scale_factor: A factor the median is multiplied by before plotting.
        y_limits: (bottom, top) limits for the Y-axis. Defaults to the
            previously hard-coded (0.5, 1.3); pass None to let Matplotlib
            autoscale the axis.
    """

    academic_style_settings = {
        'font.family': 'serif',
        'font.serif': ['Times New Roman', 'DejaVu Serif'],
        'font.size': 18,
        'axes.titlesize': 18,
        'axes.labelsize': 26,
        'xtick.labelsize': 24,
        'ytick.labelsize': 24,
        'legend.fontsize': 18,
        'pdf.fonttype': 42,  # Embed TrueType (Type 42) fonts in PDF output
        'ps.fonttype': 42    # Embed TrueType (Type 42) fonts in PostScript output
    }

    # Bar colors, reused cyclically when there are more bars than shades.
    edge_shades = ["#1b4965", "#2c7da0", "#5fa8d3", "#90befe"]

    fig = None
    try:
        # rc_context applies the settings only for the duration of this plot.
        with plt.rc_context(academic_style_settings):
            plot_data_medians = []
            plot_labels = []

            # Sort by display label for a consistent plot order.
            sorted_items = sorted(data_by_algo.items(), key=lambda item: label_map.get(item[0], item[0]))

            for algo, values in sorted_items:
                samples = np.asarray(values, dtype=float)
                # A single NaN sample would make the median NaN, so NaNs
                # are dropped first.
                samples = samples[~np.isnan(samples)]
                if samples.size == 0:
                    print(f"Warning: No data for {algo}, skipping.", file=sys.stderr)
                    continue
                scaled_median = np.median(samples) * scale_factor
                plot_data_medians.append(scaled_median)
                plot_labels.append(label_map.get(algo, algo))
                print(f"Bar data for {algo}: {len(samples)} samples, median={scaled_median:.2f}")

            if not plot_data_medians:
                print(f"Error: No data to plot for {output_filename}", file=sys.stderr)
                return

            fig, ax = plt.subplots(figsize=(15, 7), dpi=300)

            # One bar per algorithm at x = 0..N-1, median as bar height.
            x_positions = np.arange(len(plot_labels))
            bars = ax.bar(x_positions,
                          plot_data_medians,
                          align='center',
                          width=0.7)

            for i, bar in enumerate(bars):
                shade = edge_shades[i % len(edge_shades)]
                bar.set_edgecolor(shade)
                bar.set_facecolor(shade)
                bar.set_linewidth(4)

            ax.set_xticks(x_positions)
            ax.set_xticklabels(plot_labels, ha='center')

            ax.set_ylabel(y_label)

            # Clean plot
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.grid(axis='y', linestyle='--', alpha=0.7)

            if y_limits is not None:
                ax.set_ylim(*y_limits)

            fig.tight_layout()

            try:
                fig.savefig(output_filename, dpi=300, bbox_inches='tight')
                print(f"Successfully generated plot: {output_filename}")
            except OSError as e:
                print(f"Error saving plot {output_filename}: {e}", file=sys.stderr)

            plt.show()

    except Exception as e:
        # Best-effort plotting: report the failure and let the caller carry
        # on with the remaining plots.
        print(f"Error generating plot {output_filename}: {e}", file=sys.stderr)
    finally:
        # Release this figure (and only this one) on success and on failure.
        if fig is not None:
            plt.close(fig)

In [None]:
# Switch the global Matplotlib style to the seaborn dark-grid look; if the
# style cannot be loaded, keep the default style and say so on stderr.
try:
    plt.style.use('seaborn-v0_8-darkgrid')
except OSError:  # IOError is an alias of OSError
    sys.stderr.write("Seaborn style not available, using default.\n")

# Root directory; each algorithm's logs live in a sub-directory of it.
log_dir = Path(".")

# 1. Define Mappings
# One row per algorithm:
#   (sub-directory name, key used in the log filenames, plot label)
# Add other methods here if needed, e.g. ('Prague', 'PRGC', 'Prague').
_ALGORITHM_TABLE = (
    ('CUBIC', 'QBIC', 'CUBIC'),
    ('BBRv1', 'BBRR', 'BBRv1'),
    ('BBRv3', 'B2ON', 'BBRv3'),
    ('LLM-BBR', 'LLMX', 'LLM-BBR'),
)

# Directory name -> internal algorithm key (used in filenames).
dir_to_key = {directory: key for directory, key, _label in _ALGORITHM_TABLE}

# Internal algorithm key -> "pretty name" for plot labels.
key_to_label = {key: label for _directory, key, label in _ALGORITHM_TABLE}

# Only samples with time_ms strictly greater than this threshold are used.
# It is currently 0; the earlier value of 30000 (the first 30 s) is kept
# alongside for reference.
STABLE_START_TIME_MS = 0  # 30000

# 2. Initialize Data Aggregators
# Each aggregator maps an algorithm key to a list of collected samples.

# Filled from the client logs.
goodput_data_by_algo = defaultdict(list)
srtt_data_by_algo = defaultdict(list)
queuing_delay_data_by_algo = defaultdict(list)

# Filled from the server logs.
cwnd_data_by_algo = defaultdict(list)
retransmit_data_by_algo = defaultdict(list)
bw_est_data_by_algo = defaultdict(list)
pacing_rate_data_by_algo = defaultdict(list)


print("\n--- Aggregating Data from Sub-directories ---")

# Metric column -> aggregator for the per-sample metrics of each log type.
client_metric_sinks = {
    "goodput_bps": goodput_data_by_algo,
    "srtt_ms": srtt_data_by_algo,
    "latest_rtt_us": queuing_delay_data_by_algo,
}
server_metric_sinks = {
    "cwnd_bytes": cwnd_data_by_algo,
    "est_bw_bps": bw_est_data_by_algo,
    "pacing_rate_bps": pacing_rate_data_by_algo,
}


def _stable_rows(df):
    """Return the rows after STABLE_START_TIME_MS, or all rows if none qualify."""
    # A log without a time_ms field cannot be filtered; use it whole.
    if "time_ms" not in df.columns:
        return df
    df_stable = df[df["time_ms"] > STABLE_START_TIME_MS]
    return df if df_stable.empty else df_stable


def _append_samples(df, sinks, algo_key):
    """Append every known metric column of df to its aggregator."""
    for column, sink in sinks.items():
        if column not in df.columns:
            continue
        # Rows that did not carry this metric are NaN in the DataFrame;
        # keep them out of the sample lists.
        samples = df[column].dropna().to_list()
        if samples:
            sink[algo_key].extend(samples)


# 3. Iterate Over Directories and Aggregate Data
for dir_name, algo_key in dir_to_key.items():
    sub_dir = log_dir / dir_name

    if not sub_dir.is_dir():
        print(f"Warning: Directory not found, skipping: {sub_dir}", file=sys.stderr)
        continue

    # Find all relevant log files using the algorithm key
    client_files = sorted(sub_dir.glob(f"performance_log_{algo_key}_client_*.txt"))
    server_files = sorted(sub_dir.glob(f"performance_log_{algo_key}_server_*.txt"))

    print(f"Processing '{dir_name}' (Key: {algo_key}): Found {len(client_files)} client logs, {len(server_files)} server logs.")

    if not client_files and not server_files:
        print(f"Warning: No log files found in {dir_name} for key {algo_key}", file=sys.stderr)
        continue

    # 3.1. Process Client Logs (goodput, SRTT, latest RTT)
    for c_file in client_files:
        df = parse_metrics_from_log(c_file)
        if df.empty:
            continue
        _append_samples(_stable_rows(df), client_metric_sinks, algo_key)

    # 3.2. Process Server Logs (CWND, BW estimate, pacing rate, retransmits)
    for s_file in server_files:
        df = parse_metrics_from_log(s_file)
        if df.empty:
            continue
        _append_samples(_stable_rows(df), server_metric_sinks, algo_key)

        # bytes_retrans is a counter, so one value per log is kept: the
        # last one recorded anywhere in the full (unfiltered) log.
        if "bytes_retrans" in df.columns:
            retrans_values = df["bytes_retrans"].dropna()
            if not retrans_values.empty:
                retransmit_data_by_algo[algo_key].append(retrans_values.iloc[-1])

print("\n--- Data Aggregation Complete ---")

# Stop here if every aggregator is still empty: there is nothing to plot.
all_data_aggregrators = [
    goodput_data_by_algo,
    srtt_data_by_algo,
    queuing_delay_data_by_algo,
    cwnd_data_by_algo,
    retransmit_data_by_algo,
    bw_est_data_by_algo,
    pacing_rate_data_by_algo,
]
if not any(all_data_aggregrators):
    print("Error: No data was successfully aggregated from any directory.", file=sys.stderr)
    print(f"Looked for directories: {list(dir_to_key)}", file=sys.stderr)
    sys.exit(1)


# 4. Generate Plots
print("\n--- Generating Summary Box Plots ---")

# One row per figure, in plotting order:
#   (name used in progress messages, aggregated data, Y-axis label,
#    output file, scale factor applied to the raw samples)
summary_plots = [
    # Client-side metrics
    ("Goodput", goodput_data_by_algo, "Goodput (Mbps)",
     "client_goodput_boxplot.png", 1 / 1_000_000),  # bps -> Mbps
    ("SRTT", srtt_data_by_algo, "SRTT (ms)",
     "client_srtt_boxplot.png", 1.0),  # already in ms
    # NOTE(review): latest_rtt_us is plotted unscaled; per the original
    # author's note the logged values are already in ms despite the `_us`
    # suffix. Confirm against the logger.
    ("Round Trip Delay", queuing_delay_data_by_algo, "RTT (ms)",
     "client_queuing_delay_boxplot.png", 1),
    # Server-side metrics
    ("CWND", cwnd_data_by_algo, "CWND (KB)",
     "server_cwnd_boxplot.png", 1 / 1024),  # bytes -> KB
    ("Retransmits", retransmit_data_by_algo, "Total Retransmitted (KB)",
     "server_retransmits_boxplot.png", 1 / 1024),  # bytes -> KB
    ("Bandwidth Estimate", bw_est_data_by_algo, "Est. Bandwidth (Mbps)",
     "server_bw_est_boxplot.png", 1 / 1_000_000),  # bps -> Mbps
    ("Pacing Rate", pacing_rate_data_by_algo, "Pacing Rate (Mbps)",
     "server_pacing_rate_boxplot.png", 1 / 1_000_000),  # bps -> Mbps
]

for plot_name, plot_values, plot_y_label, plot_filename, plot_scale in summary_plots:
    print(f"Plotting {plot_name}...")
    if not plot_values:
        print(f"Skipping {plot_name} plot (no data).")
        continue
    plot_summary_boxplot(
        data_by_algo=plot_values,
        y_label=plot_y_label,
        output_filename=plot_filename,
        label_map=key_to_label,
        scale_factor=plot_scale
    )


print("\n--- Plotting complete. ---")