In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import os
import numpy as np
from pathlib import Path

# Notebook-wide matplotlib appearance: start from the stock style, then
# override the handful of settings shared by every figure produced below.
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (5, 2.4),   # slightly enlarged so axis labels stay visible
    'axes.grid': True,
    'grid.linestyle': '--',
    'grid.alpha': 0.6,
    'font.size': 11.5,
    'axes.labelsize': 11.5,
    'legend.fontsize': 10,
    'figure.dpi': 150,            # higher DPI for sharper inline rendering
    'figure.autolayout': True,    # helps prevent labels from being cut off
})

In [2]:
# --- Base Path Configuration ---
# Results are read from a per-user directory. The user name is taken from the
# USER environment variable, falling back to the literal "default_user" when
# that variable is not set.
user = os.environ.get("USER", "default_user")
BASE_RESULTS_PATH = Path(f"/home/users/ntu/{user}/slimsc/prune/results")

# --- IMPORTANT: DEFINE YOUR COMPARISON SETS HERE ---
# Add one dictionary for each pair of CSVs you want to compare.
# - 'name': A unique identifier for the pair; used as the plot filename prefix.
# - 'sc_path': Path to the Standard Cache (control) CSV.
# - 'sp_path': Path to the Similarity Pruning CSV.

COMPARISON_PAIRS = [
    {
        "name": "qwq_gpqa_diamond_q70",
        "sc_path": BASE_RESULTS_PATH / "QwQ-32B/gpqa_diamond/sc_16_control/run3/kvcache_usages/question_70_kvcache_usage.csv",
        "sp_path": BASE_RESULTS_PATH / "QwQ-32B/gpqa_diamond/random_n16_thresh0.98_delay20/run3/kvcache_usages/question_70_kvcache_usage.csv"
    },
    {
        # NOTE(review): the control CSV comes from run2 but the pruned CSV from
        # run3; every other pair in this list uses the same run for both sides.
        # Confirm this mismatch is intentional.
        "name": "qwq_aime_q8",
        "sc_path": BASE_RESULTS_PATH / "QwQ-32B/aime/sc_8_control/run2/kvcache_usages/question_8_kvcache_usage.csv",
        "sp_path": BASE_RESULTS_PATH / "QwQ-32B/aime/random_n8_thresh0.98_delay20/run3/kvcache_usages/question_8_kvcache_usage.csv"
    },
    {
        "name": "qwq_aqua_rat_q242",
        "sc_path": BASE_RESULTS_PATH / "QwQ-32B/aqua_rat/sc_8_control/run2/kvcache_usages/question_242_kvcache_usage.csv",
        "sp_path": BASE_RESULTS_PATH / "QwQ-32B/aqua_rat/random_n8_thresh0.98_delay20/run2/kvcache_usages/question_242_kvcache_usage.csv"
    },
    {
        "name": "r1_gpqa_diamond_q2",
        "sc_path": BASE_RESULTS_PATH / "R1-Distill-Qwen-14B/gpqa_diamond/sc_64_control/run2/kvcache_usages/question_2_kvcache_usage.csv",
        "sp_path": BASE_RESULTS_PATH / "R1-Distill-Qwen-14B/gpqa_diamond/random_n64_thresh0.98_delay20/run2/kvcache_usages/question_2_kvcache_usage.csv"
    },
    {
        "name": "r1_aime_q22",
        "sc_path": BASE_RESULTS_PATH / "R1-Distill-Qwen-14B/aime/sc_64_control/run3/kvcache_usages/question_22_kvcache_usage.csv",
        "sp_path": BASE_RESULTS_PATH / "R1-Distill-Qwen-14B/aime/random_n64_thresh0.98_delay20/run3/kvcache_usages/question_22_kvcache_usage.csv"
    },
    {
        "name": "r1_aqua_rat_q70",
        "sc_path": BASE_RESULTS_PATH / "R1-Distill-Qwen-14B/aqua_rat/sc_8_control/run2/kvcache_usages/question_70_kvcache_usage.csv",
        "sp_path": BASE_RESULTS_PATH / "R1-Distill-Qwen-14B/aqua_rat/random_n8_thresh0.98_delay20/run2/kvcache_usages/question_70_kvcache_usage.csv"
    },
    # --- Add more comparisons here ---
    # Template entry (kept commented out); copy it and fill in real paths:
    # {
    #     "name": "My_Next_Model_Experiment",
    #     "sc_path": BASE_RESULTS_PATH / "path/to/next/sc.csv",
    #     "sp_path": BASE_RESULTS_PATH / "path/to/next/sp.csv"
    # },
]

# Directory to save plots. The path is relative, so it resolves against the
# kernel's current working directory; it is created if it does not exist yet.
PLOTS_DIR = Path("plots")
PLOTS_DIR.mkdir(exist_ok=True)

In [3]:
def load_and_process_data(csv_path: Path) -> pd.DataFrame:
    """
    Loads KV cache usage samples from a CSV and derives the plotting columns.

    Three columns are added to the frame read from disk:
      - 'time_s': 'timestamp' shifted so that the first row is 0.
      - 'token_count': 1-based row counter (1, 2, ..., number of rows).
        NOTE(review): this treats each CSV row as one token — confirm that
        matches how the usage log is sampled.
      - 'kv_cache_usage_pct': 'gpu_cache_usage_perc' multiplied by 100.

    Args:
        csv_path: Path to the input CSV file. It must provide the columns
            'timestamp' and 'gpu_cache_usage_perc'.

    Returns:
        A pandas DataFrame holding the CSV contents plus the derived columns.

    Raises:
        FileNotFoundError: If csv_path does not point to an existing file.
        KeyError: If a required column is missing from the CSV.
        ValueError: If the CSV contains a header but no data rows.
    """
    if not csv_path.is_file():
        raise FileNotFoundError(f"Data file not found: {csv_path}")

    df = pd.read_csv(csv_path)

    # Fail early with a message that names the file, instead of a bare
    # KeyError raised from deep inside pandas.
    missing = [col for col in ('timestamp', 'gpu_cache_usage_perc') if col not in df.columns]
    if missing:
        raise KeyError(
            f"{csv_path.name} is missing required column(s): {', '.join(missing)}"
        )

    # A header-only file would otherwise crash on .iloc[0] below with an
    # opaque "single positional indexer is out-of-bounds" IndexError.
    if df.empty:
        raise ValueError(f"No data rows found in {csv_path}")

    # Normalize time so every trace starts at t = 0.
    df['time_s'] = df['timestamp'] - df['timestamp'].iloc[0]
    df['token_count'] = np.arange(1, len(df) + 1)
    # The source column is multiplied by 100 to express it as a percentage.
    df['kv_cache_usage_pct'] = df['gpu_cache_usage_perc'] * 100

    print(f"  - Successfully loaded {len(df)} tokens from {csv_path.name}")
    return df

In [4]:
def create_comparison_plot(df_sc: pd.DataFrame, df_sp: pd.DataFrame, x_col: str, x_label: str, save_path: Path):
    """
    Generates and saves a single two-line comparison plot of KV cache usage.

    Both frames are drawn on the same axes, with 'kv_cache_usage_pct' on the
    y-axis. Both axes are clamped to start at 0.

    Args:
        df_sc: DataFrame for Standard Cache; drawn in blue, labelled 'SC'.
        df_sp: DataFrame for Similarity Pruning; drawn in orange, labelled
            'Slim-SC (RP)'.
        x_col: The column name to use for the x-axis ('time_s' or 'token_count').
        x_label: The display label for the x-axis.
        save_path: The full path to save the plot image.

    Raises:
        KeyError: If x_col or 'kv_cache_usage_pct' is missing from a frame.
    """
    fig, ax = plt.subplots()

    # try/finally guarantees the figure is released even when plotting or
    # saving fails; otherwise each failed call would leave an open figure
    # behind while the caller keeps looping.
    try:
        ax.plot(df_sc[x_col], df_sc['kv_cache_usage_pct'], label='SC', color='royalblue', linewidth=2)
        ax.plot(df_sp[x_col], df_sp['kv_cache_usage_pct'], label='Slim-SC (RP)', color='darkorange', linewidth=2)

        ax.set_xlabel(x_label)
        ax.set_ylabel('KV Cache Usage (%)')

        ax.set_ylim(bottom=0)
        ax.set_xlim(left=0)
        ax.legend()

        # Save through the figure object itself rather than pyplot's implicit
        # "current figure", so the right figure is always the one written.
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)  # Close the figure to free up memory

    print(f"  - Plot saved to: {save_path}")

In [5]:
# Driver: for every configured pair, load both CSVs and render two figures
# (usage vs. elapsed time, usage vs. token count). A failure in one pair is
# reported and the loop moves on to the next pair.
for comparison in COMPARISON_PAIRS:
    name, sc_path, sp_path = comparison['name'], comparison['sc_path'], comparison['sp_path']

    print(f"\n--- Processing comparison: {name} ---")

    try:
        # Control trace first, then the pruned trace.
        df_sc = load_and_process_data(sc_path)
        df_sp = load_and_process_data(sp_path)

        # One entry per figure: (x-axis column, x-axis label, output file).
        plot_specs = (
            ('time_s', 'Time (s)', PLOTS_DIR / f'{name}_kvc_vs_time.png'),
            ('token_count', 'Token Count', PLOTS_DIR / f'{name}_kvc_vs_token.png'),
        )
        for x_col, x_label, out_file in plot_specs:
            create_comparison_plot(
                df_sc=df_sc,
                df_sp=df_sp,
                x_col=x_col,
                x_label=x_label,
                save_path=out_file,
            )

    except FileNotFoundError as e:
        # Missing input files get a dedicated, more specific message.
        print(f"  ❌ ERROR: Could not process '{name}'. File not found.")
        print(f"     Details: {e}")
    except Exception as e:
        # Anything else is reported without aborting the remaining pairs.
        print(f"  ❌ ERROR: An unexpected error occurred while processing '{name}': {e}")

print("\n✅ All comparisons processed.")


--- Processing comparison: qwq_gpqa_diamond_q70 ---
  - Successfully loaded 11769 tokens from question_70_kvcache_usage.csv
  - Successfully loaded 10819 tokens from question_70_kvcache_usage.csv
  - Plot saved to: plots/qwq_gpqa_diamond_q70_kvc_vs_time.png
  - Plot saved to: plots/qwq_gpqa_diamond_q70_kvc_vs_token.png

--- Processing comparison: qwq_aime_q8 ---
  - Successfully loaded 26665 tokens from question_8_kvcache_usage.csv
  - Successfully loaded 19037 tokens from question_8_kvcache_usage.csv
  - Plot saved to: plots/qwq_aime_q8_kvc_vs_time.png
  - Plot saved to: plots/qwq_aime_q8_kvc_vs_token.png

--- Processing comparison: qwq_aqua_rat_q242 ---
  - Successfully loaded 15841 tokens from question_242_kvcache_usage.csv
  - Successfully loaded 11972 tokens from question_242_kvcache_usage.csv
  - Plot saved to: plots/qwq_aqua_rat_q242_kvc_vs_time.png
  - Plot saved to: plots/qwq_aqua_rat_q242_kvc_vs_token.png

--- Processing comparison: r1_gpqa_diamond_q2 ---
  - Successfully lo