In [11]:
# KV‑cache compression analysis (open‑source only)
# ------------------------------------------------
import json, glob, re, statistics, itertools, os
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
from typing import Optional, Dict, Any, List

# Figures are written here; create the folder up front (a no-op if present).
plots_dir = "./plots"
os.makedirs(plots_dir, exist_ok=True)

# 1) Discover the benchmark result files in the working directory and stop
#    early when there is nothing to analyse.
files = glob.glob("benchmark_results_distilgpt2_*.json")
if not files:
    raise FileNotFoundError("No benchmark_results_distilgpt2_*.json files in this folder!")

# Rows extracted from the result files are accumulated in this list.
records: List[Dict[str, Any]] = []

# Function to extract layer ratio and latent dimension from filename
def get_ae_config(filename: str) -> Optional[Dict[str, Any]]:
    """Derive the autoencoder description encoded in a result file's name.

    Two naming schemes are recognised, tried in this order:

    * ``...ae_<A>_<B>.json`` -- reported as an asymmetric autoencoder whose
      label carries the two integers as ``A:B`` (presumably encoder and
      decoder layer counts); no latent dimension is available from the name.
    * ``...distilgpt2_<N>.json`` -- reported as a symmetric autoencoder with
      latent dimension ``N``.

    Args:
        filename: Path or bare name of a benchmark result file.

    Returns:
        A dict with keys ``"arch"`` (display label) and ``"latent_dim"``
        (``int`` or ``None``), or ``None`` when the name fits neither scheme.
    """
    asym = re.search(r"ae_(\d+)_(\d+)\.json$", filename)
    if asym is not None:
        first, second = (int(group) for group in asym.groups())
        return {"arch": f"Asym AE ({first}:{second})", "latent_dim": None}

    sym = re.search(r"distilgpt2_(\d+)\.json$", filename)
    if sym is None:
        return None
    return {"arch": "Sym AE", "latent_dim": int(sym.group(1))}

# 2) Turn every recognised result file into one record per bit-width entry.
for path in files:
    ae_config = get_ae_config(path)
    if not ae_config:
        continue  # Skip files that don't match the expected pattern

    # JSON is UTF-8 by specification; don't depend on the platform default.
    with open(path, encoding="utf-8") as fp:
        res = json.load(fp)

    raw_ppl: float = res["raw_baseline_ppl"]

    # The latent dimension is a per-file property, so resolve it once rather
    # than on every inner iteration. Prefer the value stored under "config";
    # fall back to the one parsed from the filename when "config" or its
    # "latent_dim" entry is absent or null.
    latent_dim: Optional[int] = (res.get("config") or {}).get("latent_dim")
    if latent_dim is None:
        latent_dim = ae_config.get("latent_dim")

    for bits_str, vals in res["perplexities"].items():
        # The key "none" marks the entry without a bit-width.
        bits: Optional[int] = int(bits_str) if bits_str != "none" else None
        kv_ppl: Optional[float] = vals.get("kv_cache_baseline_ppl")
        ae_ppl: Optional[float] = vals.get("ae_compressed_ppl")

        comp_ratio: Optional[float] = None
        # Truthiness (rather than `is not None`) also rejects 0, which would
        # otherwise raise ZeroDivisionError; such rows get no ratio.
        if latent_dim and bits:
            comp_ratio = (768 / latent_dim) * (32 / bits) # Assuming original is roughly 32 bits per dim

        records.append(dict(
            method      = ae_config["arch"],
            latent_dim  = latent_dim,
            bits        = bits,
            compression = comp_ratio,
            raw_ppl     = raw_ppl,
            kv_ppl      = kv_ppl,
            ae_ppl      = ae_ppl
        ))

# Columns are passed explicitly so the frame still has them (and sort_values
# does not raise KeyError) when every file was skipped and `records` is empty.
df = pd.DataFrame(
    records,
    columns=["method", "latent_dim", "bits", "compression", "raw_ppl", "kv_ppl", "ae_ppl"],
).sort_values(["method", "latent_dim", "bits"]).reset_index(drop=True)

# --- Experiment 1: Perplexity Comparison ---
# Plots autoencoder perplexity against the estimated compression ratio, one
# line per autoencoder variant, overlays the averaged KV-Quant baseline, and
# saves the figure into `plots_dir`.

plt.figure(figsize=(10, 6))
markers = ['o-', 's--', '^-', 'v-.', 'p:', 'h-', 'x--', 'D-.']
methods = sorted(df['method'].unique())

for i, method in enumerate(methods):
    if not (method.startswith("Asym AE") or method == "Sym AE"):
        continue
    # Sort by the x-value: `df` is ordered by (latent_dim, bits), which is not
    # monotonic in compression and would make the line zig-zag.
    subset = (
        df[df['method'] == method]
        .dropna(subset=['compression', 'ae_ppl'])
        .sort_values('compression')
    )
    if subset.empty:
        continue  # Nothing to draw; avoid a legend entry with no data
    # Cycle through the style list so more than len(markers) methods
    # cannot raise IndexError.
    plt.plot(subset['compression'], subset['ae_ppl'], markers[i % len(markers)], label=method)

# Plot KV-Quant baseline
# NOTE(review): the baseline is placed at the AE compression ratio, which
# includes the latent-dimension factor -- confirm this is the intended x-value
# for a quantisation-only baseline.
kv_data = df.dropna(subset=['compression', 'kv_ppl'])
if not kv_data.empty:
    # Aggregate KV-Quant results across different AE architectures/latent dims
    # (groupby returns the groups sorted by compression).
    grouped_kv = kv_data.groupby('compression')['kv_ppl'].mean().reset_index()
    plt.plot(grouped_kv['compression'], grouped_kv['kv_ppl'], 'k-', label='KV-Quant (Baseline)')

plt.xlabel("Compression Ratio")
plt.ylabel("Perplexity")
plt.title("Experiment 1: Perplexity vs. Compression Ratio")
plt.grid(True, which="both")
# Only build a legend when something labelled was actually drawn.
if plt.gca().get_legend_handles_labels()[0]:
    plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "exp1_perplexity_vs_compression_ratio.png"))
plt.close()

print(f"\nPlot for Experiment 1 saved to '{plots_dir}'.")


Plot for Experiment 1 saved to './plots'.
