In [21]:
# KV‑cache compression analysis (open‑source only)
# ------------------------------------------------
import json, glob, re, statistics, itertools, os
import pandas as pd
import matplotlib.pyplot as plt
from typing import Optional, Dict, Any, List

# Ensure the plots directory exists
plots_dir = "./plots"
os.makedirs(plots_dir, exist_ok=True)

# 1) locate result files in the current directory
files = glob.glob("benchmark_results_distilgpt2_*.json")
if not files:
    raise FileNotFoundError("No benchmark_results_distilgpt2_*.json files in this folder!")

records: List[Dict[str, Any]] = []

# Function to extract latent dimension from filename
def get_latent_dim(filename: str) -> Optional[int]:
    match_ld = re.search(r"distilgpt2_(\d+)\.json$", filename)
    if match_ld:
        return int(match_ld.group(1))
    return None

for path in files:
    latent_dim = get_latent_dim(path)
    if latent_dim is not None:
        with open(path) as fp:
            res = json.load(fp)

        raw_ppl_wiki = res.get("raw_baseline_ppl")
        ae_ppl_wiki_none = res["perplexities"].get("none", {}).get("ae_compressed_ppl")

        if raw_ppl_wiki is not None and ae_ppl_wiki_none is not None:
            records.append(dict(
                latent_dim=latent_dim,
                raw_ppl=raw_ppl_wiki,
                ae_ppl=ae_ppl_wiki_none,
                dataset="WikiText"
            ))

        lb_baseline = res.get("longbench", {}).get("baseline", {})
        lb_compressed_none = res.get("longbench", {}).get("compressed", {}).get("none", {})

        for task, base_ppl in lb_baseline.items():
            comp_ppl = lb_compressed_none.get(task)
            if comp_ppl is not None:
                records.append(dict(
                    latent_dim=latent_dim,
                    raw_ppl=base_ppl,
                    ae_ppl=comp_ppl,
                    dataset=f"LongBench ({task})"
                ))

df = pd.DataFrame(records).sort_values(["latent_dim", "dataset"]).reset_index(drop=True)

latent_dims = sorted(df['latent_dim'].dropna().unique())
longbench_tasks = sorted([d.split('(')[1][:-1] for d in df['dataset'].unique() if d.startswith("LongBench")])

# Plot for WikiText Perplexity (Log Scale)
plt.figure(figsize=(8, 5))
wiki_data = df[df['dataset'] == 'WikiText'].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl']).sort_values('latent_dim')
if not wiki_data.empty:
    raw_ppl_baseline_wiki = wiki_data['raw_ppl'].iloc[0]
    plt.plot(wiki_data['latent_dim'], [raw_ppl_baseline_wiki] * len(wiki_data), linestyle='--', marker='o', label='Raw Baseline')
    plt.plot(wiki_data['latent_dim'], wiki_data['ae_ppl'], marker='x', label='AE')
    plt.xlabel("Latent Dimension")
    plt.ylabel("Perplexity (Log Scale)")
    plt.title("WikiText Perplexity vs. Latent Dimension")
    plt.yscale('log')
    plt.legend()
    plt.grid(True, which="both")
    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "exp1_wikitext_perplexity_vs_latent_dim_logy.png"))
    plt.close()

# Plot for LongBench Perplexity (averaged across tasks, Log Scale)
if longbench_tasks:
    plt.figure(figsize=(10, 6))
    longbench_data = df[df['dataset'].str.startswith('LongBench')].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'])
    if not longbench_data.empty:
        avg_raw_ppl_lb = longbench_data.groupby('latent_dim')['raw_ppl'].mean()
        avg_ae_ppl_lb = longbench_data.groupby('latent_dim')['ae_ppl'].mean()

        plt.plot(avg_raw_ppl_lb.index, avg_raw_ppl_lb.values, linestyle='--', marker='o', label='Avg. Raw Baseline')
        plt.plot(avg_ae_ppl_lb.index, avg_ae_ppl_lb.values, marker='x', label='Avg. AE')
        plt.xlabel("Latent Dimension")
        plt.ylabel("Average Perplexity (LongBench, Log Scale)")
        plt.yscale('log')
        plt.title("Average LongBench Perplexity vs. Latent Dimension")
        plt.legend()
        plt.grid(True, which="both")
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, "exp1_longbench_avg_perplexity_vs_latent_dim_logy.png"))
        plt.close()

# Create Table Data
table_data = []
wiki_data_table = df[df['dataset'] == 'WikiText'].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl']).sort_values('latent_dim')
if not wiki_data_table.empty:
    raw_ppl_baseline_wiki_table = wiki_data_table['raw_ppl'].iloc[0]
    for index, row in wiki_data_table.iterrows():
        table_data.append({
            "Latent Dim": int(row['latent_dim']),
            "Dataset": "WikiText",
            "Raw PPL": f"{raw_ppl_baseline_wiki_table:.2f}",
            "AE PPL": f"{row['ae_ppl']:.2f}"
        })

longbench_data_table = df[df['dataset'].str.startswith('LongBench')].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'])
if not longbench_data_table.empty:
    avg_results_lb = longbench_data_table.groupby('latent_dim').agg(
        Avg_Raw_PPL=('raw_ppl', 'mean'),
        Avg_AE_PPL=('ae_ppl', 'mean')
    ).reset_index()
    for index, row in avg_results_lb.iterrows():
        table_data.append({
            "Latent Dim": int(row['latent_dim']),
            "Dataset": "LongBench (Avg)",
            "Raw PPL": f"{row['Avg_Raw_PPL']:.2f}",
            "AE PPL": f"{row['Avg_AE_PPL']:.2f}"
        })

table_df = pd.DataFrame(table_data)
display(table_df)

# Create LaTeX Table
latex_table = table_df.to_latex(index=False, float_format="%.2f", caption="Perplexity Comparison vs. Latent Dimension (No Compression)", label="tab:perplexity_vs_latent_dim")
print("\nLaTeX Table:")
print(latex_table)

print(f"\nPerplexity vs. Latent Dimension plots (log y-axis) and table (with LaTeX output) saved/printed.")

Unnamed: 0,Latent Dim,Dataset,Raw PPL,AE PPL
0,8,WikiText,76.19,5049.08
1,16,WikiText,76.19,2192.71
2,32,WikiText,76.19,167.0



LaTeX Table:
\begin{table}
\caption{Perplexity Comparison vs. Latent Dimension (No Compression)}
\label{tab:perplexity_vs_latent_dim}
\begin{tabular}{rlll}
\toprule
Latent Dim & Dataset & Raw PPL & AE PPL \\
\midrule
8 & WikiText & 76.19 & 5049.08 \\
16 & WikiText & 76.19 & 2192.71 \\
32 & WikiText & 76.19 & 167.00 \\
\bottomrule
\end{tabular}
\end{table}


Perplexity vs. Latent Dimension plots (log y-axis) and table (with LaTeX output) saved/printed.


In [22]:
# KV‑cache compression analysis (open‑source only)
# ------------------------------------------------
import json, glob, re, statistics, itertools, os
import pandas as pd
import matplotlib.pyplot as plt
from typing import Optional, Dict, Any, List

# Ensure the plots directory exists
plots_dir = "./plots"
os.makedirs(plots_dir, exist_ok=True)

# 1) locate result files in the current directory
files = glob.glob("benchmark_results_distilgpt2_*.json")
if not files:
    raise FileNotFoundError("No benchmark_results_distilgpt2_*.json files in this folder!")

records: List[Dict[str, Any]] = []

# Function to extract latent dimension from filename
def get_latent_dim(filename: str) -> Optional[int]:
    match_ld = re.search(r"distilgpt2_(\d+)\.json$", filename)
    if match_ld:
        return int(match_ld.group(1))
    return None

quantization_levels = [2, 4, 8, 16, 32]

for path in files:
    latent_dim = get_latent_dim(path)
    if latent_dim is not None:
        with open(path) as fp:
            res = json.load(fp)

        raw_ppl: float = res["raw_baseline_ppl"]
        ae_ppl_none: Optional[float] = res["perplexities"].get("none", {}).get("ae_compressed_ppl")
        kv_ppl_quant = {bits: res["perplexities"].get(str(bits), {}).get("kv_cache_baseline_ppl") for bits in quantization_levels}

        if raw_ppl is not None and ae_ppl_none is not None:
            record = {"latent_dim": latent_dim, "raw_ppl": raw_ppl, "ae_ppl": ae_ppl_none, "dataset": "WikiText"}
            record.update({f"raw_quant{b}_ppl": kv_ppl_quant[b] for b in quantization_levels})
            records.append(record)

        lb_baseline = res.get("longbench", {}).get("baseline", {})
        lb_compressed_none = res.get("longbench", {}).get("compressed", {}).get("none", {})
        lb_compressed_quant = {
            bits: res.get("longbench", {}).get("compressed", {}).get(str(bits), {}) for bits in quantization_levels
        }

        for task, base_ppl in lb_baseline.items():
            comp_ppl_none = lb_compressed_none.get(task)
            comp_ppl_quant = {bits: lb_compressed_quant[bits].get(task) for bits in quantization_levels}
            if comp_ppl_none is not None:
                record = {"latent_dim": latent_dim, "raw_ppl": base_ppl, "ae_ppl": comp_ppl_none, "dataset": f"LongBench ({task})"}
                record.update({f"raw_quant{b}_ppl": comp_ppl_quant[b] for b in quantization_levels})
                records.append(record)

df = pd.DataFrame(records).sort_values(["latent_dim", "dataset"]).reset_index(drop=True)

latent_dims = sorted(df['latent_dim'].dropna().unique())
longbench_tasks = sorted([d.split('(')[1][:-1] for d in df['dataset'].unique() if d.startswith("LongBench")])

# Plot for WikiText Perplexity Comparison (Log Scale)
plt.figure(figsize=(10, 6))
wiki_data = df[df['dataset'] == 'WikiText'].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'] + [f'raw_quant{b}_ppl' for b in quantization_levels]).sort_values('latent_dim')
if not wiki_data.empty:
    plt.plot(wiki_data['latent_dim'], wiki_data['raw_ppl'], linestyle='--', marker='o', label='Raw Baseline (32-bit)')
    for bits in quantization_levels:
        plt.plot(wiki_data['latent_dim'], wiki_data[f'raw_quant{bits}_ppl'], marker='v', label=f'Raw ({bits}-bit)')
    plt.plot(wiki_data['latent_dim'], wiki_data['ae_ppl'], marker='x', label='AE (No Comp)')
    plt.xlabel("Latent Dimension")
    plt.ylabel("Perplexity (Log Scale)")
    plt.title("WikiText Perplexity vs. Latent Dimension")
    plt.yscale('log')
    plt.legend()
    plt.grid(True, which="both")
    plt.tight_layout()
    plt.savefig(os.path.join(plots_dir, "exp1_wikitext_perplexity_vs_latent_dim_all_quant_logy.png"))
    plt.close()

# Plot for LongBench Perplexity Comparison (averaged across tasks, Log Scale)
if longbench_tasks:
    plt.figure(figsize=(12, 7))
    longbench_data = df[df['dataset'].str.startswith('LongBench')].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'] + [f'raw_quant{b}_ppl' for b in quantization_levels])
    if not longbench_data.empty:
        avg_raw_ppl_lb = longbench_data.groupby('latent_dim')['raw_ppl'].mean()
        avg_ae_ppl_lb = longbench_data.groupby('latent_dim')['ae_ppl'].mean()
        avg_raw_quant_ppl_lb = {
            bits: longbench_data.groupby('latent_dim')[f'raw_quant{bits}_ppl'].mean() for bits in quantization_levels
        }

        plt.plot(avg_raw_ppl_lb.index, avg_raw_ppl_lb.values, linestyle='--', marker='o', label='Avg. Raw Baseline (32-bit)')
        for bits in quantization_levels:
            plt.plot(avg_raw_quant_ppl_lb[bits].index, avg_raw_quant_ppl_lb[bits].values, marker='v', label=f'Avg. Raw ({bits}-bit)')
        plt.plot(avg_ae_ppl_lb.index, avg_ae_ppl_lb.values, marker='x', label='Avg. AE (No Comp)')
        plt.xlabel("Latent Dimension")
        plt.ylabel("Average Perplexity (LongBench, Log Scale)")
        plt.yscale('log')
        plt.title("Average LongBench Perplexity vs. Latent Dimension")
        plt.legend()
        plt.grid(True, which="both")
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, "exp1_longbench_avg_perplexity_vs_latent_dim_all_quant_logy.png"))
        plt.close()

# Create Table Data
table_data = []
wiki_data_table = df[df['dataset'] == 'WikiText'].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'] + [f'raw_quant{b}_ppl' for b in quantization_levels]).sort_values('latent_dim')
if not wiki_data_table.empty:
    for index, row in wiki_data_table.iterrows():
        table_row = {"Latent Dim": int(row['latent_dim']), "Dataset": "WikiText", "AE PPL": f"{row['ae_ppl']:.2f}", "Raw (32-bit) PPL": f"{row['raw_ppl']:.2f}"}
        table_row.update({f"Raw ({b}-bit) PPL": f"{row[f'raw_quant{b}_ppl']:.2f}" for b in quantization_levels})
        table_data.append(table_row)

longbench_data_table = df[df['dataset'].str.startswith('LongBench')].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'] + [f'raw_quant{b}_ppl' for b in quantization_levels])
if not longbench_data_table.empty:
    agg_funcs = {'Avg_AE_PPL': ('ae_ppl', 'mean'), 'Avg_Raw_PPL': ('raw_ppl', 'mean')}
    agg_funcs.update({f'Avg_Raw_{b}bit_PPL': (f'raw_quant{b}_ppl', 'mean') for b in quantization_levels})
    avg_results_lb = longbench_data_table.groupby('latent_dim').agg(**agg_funcs).reset_index()
    for index, row in avg_results_lb.iterrows():
        table_row = {"Latent Dim": int(row['latent_dim']), "Dataset": "LongBench (Avg)", "Avg. AE PPL": f"{row['Avg_AE_PPL']:.2f}", "Avg. Raw (32-bit) PPL": f"{row['Avg_Raw_PPL']:.2f}"}
        table_row.update({f"Avg. Raw ({b}-bit) PPL": f"{row[f'Avg_Raw_{b}bit_PPL']:.2f}" for b in quantization_levels})
        table_data.append(table_row)

table_df = pd.DataFrame(table_data)
display(table_df)

# Create LaTeX Table
latex_table = table_df.to_latex(index=False, float_format="%.2f", caption="Perplexity Comparison: AE vs. Raw Quantization Levels", label="tab:perplexity_comparison_all_quant")
print("\nLaTeX Table:")
print(latex_table)

print(f"\nPerplexity comparison plots (log y-axis) and table (with LaTeX output) saved/printed.")

Unnamed: 0,Latent Dim,Dataset,AE PPL,Raw (32-bit) PPL,Raw (2-bit) PPL,Raw (4-bit) PPL,Raw (8-bit) PPL,Raw (16-bit) PPL
0,8,WikiText,5049.08,76.19,1310.36,104.02,76.3,76.19
1,16,WikiText,2192.71,72.66,1150.16,97.59,72.62,72.65
2,32,WikiText,167.0,72.66,1150.16,97.59,72.62,72.65



LaTeX Table:
\begin{table}
\caption{Perplexity Comparison: AE vs. Raw Quantization Levels}
\label{tab:perplexity_comparison_all_quant}
\begin{tabular}{rlllllll}
\toprule
Latent Dim & Dataset & AE PPL & Raw (32-bit) PPL & Raw (2-bit) PPL & Raw (4-bit) PPL & Raw (8-bit) PPL & Raw (16-bit) PPL \\
\midrule
8 & WikiText & 5049.08 & 76.19 & 1310.36 & 104.02 & 76.30 & 76.19 \\
16 & WikiText & 2192.71 & 72.66 & 1150.16 & 97.59 & 72.62 & 72.65 \\
32 & WikiText & 167.00 & 72.66 & 1150.16 & 97.59 & 72.62 & 72.65 \\
\bottomrule
\end{tabular}
\end{table}


Perplexity comparison plots (log y-axis) and table (with LaTeX output) saved/printed.


In [24]:
# KV‑cache compression analysis (open‑source only)
# ------------------------------------------------
import json, glob, re, statistics, itertools, os
import pandas as pd
import matplotlib.pyplot as plt
from typing import Optional, Dict, Any, List

# Ensure the plots directory exists
plots_dir = "./plots"
os.makedirs(plots_dir, exist_ok=True)

# 1) locate result files in the current directory
files = glob.glob("benchmark_results_distilgpt2_*.json")
if not files:
    raise FileNotFoundError("No benchmark_results_distilgpt2_*.json files in this folder!")

records: List[Dict[str, Any]] = []

# Function to extract latent dimension from filename
def get_latent_dim(filename: str) -> Optional[int]:
    match_ld = re.search(r"distilgpt2_(\d+)\.json$", filename)
    if match_ld:
        return int(match_ld.group(1))
    return None

quantization_levels = [2, 4, 8, 16, 32]

for path in files:
    latent_dim = get_latent_dim(path)
    if latent_dim is not None:
        with open(path) as fp:
            res = json.load(fp)

        raw_ppl: float = res["raw_baseline_ppl"]
        ae_ppl_none: Optional[float] = res["perplexities"].get("none", {}).get("ae_compressed_ppl")
        kv_ppl_quant = {bits: res["perplexities"].get(str(bits), {}).get("kv_cache_baseline_ppl") for bits in quantization_levels}

        if raw_ppl is not None and ae_ppl_none is not None:
            record = {"latent_dim": latent_dim, "raw_ppl": raw_ppl, "ae_ppl": ae_ppl_none, "dataset": "WikiText"}
            record.update({f"raw_quant{b}_ppl": kv_ppl_quant[b] for b in quantization_levels})
            records.append(record)

        lb_baseline = res.get("longbench", {}).get("baseline", {})
        lb_compressed_none = res.get("longbench", {}).get("compressed", {}).get("none", {})
        lb_compressed_quant = {
            bits: res.get("longbench", {}).get("compressed", {}).get(str(bits), {}) for bits in quantization_levels
        }

        for task, base_ppl in lb_baseline.items():
            comp_ppl_none = lb_compressed_none.get(task)
            comp_ppl_quant = {bits: lb_compressed_quant[bits].get(task) for bits in quantization_levels}
            if comp_ppl_none is not None:
                record = {"latent_dim": latent_dim, "raw_ppl": base_ppl, "ae_ppl": comp_ppl_none, "dataset": f"LongBench ({task})"}
                record.update({f"raw_quant{b}_ppl": comp_ppl_quant[b] for b in quantization_levels})
                records.append(record)

df = pd.DataFrame(records).sort_values(["latent_dim", "dataset"]).reset_index(drop=True)

latent_dims = sorted(df['latent_dim'].dropna().unique())
longbench_tasks = sorted([d.split('(')[1][:-1] for d in df['dataset'].unique() if d.startswith("LongBench")])

# Plot for WikiText Perplexity Comparison (Log Scale) - Separate Plots
wiki_data = df[df['dataset'] == 'WikiText'].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'] + [f'raw_quant{b}_ppl' for b in quantization_levels]).sort_values('latent_dim')
if not wiki_data.empty:
    for bits in quantization_levels:
        plt.figure(figsize=(8, 5))
        plt.plot(wiki_data['latent_dim'], wiki_data[f'raw_quant{bits}_ppl'], marker='v', label=f'Raw ({bits}-bit)')
        plt.plot(wiki_data['latent_dim'], wiki_data['ae_ppl'], marker='x', label='AE')
        plt.xlabel("Latent Dimension")
        plt.ylabel("Perplexity (Log Scale)")
        plt.title(f"WikiText Perplexity vs. Latent Dim - vs. Raw ({bits}-bit)")
        plt.yscale('log')
        plt.legend()
        plt.grid(True, which="both")
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, f"exp1_wikitext_perplexity_vs_latent_dim_quant{bits}_logy.png"))
        plt.close()

# Plot for LongBench Perplexity Comparison (averaged across tasks, Log Scale) - Separate Plots
if longbench_tasks:
    longbench_data = df[df['dataset'].str.startswith('LongBench')].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'] + [f'raw_quant{b}_ppl' for b in quantization_levels])
    if not longbench_data.empty:
        avg_ae_ppl_lb = longbench_data.groupby('latent_dim')['ae_ppl'].mean()
        avg_raw_quant_ppl_lb = {
            bits: longbench_data.groupby('latent_dim')[f'raw_quant{bits}_ppl'].mean() for bits in quantization_levels
        }

        for bits in quantization_levels:
            plt.figure(figsize=(10, 6))
            plt.plot(avg_raw_quant_ppl_lb[bits].index, avg_raw_quant_ppl_lb[bits].values, marker='v', label=f'Avg. Raw ({bits}-bit)')
            plt.plot(avg_ae_ppl_lb.index, avg_ae_ppl_lb.values, marker='x', label='Avg. AE')
            plt.xlabel("Latent Dimension")
            plt.ylabel("Average Perplexity (LongBench, Log Scale)")
            plt.title(f"Avg. LongBench PPL vs. Latent Dim - vs. Raw ({bits}-bit)")
            plt.yscale('log')
            plt.legend()
            plt.grid(True, which="both")
            plt.tight_layout()
            plt.savefig(os.path.join(plots_dir, f"exp1_longbench_avg_perplexity_vs_latent_dim_quant{bits}_logy.png"))
            plt.close()

# Create Table Data (as before)
table_data = []
wiki_data_table = df[df['dataset'] == 'WikiText'].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'] + [f'raw_quant{b}_ppl' for b in quantization_levels]).sort_values('latent_dim')
if not wiki_data_table.empty:
    for index, row in wiki_data_table.iterrows():
        table_row = {"Latent Dim": int(row['latent_dim']), "Dataset": "WikiText", "AE PPL": f"{row['ae_ppl']:.2f}"}
        table_row.update({f"Raw ({b}-bit) PPL": f"{row[f'raw_quant{b}_ppl']:.2f}" for b in quantization_levels})
        table_data.append(table_row)

longbench_data_table = df[df['dataset'].str.startswith('LongBench')].dropna(subset=['latent_dim', 'raw_ppl', 'ae_ppl'] + [f'raw_quant{b}_ppl' for b in quantization_levels])
if not longbench_data_table.empty:
    agg_funcs = {'Avg_AE_PPL': ('ae_ppl', 'mean')}
    agg_funcs.update({f'Avg_Raw_{b}bit_PPL': (f'raw_quant{b}_ppl', 'mean') for b in quantization_levels})
    avg_results_lb = longbench_data_table.groupby('latent_dim').agg(**agg_funcs).reset_index()
    for index, row in avg_results_lb.iterrows():
        table_row = {"Latent Dim": int(row['latent_dim']), "Dataset": "LongBench (Avg)", "Avg. AE PPL": f"{row['Avg_AE_PPL']:.2f}"}
        table_row.update({f"Avg. Raw ({b}-bit) PPL": f"{row[f'Avg_Raw_{b}bit_PPL']:.2f}" for b in quantization_levels})
        table_data.append(table_row)

table_df = pd.DataFrame(table_data)
display(table_df)

# Create LaTeX Table (as before)
latex_table = table_df.to_latex(index=False, float_format="%.2f", caption="Perplexity Comparison: AE vs. Raw Quantization Levels", label="tab:perplexity_comparison_all_quant")
print("\nLaTeX Table:")
print(latex_table)

print(f"\nPerplexity comparison plots (log y-axis) and table (with LaTeX output) saved/printed.")

Unnamed: 0,Latent Dim,Dataset,AE PPL,Raw (2-bit) PPL,Raw (4-bit) PPL,Raw (8-bit) PPL,Raw (16-bit) PPL,Raw (32-bit) PPL
0,8,WikiText,5049.08,1310.36,104.02,76.3,76.19,76.19
1,16,WikiText,2192.71,1150.16,97.59,72.62,72.65,72.66
2,32,WikiText,167.0,1150.16,97.59,72.62,72.65,72.66



LaTeX Table:
\begin{table}
\caption{Perplexity Comparison: AE vs. Raw Quantization Levels}
\label{tab:perplexity_comparison_all_quant}
\begin{tabular}{rlllllll}
\toprule
Latent Dim & Dataset & AE PPL & Raw (2-bit) PPL & Raw (4-bit) PPL & Raw (8-bit) PPL & Raw (16-bit) PPL & Raw (32-bit) PPL \\
\midrule
8 & WikiText & 5049.08 & 1310.36 & 104.02 & 76.30 & 76.19 & 76.19 \\
16 & WikiText & 2192.71 & 1150.16 & 97.59 & 72.62 & 72.65 & 72.66 \\
32 & WikiText & 167.00 & 1150.16 & 97.59 & 72.62 & 72.65 & 72.66 \\
\bottomrule
\end{tabular}
\end{table}


Perplexity comparison plots (log y-axis) and table (with LaTeX output) saved/printed.


In [32]:
# KV‑cache compression analysis (open‑source only) - Asymmetric AE (Layer Ratio) - Table Only
# ------------------------------------------------
import json, glob, re, statistics, itertools, os
import pandas as pd
from typing import Optional, Dict, Any, List

# 1) locate result files for asymmetric AE (layer ratio)
files = glob.glob("benchmark_results_distilgpt2_ae_*.json")
if not files:
    raise FileNotFoundError("No benchmark_results_distilgpt2_ae_*.json files found!")

records: List[Dict[str, Any]] = []

# Function to extract encoder and decoder layer counts from filename
def get_layer_counts_asym(filename: str) -> Optional[tuple[int, int]]:
    match_ratio = re.search(r"distilgpt2_ae_(\d+)_(\d+)\.json$", filename)
    if match_ratio:
        encoder_layers = int(match_ratio.group(1))
        decoder_layers = int(match_ratio.group(2))
        return (encoder_layers, decoder_layers)
    return None

quantization_levels = [2, 4, 8, 16, 32]

for path in files:
    layer_counts = get_layer_counts_asym(path)
    if layer_counts:
        encoder_layers, decoder_layers = layer_counts
        config_str = f"{encoder_layers}:{decoder_layers}"
        with open(path) as fp:
            res = json.load(fp)

        raw_ppl: float = res["raw_baseline_ppl"]
        ae_ppl_none: Optional[float] = res["perplexities"].get("none", {}).get("ae_compressed_ppl")
        kv_ppl_quant = {str(bits): res["perplexities"].get(str(bits), {}).get("kv_cache_baseline_ppl") for bits in quantization_levels}

        if raw_ppl is not None and ae_ppl_none is not None:
            records.append(dict(
                encoder_layers=encoder_layers,
                decoder_layers=decoder_layers,
                config=config_str,
                raw_ppl=raw_ppl,
                ae_ppl=ae_ppl_none,
                dataset="WikiText"
            ))
            for bits_str, ppl in kv_ppl_quant.items():
                if ppl is not None:
                    records.append(dict(
                        encoder_layers=encoder_layers,
                        decoder_layers=decoder_layers,
                        config=config_str,
                        raw_ppl=raw_ppl,
                        ae_ppl=ppl,
                        dataset=f"WikiText (Raw {bits_str}-bit)"
                    ))

        lb_baseline = res.get("longbench", {}).get("baseline", {})
        lb_compressed_none = res.get("longbench", {}).get("compressed", {}).get("none", {})
        lb_compressed_quant = {
            str(bits): res.get("longbench", {}).get("compressed", {}).get(str(bits), {}) for bits in quantization_levels
        }

        for task, base_ppl in lb_baseline.items():
            comp_ppl_none = lb_compressed_none.get(task)
            if comp_ppl_none is not None:
                records.append(dict(
                    encoder_layers=encoder_layers,
                    decoder_layers=decoder_layers,
                    config=config_str,
                    raw_ppl=base_ppl,
                    ae_ppl=comp_ppl_none,
                    dataset=f"LongBench ({task})"
                ))
                for bits_str, quant_data in lb_compressed_quant.items():
                    raw_quant_ppl_lb = quant_data.get(task)
                    if raw_quant_ppl_lb is not None:
                        records.append(dict(
                            encoder_layers=encoder_layers,
                            decoder_layers=decoder_layers,
                            config=config_str,
                            raw_ppl=base_ppl,
                            ae_ppl=raw_quant_ppl_lb,
                            dataset=f"LongBench ({task}) (Raw {bits_str}-bit)"
                        ))

df = pd.DataFrame(records).sort_values(["encoder_layers", "decoder_layers", "dataset"]).reset_index(drop=True)

# Create Table Data for Asymmetric AE (Layer Ratio)
table_data_asym = []
wiki_data_table_asym = df[df['dataset'] == 'WikiText'].dropna(subset=['encoder_layers', 'decoder_layers', 'ae_ppl']).sort_values(['encoder_layers', 'decoder_layers'])
if not wiki_data_table_asym.empty:
    for index, row in wiki_data_table_asym.iterrows():
        table_row = {"Encoder Layers": int(row['encoder_layers']), "Decoder Layers": int(row['decoder_layers']), "Dataset": "WikiText", "Asym. AE PPL": f"{row['ae_ppl']:.2f}"}
        for bits in quantization_levels:
            raw_quant_row = df[(df['dataset'] == f'WikiText (Raw {bits}-bit)') & (df['encoder_layers'] == row['encoder_layers']) & (df['decoder_layers'] == row['decoder_layers'])]
            if not raw_quant_row.empty:
                table_row[f"Raw ({bits}-bit) PPL"] = f"{raw_quant_row['ae_ppl'].iloc[0]:.2f}"
            else:
                table_row[f"Raw ({bits}-bit) PPL"] = "N/A"
        table_data_asym.append(table_row)

longbench_ae_table_data = df[df['dataset'].str.startswith('LongBench') & (df['dataset'].str.find('(Raw') == -1)].dropna(subset=['encoder_layers', 'decoder_layers', 'ae_ppl'])
if not longbench_ae_table_data.empty:
    grouped_lb = longbench_ae_table_data.groupby(['encoder_layers', 'decoder_layers'])['ae_ppl'].mean().reset_index()
    for index, row in grouped_lb.iterrows():
        table_row = {"Encoder Layers": int(row['encoder_layers']), "Decoder Layers": int(row['decoder_layers']), "Dataset": "LongBench (Avg)", "Avg. Asym. AE PPL": f"{row['ae_ppl']:.2f}"}
        for bits in quantization_levels:
            avg_raw_quant_ppls = []
            for task in longbench_tasks:
                raw_data = df[(df['dataset'] == f'LongBench ({task}) (Raw {bits}-bit)') & (df['encoder_layers'] == row['encoder_layers']) & (df['decoder_layers'] == row['decoder_layers'])]
                if not raw_data.empty:
                    avg_raw_quant_ppls.append(raw_data['ae_ppl'].mean())
            if avg_raw_quant_ppls:
                table_row[f"Avg. Raw ({bits}-bit) PPL"] = f"{statistics.mean(avg_raw_quant_ppls):.2f}"
            else:
                table_row[f"Avg. Raw ({bits}-bit) PPL"] = "N/A"
        table_data_asym.append(table_row)

table_df_asym = pd.DataFrame(table_data_asym)
display(table_df_asym)

# Create LaTeX Table for Asymmetric AE (Layer Ratio)
latex_table_asym = table_df_asym.to_latex(index=False, float_format="%.2f", caption="Perplexity Comparison: Asymmetric AE (Layer Ratio) vs. Raw Quantization Levels", label="tab:asym_perplexity_comparison_layer_ratio")
print("\nLaTeX Table (Asymmetric AE - Layer Ratio):\n")
print(latex_table_asym)

Unnamed: 0,Encoder Layers,Decoder Layers,Dataset,Asym. AE PPL,Raw (2-bit) PPL,Raw (4-bit) PPL,Raw (8-bit) PPL,Raw (16-bit) PPL,Raw (32-bit) PPL
0,1,1,WikiText,167.0,1150.16,97.59,72.62,72.65,72.66
1,2,1,WikiText,166.95,1150.16,97.59,72.66,72.65,72.66
2,2,2,WikiText,275.12,1150.16,97.59,72.62,72.65,72.66
3,3,1,WikiText,163.3,1150.16,97.59,72.62,72.65,72.66



LaTeX Table (Asymmetric AE - Layer Ratio):

\begin{table}
\caption{Perplexity Comparison: Asymmetric AE (Layer Ratio) vs. Raw Quantization Levels}
\label{tab:asym_perplexity_comparison_layer_ratio}
\begin{tabular}{rrlllllll}
\toprule
Encoder Layers & Decoder Layers & Dataset & Asym. AE PPL & Raw (2-bit) PPL & Raw (4-bit) PPL & Raw (8-bit) PPL & Raw (16-bit) PPL & Raw (32-bit) PPL \\
\midrule
1 & 1 & WikiText & 167.00 & 1150.16 & 97.59 & 72.62 & 72.65 & 72.66 \\
2 & 1 & WikiText & 166.95 & 1150.16 & 97.59 & 72.66 & 72.65 & 72.66 \\
2 & 2 & WikiText & 275.12 & 1150.16 & 97.59 & 72.62 & 72.65 & 72.66 \\
3 & 1 & WikiText & 163.30 & 1150.16 & 97.59 & 72.62 & 72.65 & 72.66 \\
\bottomrule
\end{tabular}
\end{table}



In [34]:
import json
import glob
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np

# Ensure the plots directory exists
plots_dir = "./plots_experiment_2_averaged"
os.makedirs(plots_dir, exist_ok=True)

files = glob.glob("benchmark_results_distilgpt2_*.json")
relevant_files = [f for f in files if f.endswith(("8.json", "16.json", "32.json"))]

if not relevant_files:
    raise FileNotFoundError("No benchmark_results_distilgpt2_{8,16,32}.json files found!")

data_points = []

for path in relevant_files:
    latent_dim = int(path.split('_')[-1].split('.')[0])
    with open(path) as f:
        data = json.load(f)
        data_points.append({
            "latent_dim": latent_dim,
            "ttft_ae_none": data["perplexities"]["none"].get("avg_decompression_speed_per_token_s"),
            "overhead_ae_none": data["perplexities"]["none"].get("ae_compressed_overhead_per_token_s"),
            "ttft_raw_none": data["perplexities"]["none"].get("kv_cache_baseline_overhead_per_token_s"),
        })

df = pd.DataFrame(data_points)

# Average the values for each latent dimension
averaged_df = df.groupby("latent_dim").agg(
    avg_ttft_ae_none=("ttft_ae_none", "mean"),
    avg_overhead_ae_none=("overhead_ae_none", "mean"),
    first_ttft_raw_none=("ttft_raw_none", "first") # Baseline should be consistent, so take the first value
).reset_index()

# Calculate the average raw baseline overhead
avg_ttft_raw_none = df["ttft_raw_none"].mean()

# Plotting Averaged Time to First Token (TTFT) - AE Compressed (None)
plt.figure(figsize=(8, 6))
plt.plot(averaged_df["latent_dim"], averaged_df["avg_ttft_ae_none"], marker='o', linestyle='-', label="Average AE Decompression Speed")
plt.xlabel("Autoencoder Latent Dimension")
plt.ylabel("Average Decompression Speed (s/token)")
plt.title("Average Decompression Speed vs. Autoencoder Latent Dimension")
plt.xticks(averaged_df["latent_dim"])
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "exp2_avg_decompression_speed_vs_latent_dim.png"))
plt.close()

# Plotting Averaged Overheads
plt.figure(figsize=(8, 6))
plt.plot(averaged_df["latent_dim"], averaged_df["avg_overhead_ae_none"], marker='o', linestyle='-', label="Average AE Compressed Overhead")
plt.axhline(y=avg_ttft_raw_none, linestyle='--', color='r', label=f"Average Raw Baseline Overhead ({avg_ttft_raw_none:.6f} s/token)")
plt.xlabel("Autoencoder Latent Dimension")
plt.ylabel("Average Overhead per Token (s)")
plt.title("Average Overhead vs. Autoencoder Latent Dimension")
plt.xticks(averaged_df["latent_dim"])
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "exp2_avg_overhead_vs_latent_dim.png"))
plt.close()

# Create Table with Averaged Values
table_data = []
for index, row in averaged_df.iterrows():
    table_data.append({
        "Latent Dimension": int(row["latent_dim"]),
        "Average AE Decompression Speed (s/token)": f"{row['avg_ttft_ae_none']:.6f}",
        "Average AE Compressed Overhead (s/token)": f"{row['avg_overhead_ae_none']:.6f}",
        "Average Raw Baseline Overhead (s/token)": f"{avg_ttft_raw_none:.6f}",
    })

table_df = pd.DataFrame(table_data)
display(table_df)

# Generate LaTeX Table with Averaged Values
latex_table = table_df.to_latex(index=False, float_format="%.6f", caption="Average Decompression Speed and Overhead vs. Autoencoder Latent Dimension", label="tab:exp2_avg_speed_overhead_vs_latent_dim")

print("\nLaTeX Table (Experiment 2 - Averaged):\n")
print(latex_table)

print(f"\nAveraged plots and LaTeX table for Experiment 2 (Decompression Speed vs. Latent Dimension) created.")

Unnamed: 0,Latent Dimension,Average AE Decompression Speed (s/token),Average AE Compressed Overhead (s/token),Average Raw Baseline Overhead (s/token)
0,8,0.042156,0.006435,0.006337
1,16,0.041232,0.006514,0.006337
2,32,0.042075,0.00666,0.006337



LaTeX Table (Experiment 2 - Averaged):

\begin{table}
\caption{Average Decompression Speed and Overhead vs. Autoencoder Latent Dimension}
\label{tab:exp2_avg_speed_overhead_vs_latent_dim}
\begin{tabular}{rlll}
\toprule
Latent Dimension & Average AE Decompression Speed (s/token) & Average AE Compressed Overhead (s/token) & Average Raw Baseline Overhead (s/token) \\
\midrule
8 & 0.042156 & 0.006435 & 0.006337 \\
16 & 0.041232 & 0.006514 & 0.006337 \\
32 & 0.042075 & 0.006660 & 0.006337 \\
\bottomrule
\end{tabular}
\end{table}


Averaged plots and LaTeX table for Experiment 2 (Decompression Speed vs. Latent Dimension) created.


In [36]:
import json
import glob
import pandas as pd
import matplotlib.pyplot as plt
import os
import re

# Ensure the plots directory exists
plots_dir = "./plots_experiment_4_comparisons"
os.makedirs(plots_dir, exist_ok=True)

files = glob.glob("benchmark_results_distilgpt2_ae_*.json")

if not files:
    raise FileNotFoundError("No benchmark_results_distilgpt2_ae_*.json files found!")

quantization_levels = [2, 4, 8, 16, 32]
asym_ae_results = {}
raw_baseline_ppl = {}

def get_layer_counts_asym(filename: str) -> Optional[tuple[int, int]]:
    match_ratio = re.search(r"distilgpt2_ae_(\d+)_(\d+)\.json$", filename)
    if match_ratio:
        encoder_layers = int(match_ratio.group(1))
        decoder_layers = int(match_ratio.group(2))
        return (encoder_layers, decoder_layers)
    return None

for path in files:
    layer_counts = get_layer_counts_asym(path)
    with open(path) as f:
        data = json.load(f)
        for bits in quantization_levels:
            raw_ppl = data["perplexities"].get(str(bits), {}).get("kv_cache_baseline_ppl")
            if raw_ppl is not None:
                raw_baseline_ppl.setdefault(bits, []).append(raw_ppl)

        if layer_counts and layer_counts[0] != layer_counts[1]:  # Identify asymmetric AEs
            encoder_layers, decoder_layers = layer_counts
            config_str = f"{encoder_layers}:{decoder_layers}"
            asym_ae_results.setdefault(config_str, [])
            for bits in quantization_levels:
                ae_ppl = data["perplexities"].get(str(bits), {}).get("ae_compressed_ppl")
                if ae_ppl is not None:
                    asym_ae_results[config_str].append({"bits": bits, "ppl": ae_ppl})

# Average raw baseline perplexity for each bit level
avg_raw_baseline_ppl = {bits: sum(ppls) / len(ppls) if ppls else None for bits, ppls in raw_baseline_ppl.items()}

# Generate pairwise comparison plots
for config, data in asym_ae_results.items():
    df_ae = pd.DataFrame(data).sort_values("bits")
    if not df_ae.empty:
        plt.figure(figsize=(10, 6))
        plt.plot(df_ae["bits"], df_ae["ppl"], marker='o', linestyle='-', label=f"Asym. AE ({config}) + Quant.")
        if avg_raw_baseline_ppl:
            raw_ppl_values = [avg_raw_baseline_ppl.get(bit) for bit in df_ae["bits"]]
            plt.plot(df_ae["bits"], raw_ppl_values, marker='x', linestyle='--', label="Average Raw Quantization")
        plt.xlabel("Quantization Bits")
        plt.ylabel("Perplexity (Log Scale)")
        plt.title(f"Perplexity vs. Quantization Bits (Asym. AE: {config} vs. Avg. Raw)")
        plt.yscale("log")
        plt.xticks(quantization_levels)
        plt.grid(True, which="both", linestyle="-")
        plt.legend()
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, f"exp4_pairwise_perplexity_vs_quant_bits_asym_ae_{config.replace(':', '_')}.png"))
        plt.close()

# Generate combined plot
plt.figure(figsize=(12, 7))
if avg_raw_baseline_ppl:
    plt.plot(quantization_levels, [avg_raw_baseline_ppl.get(bit) for bit in quantization_levels], marker='s', linestyle='-', label="Average Raw Quantization", color='black')

for config, data in asym_ae_results.items():
    df_ae = pd.DataFrame(data).sort_values("bits")
    if not df_ae.empty:
        plt.plot(df_ae["bits"], df_ae["ppl"], marker='o', linestyle='-', label=f"Asym. AE ({config}) + Quant.")

plt.xlabel("Quantization Bits")
plt.ylabel("Perplexity (Log Scale)")
plt.title("Perplexity vs. Quantization Bits (All Asym. AEs vs. Avg. Raw)")
plt.yscale("log")
plt.xticks(quantization_levels)
plt.grid(True, which="both", linestyle="-")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "exp4_combined_perplexity_vs_quant_bits_asym_aes.png"))
plt.close()

print(f"\nGenerated pairwise and combined plots for Experiment 4 in the '{plots_dir}' directory.")


Generated pairwise and combined plots for Experiment 4 in the './plots_experiment_4_comparisons' directory.


In [37]:
import json
import glob
import pandas as pd
import matplotlib.pyplot as plt
import os
import re

# Ensure the plots directory exists
plots_dir = "./plots_experiment_4_comparison_2_2_vs_2_1"
os.makedirs(plots_dir, exist_ok=True)

files = glob.glob("benchmark_results_distilgpt2_ae_*.json")

if not files:
    raise FileNotFoundError("No benchmark_results_distilgpt2_ae_*.json files found!")

quantization_levels = [2, 4, 8, 16, 32]
comparison_results = {}

def get_layer_counts_asym(filename: str) -> Optional[tuple[int, int]]:
    match_ratio = re.search(r"distilgpt2_ae_(\d+)_(\d+)\.json$", filename)
    if match_ratio:
        encoder_layers = int(match_ratio.group(1))
        decoder_layers = int(match_ratio.group(2))
        return (encoder_layers, decoder_layers)
    return None

for path in files:
    layer_counts = get_layer_counts_asym(path)
    if layer_counts and layer_counts in [(2, 2), (2, 1)]:
        encoder_layers, decoder_layers = layer_counts
        config_str = f"{encoder_layers}:{decoder_layers}"
        comparison_results.setdefault(config_str, [])
        with open(path) as f:
            data = json.load(f)
            for bits in quantization_levels:
                ae_ppl = data["perplexities"].get(str(bits), {}).get("ae_compressed_ppl")
                if ae_ppl is not None:
                    comparison_results[config_str].append({"bits": bits, "ppl": ae_ppl})

# Generate comparison plot for 2:2 vs 2:1
plt.figure(figsize=(10, 6))

for config, data in comparison_results.items():
    df_ae = pd.DataFrame(data).sort_values("bits")
    if not df_ae.empty:
        plt.plot(df_ae["bits"], df_ae["ppl"], marker='o', linestyle='-', label=f"Asym. AE ({config}) + Quant.")

plt.xlabel("Quantization Bits")
plt.ylabel("Perplexity (Log Scale)")
plt.title("Perplexity vs. Quantization Bits (Asym. AE 2:2 vs 2:1)")
plt.yscale("log")
plt.xticks(quantization_levels)
plt.grid(True, which="both", linestyle="-")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "exp4_comparison_perplexity_vs_quant_bits_ae_2_2_vs_2_1.png"))
plt.close()

print(f"\nGenerated comparison plot for Asymmetric AE 2:2 vs 2:1 in the '{plots_dir}' directory.")


Generated comparison plot for Asymmetric AE 2:2 vs 2:1 in the './plots_experiment_4_comparison_2_2_vs_2_1' directory.


In [8]:
#!/usr/bin/env python3
# compare_sym_vs_asym.py

import os, glob, json, statistics
import matplotlib.pyplot as plt

# 1) setup
plots_dir = "./plots"
os.makedirs(plots_dir, exist_ok=True)

# find only the AE result files
ae_files = glob.glob("benchmark_results_distilgpt2*_ae_*.json")
if not ae_files:
    raise RuntimeError("No *ae_*.json files found")

quant_bits = [2, 4, 8, 16]

# 2) gather per‐run stats, grouping symmetrics into one "Sym" bucket
wiki_map = {}         # label -> list of wiki-ppls
longbench_map = {}    # label -> list of {bit->avg longbench-ppl}

for fp in ae_files:
    name = os.path.basename(fp) \
             .replace("benchmark_results_distilgpt2_","") \
             .replace(".json","")  # e.g. "ae_2_1"
    # rename the two symmetric runs
    if name in ("ae_1_1","ae_2_2"):
        label = "Sym ae_1_1"
    else:
        label = f"Asym {name}"

    data = json.load(open(fp))

    # WikiText PPL
    w_ppl = data["perplexities"]["none"]["ae_compressed_ppl"]
    wiki_map.setdefault(label, []).append(w_ppl)

    # LongBench average PPL over tasks
    tasks = data["longbench"]["baseline"].keys()
    lb_avg = {
        b: statistics.mean(
             data["longbench"]["compressed"][str(b)][task]
             for task in tasks
        )
        for b in quant_bits
    }
    longbench_map.setdefault(label, []).append(lb_avg)

# 3) average across runs per label
wiki_avg = {lbl: statistics.mean(vals) for lbl, vals in wiki_map.items()}
longbench_avg = {
    lbl: {
       b: statistics.mean(d[b] for d in dicts)
       for b in quant_bits
    }
    for lbl, dicts in longbench_map.items()
}

# 4) plot WikiText bar chart
plt.figure(figsize=(8,4))
labels = list(wiki_avg.keys())
ppls   = [wiki_avg[lbl] for lbl in labels]

plt.bar(labels, ppls)
plt.yscale("log")
plt.ylabel("Perplexity on WikiText")
plt.title("WikiText PPL: Sym vs. Asym AEs")
plt.xticks(rotation=30, ha="right")
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "wiki_sym_vs_asym.png"))
plt.close()

# 5) plot LongBench line chart
plt.figure(figsize=(8,4))
for lbl, series in longbench_avg.items():
    plt.plot(quant_bits,
             [series[b] for b in quant_bits],
             marker="o",
             label=lbl)

plt.xlim(min(quant_bits), max(quant_bits))
plt.yscale("log")
plt.xlabel("Quantization bits")
plt.ylabel("Avg. Perplexity on LongBench")
plt.title("LongBench PPL vs Bits: Sym vs. Asym AEs")
plt.legend()
plt.grid(which="both", linestyle=":")
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "longbench_sym_vs_asym.png"))
plt.close()

print("✅ Done! See ./plots/wiki_sym_vs_asym.png and longbench_sym_vs_asym.png")

✅ Done! See ./plots/wiki_sym_vs_asym.png and longbench_sym_vs_asym.png


In [11]:
#!/usr/bin/env python3
# compare_asymmetric_ae_group1_fixed.py

import os, glob, json, statistics
import matplotlib.pyplot as plt

plots_dir = "./plots"
os.makedirs(plots_dir, exist_ok=True)

# pick your three runs
group = ["3_1", "2_1", "1_1"]
labels = [f"AE_{x}" for x in group]

all_json = glob.glob("benchmark_results_distilgpt2*_ae_*.json")
group_files = {}
for lbl, suf in zip(labels, group):
    fn = next((f for f in all_json if f"_ae_{suf}.json" in f), None)
    if not fn:
        raise FileNotFoundError(f"Couldn't find JSON for AE_{suf}")
    group_files[lbl] = fn

# 1) WikiText
wiki_ppls = {}
for lbl, fn in group_files.items():
    r = json.load(open(fn))
    wiki_ppls[lbl] = r["perplexities"]["none"]["ae_compressed_ppl"]

plt.figure(figsize=(6,4))
plt.bar(wiki_ppls.keys(), wiki_ppls.values())
plt.yscale("log")
plt.ylabel("Perplexity on WikiText")
plt.title("WikiText PPL: AE 3_1 vs 2_1 vs 1_1")
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "wiki_asym_3_1_2_1_1_1.png"))
plt.close()

# 2) LongBench
quant_bits = [2,4,8,16]
longbench_avg = {}

for lbl, fn in group_files.items():
    r = json.load(open(fn))
    # get the list of tasks from the baseline
    tasks = r["longbench"]["baseline"].keys()
    # average only the numeric PPLs per bit
    avg_ppl = {
      b: statistics.mean(
           r["longbench"]["compressed"][str(b)][task]
           for task in tasks
      )
      for b in quant_bits
    }
    longbench_avg[lbl] = avg_ppl

plt.figure(figsize=(6,4))
for lbl, series in longbench_avg.items():
    plt.plot(quant_bits,
             [series[b] for b in quant_bits],
             marker="o", label=lbl)

plt.xlim(min(quant_bits), max(quant_bits))
plt.yscale("log")
plt.xlabel("Quantization bits")
plt.ylabel("Avg. Perplexity on LongBench")
plt.title("LongBench PPL vs Bits: AE 3_1 vs 2_1 vs 1_1")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "longbench_asym_3_1_2_1_1_1.png"))
plt.close()

print("✅ Group1 plots written to ./plots/")

✅ Group1 plots written to ./plots/


In [13]:
#!/usr/bin/env python3
# compare_asymmetric_ae_group1.py

import os
import glob
import json
import statistics
import matplotlib.pyplot as plt

# --- Configuration ---
plots_dir = "./plots"
os.makedirs(plots_dir, exist_ok=True)

# Which asymmetric-AE runs to compare
group = ["3_1", "2_1", "1_1"]
labels = [f"AE_{s}" for s in group]

# Discover all AE JSON files
all_json = glob.glob("benchmark_results_distilgpt2_ae_*.json")
if not all_json:
    raise RuntimeError("No benchmark_results_distilgpt2_ae_*.json files found")

# Map labels to their file paths
group_files = {}
for lbl, suffix in zip(labels, group):
    fn = next((f for f in all_json if f"_ae_{suffix}.json" in f), None)
    if fn is None:
        raise FileNotFoundError(f"Couldn't find JSON for AE_{suffix}")
    group_files[lbl] = fn

# --- 1) WikiText bar chart ---
wiki_ppls = {}
for lbl, fn in group_files.items():
    data = json.load(open(fn))
    wiki_ppls[lbl] = data["perplexities"]["none"]["ae_compressed_ppl"]

plt.figure(figsize=(6, 4))
plt.bar(wiki_ppls.keys(), wiki_ppls.values())
plt.yscale("log")
plt.ylabel("Perplexity on WikiText")
plt.title("WikiText PPL: AE 3_1 vs 2_1 vs 1_1")
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "wiki_asym_3_1_2_1_1_1.png"))
plt.close()

# --- 2) LongBench line chart ---
quant_bits = [2, 4, 8, 16]
longbench_avg = {}

for lbl, fn in group_files.items():
    data = json.load(open(fn))
    # Use baseline task list to pull only the numeric PPLs
    tasks = data["longbench"]["baseline"].keys()
    avg_ppl = {
        b: statistics.mean(
            data["longbench"]["compressed"][str(b)][task]
            for task in tasks
        )
        for b in quant_bits
    }
    longbench_avg[lbl] = avg_ppl

plt.figure(figsize=(6, 4))
for lbl, series in longbench_avg.items():
    plt.plot(
        quant_bits,
        [series[b] for b in quant_bits],
        marker="o",
        label=lbl
    )

plt.xlim(min(quant_bits), max(quant_bits))
plt.yscale("log")
plt.xlabel("Quantization bits")
plt.ylabel("Avg. Perplexity on LongBench")
plt.title("LongBench PPL vs Bits: AE 3_1 vs 2_1 vs 1_1")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "longbench_asym_3_1_2_1_1_1.png"))
plt.close()

print("✅ Group1 plots written to ./plots/")

✅ Group1 plots written to ./plots/


In [14]:
#!/usr/bin/env python3
# compare_overhead.py

import os
import glob
import json
import numpy as np
import matplotlib.pyplot as plt

# 1) Setup
plots_dir = "./plots"
os.makedirs(plots_dir, exist_ok=True)

# 2) Locate the symmetric-latent=32 JSON (baseline reference)
all_files = glob.glob("benchmark_results_distilgpt2_*.json")
ref_file = next(
    f for f in all_files
    if f.endswith("_32.json") and "_ae_" not in f
)
with open(ref_file) as fp:
    data = json.load(fp)

# 3) Define the quantization levels (including the "none" case)
levels = ["none", 2, 4, 8, 16, 32]

# 4) Extract per‐token overheads for baseline vs. AE
baseline_overhead = []
ae_overhead       = []
for lvl in levels:
    key = str(lvl)
    p = data["perplexities"][key]
    baseline_overhead.append(p["kv_cache_baseline_overhead_per_token_s"])
    ae_overhead.append(p["ae_compressed_overhead_per_token_s"])

# 5) Plot: grouped bar chart
x = np.arange(len(levels))
width = 0.35

plt.figure(figsize=(8, 4))
plt.bar(x - width/2, baseline_overhead, width, label="KV Baseline")
plt.bar(x + width/2, ae_overhead,       width, label="AE Compressed")
plt.xticks(x, [str(l) for l in levels])
plt.xlabel("Quantization bits")
plt.ylabel("Overhead per token (s)")
plt.title("Per‐token Overhead: KV Baseline vs AE Compressed")
plt.legend()
plt.grid(axis="y", linestyle=":")
plt.tight_layout()
plt.savefig(os.path.join(plots_dir, "overhead_baseline_vs_ae.png"))
plt.close()

print("✅ Overhead comparison plot saved to ./plots/overhead_baseline_vs_ae.png")

✅ Overhead comparison plot saved to ./plots/overhead_baseline_vs_ae.png
