# Expression-Preserving Face Anonymization — Final Report

**COMP4026 Final-Year Project — HKBU**

This notebook loads all experiment results, produces summary tables, and
generates privacy–utility Pareto frontier visualisations.

In [None]:
import os, sys
import json
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({"figure.dpi": 120, "font.size": 10})

# Locate the project root as the parent of the current working directory,
# and the results folder directly beneath it. Nothing here modifies sys.path.
# NOTE(review): presumably the notebook is launched from a sub-folder of the
# project — confirm.
ROOT = Path.cwd().parent
RESULTS = ROOT.joinpath("results")
print(f"Project root: {ROOT}", f"Results dir:  {RESULTS}", sep="\n")

## 1 — Baseline Sweep (Classical Anonymizers)

In [None]:
# Load the baseline sweep frontier and show it ordered by descending
# privacy score.
baseline_csv = RESULTS.joinpath("baseline_sweep", "frontier.csv")
df_baseline = pd.read_csv(baseline_csv)

# Column selection for the summary tables; later cells reuse `cols`.
cols = [
    "anonymizer", "params",
    "closed_set_top1", "privacy_score",
    "acc_anonymized", "expr_consistency", "utility_score",
    "lpips_mean", "psnr_mean", "ssim_mean",
]
df_baseline.loc[:, cols].sort_values(by="privacy_score", ascending=False)

In [None]:
# Privacy-vs-utility scatter with one series (and legend entry) per
# anonymizer, in order of first appearance in the table.
fig, ax = plt.subplots(figsize=(8, 5))
for method in df_baseline["anonymizer"].unique():
    rows = df_baseline.loc[df_baseline["anonymizer"] == method]
    ax.scatter(rows["privacy_score"], rows["utility_score"], s=60, label=method)

ax.set(
    xlabel="Privacy Score (higher = more private)",
    ylabel="Utility Score (higher = better expression)",
    title="Baseline Sweep — Privacy vs Utility Frontier",
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 2 — k-Same Sweep

In [None]:
# Load the k-Same sweep frontier and show the shared summary columns
# (`cols`, defined in the baseline cell) by descending privacy score.
ksame_csv = RESULTS.joinpath("ksame_sweep", "frontier.csv")
df_ksame = pd.read_csv(ksame_csv)
df_ksame.loc[:, cols].sort_values(by="privacy_score", ascending=False)

In [None]:
# Plot identity leakage (left) and expression utility (right) against k.
fig, axes = plt.subplots(1, 2, figsize=(12, 4.5))

# Extract k from the JSON-encoded params column (0 when a row has no "k" key).
df_ksame["k"] = df_ksame["params"].apply(lambda p: json.loads(p).get("k", 0))

# Line plots join points in row order, so plot from a copy sorted by k;
# a CSV whose rows are not already in ascending k would otherwise produce a
# zig-zag curve. df_ksame itself keeps its original row order.
ksame_by_k = df_ksame.sort_values("k")

# (column, line format, colour, y-axis label, panel title) for each panel.
panels = [
    ("closed_set_top1", "o-", "tab:red",
     "Closed-set Top-1 (lower = more private)", "k-Same: Identity Leakage vs k"),
    ("utility_score", "s-", "tab:blue",
     "Utility Score", "k-Same: Expression Utility vs k"),
]
for ax, (column, fmt, colour, ylabel, title) in zip(axes, panels):
    ax.plot(ksame_by_k["k"], ksame_by_k[column], fmt, color=colour)
    ax.set_xlabel("k")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 3 — GAN-Based Anonymiser Comparison

In [None]:
# Load the GAN comparison frontier and show the shared summary columns
# (`cols`, defined in the baseline cell) by descending privacy score.
gan_csv = RESULTS.joinpath("gan_comparison", "frontier.csv")
df_gan = pd.read_csv(gan_csv)
df_gan.loc[:, cols].sort_values(by="privacy_score", ascending=False)

In [None]:
fig, ax = plt.subplots(figsize=(7, 5))

# Draw every GAN row as its own scatter call (so each point takes the next
# colour in the cycle) and label it with the anonymizer name.
gan_points = zip(df_gan["anonymizer"], df_gan["privacy_score"], df_gan["utility_score"])
for name, privacy, utility in gan_points:
    ax.scatter(privacy, utility, s=100, zorder=5)
    ax.annotate(
        name,
        (privacy, utility),
        textcoords="offset points",
        xytext=(8, 4),
        fontsize=9,
    )

# Faint grey crosses show the classical baselines for context; this is the
# only series given a legend label.
ax.scatter(
    df_baseline["privacy_score"],
    df_baseline["utility_score"],
    marker="x",
    alpha=0.4,
    color="gray",
    label="Baselines",
)
ax.set(
    xlabel="Privacy Score",
    ylabel="Utility Score",
    title="GAN Anonymisers vs Classical Baselines",
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 4 — Conditioning Ablation

In [None]:
# Load the conditioning-ablation frontier and show the shared summary
# columns (`cols`, defined in the baseline cell) in file order.
cond_csv = RESULTS.joinpath("conditioning_ablation", "frontier.csv")
df_cond = pd.read_csv(cond_csv)
df_cond.loc[:, cols]

In [None]:
# Render the conditioning-ablation markdown table stored with the results.
from IPython.display import Markdown

# Decode explicitly as UTF-8 instead of the platform default encoding, so
# non-ASCII characters in the table render the same on every OS.
# NOTE(review): assumes the table file was written as UTF-8 — confirm
# against the script that generates it.
ablation_md = (RESULTS / "conditioning_ablation" / "ablation_table.md").read_text(encoding="utf-8")
Markdown(ablation_md)

## 5 — Expression-Loss Ablation

In [None]:
# Load the expression-loss ablation frontier and show the shared summary
# columns (`cols`, defined in the baseline cell) in file order.
expr_csv = RESULTS.joinpath("expression_loss_ablation", "frontier.csv")
df_expr = pd.read_csv(expr_csv)
df_expr.loc[:, cols]

In [None]:
# Render the expression-loss ablation markdown table stored with the results.
# Imported locally so this cell does not depend on another cell having
# brought Markdown into scope.
from IPython.display import Markdown

# Decode explicitly as UTF-8 instead of the platform default encoding, so
# non-ASCII characters in the table render the same on every OS.
# NOTE(review): assumes the table file was written as UTF-8 — confirm
# against the script that generates it.
ablation_expr_md = (RESULTS / "expression_loss_ablation" / "ablation_table.md").read_text(encoding="utf-8")
Markdown(ablation_expr_md)

## 6 — Adaptive Attacker Ablation

In [None]:
# Load the adaptive-attacker comparison table and display it in full.
adaptive_csv = RESULTS.joinpath("adaptive_attacker_ablation", "comparison.csv")
df_adaptive = pd.read_csv(adaptive_csv)
df_adaptive

In [None]:
# Render the adaptive-attacker comparison markdown table stored with the results.
# Imported locally so this cell does not depend on another cell having
# brought Markdown into scope.
from IPython.display import Markdown

# Decode explicitly as UTF-8 instead of the platform default encoding, so
# non-ASCII characters in the table render the same on every OS.
# NOTE(review): assumes the table file was written as UTF-8 — confirm
# against the script that generates it.
adaptive_md = (RESULTS / "adaptive_attacker_ablation" / "comparison_table.md").read_text(encoding="utf-8")
Markdown(adaptive_md)

## 7 — Combined Pareto Frontier

In [None]:
# Stack the three frontier tables into one frame, tagging every row with
# the experiment it came from.
sources = {"Baseline": df_baseline, "k-Same": df_ksame, "GAN": df_gan}
df_all = pd.concat(
    [table.assign(experiment=tag) for tag, table in sources.items()],
    ignore_index=True,
)

# One scatter series per experiment, each with its own marker shape.
markers = {"Baseline": "o", "k-Same": "s", "GAN": "D"}
fig, ax = plt.subplots(figsize=(10, 6))
for exp, marker in markers.items():
    sub = df_all.loc[df_all["experiment"] == exp]
    ax.scatter(
        sub["privacy_score"],
        sub["utility_score"],
        marker=marker,
        s=70,
        label=exp,
        alpha=0.8,
    )

ax.set_xlabel("Privacy Score (higher = more private)", fontsize=12)
ax.set_ylabel("Utility Score (higher = better expression)", fontsize=12)
ax.set_title("Combined Privacy–Utility Pareto Frontier", fontsize=13)
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)
plt.tight_layout()

# Write the figure into the results folder before showing it.
plt.savefig(RESULTS / "combined_frontier.png", dpi=150)
plt.show()
print("Saved: results/combined_frontier.png")

## 8 — Realism Comparison

In [None]:
# Show the image-quality columns for each experiment family, ordered by
# ascending mean LPIPS.
realism_cols = ["anonymizer", "params", "lpips_mean", "psnr_mean", "ssim_mean"]
sections = (
    ("=== Classical Baselines ===", df_baseline),
    ("\n=== k-Same ===", df_ksame),
    ("\n=== GAN Methods ===", df_gan),
)
for heading, table in sections:
    print(heading)
    display(table[realism_cols].sort_values("lpips_mean"))

## 9 — Summary Statistics

In [None]:
# Report how many result rows each experiment contributed, then pick out the
# headline configurations from the combined frontier table (df_all).

# The adaptive-attacker table is optional: it is only counted when the cell
# that loads it has been run.
adaptive_rows = len(df_adaptive) if "df_adaptive" in globals() else None

# df_all only holds the three frontier sweeps, so the ablation tables are
# added explicitly; this keeps the total equal to the sum of the breakdown
# lines printed beneath it.
total_runs = (
    len(df_baseline) + len(df_ksame) + len(df_gan)
    + len(df_cond) + len(df_expr) + (adaptive_rows or 0)
)
print(f"Total experiment runs:  {total_runs}")
print(f"  Baseline sweep:       {len(df_baseline)}")
print(f"  k-Same sweep:         {len(df_ksame)}")
print(f"  GAN comparison:       {len(df_gan)}")
print(f"  Conditioning ablation:{len(df_cond)}")
print(f"  Expression-loss ablation: {len(df_expr)}")
if adaptive_rows is not None:
    print(f"  Adaptive attacker:    {adaptive_rows}")

# Best privacy: the row of df_all with the largest privacy_score.
best_priv = df_all.loc[df_all["privacy_score"].idxmax()]
print(f"\nBest privacy:  {best_priv['anonymizer']} "
      f"(privacy={best_priv['privacy_score']:.2f}, "
      f"utility={best_priv['utility_score']:.2f})")

# Best utility: the row of df_all with the largest utility_score.
best_util = df_all.loc[df_all["utility_score"].idxmax()]
print(f"Best utility:  {best_util['anonymizer']} "
      f"(privacy={best_util['privacy_score']:.2f}, "
      f"utility={best_util['utility_score']:.2f})")

# Best trade-off: the largest unweighted sum of the two scores. The sum is
# stored on df_all as a new "combined" column.
# NOTE(review): an unweighted sum presumes both scores share a scale — confirm.
df_all["combined"] = df_all["privacy_score"] + df_all["utility_score"]
best_combo = df_all.loc[df_all["combined"].idxmax()]
print(f"Best trade-off: {best_combo['anonymizer']} "
      f"(privacy={best_combo['privacy_score']:.2f}, "
      f"utility={best_combo['utility_score']:.2f}, "
      f"combined={best_combo['combined']:.2f})")

---

**End of Report**