
# 04 · Privacy Risk → ROI (with Integrated PDF Reports)

This notebook consumes outputs from **02** and **03**, computes ROI, saves figures, and now **builds two PDFs**:
- `reports/privacy_compliance_report_v2.pdf` (full)
- `reports/privacy_compliance_summary_v2.pdf` (1‑pager)

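> Requires ReportLab (installed in the first code cell). The PDF cell imports it unconditionally, so if ReportLab is unavailable, skip that cell and export the `roi_readout.md` created by Notebook 03 instead.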


In [8]:
!pip install reportlab

Collecting reportlab
  Downloading reportlab-4.4.3-py3-none-any.whl.metadata (1.7 kB)
Downloading reportlab-4.4.3-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab
Successfully installed reportlab-4.4.3


In [10]:

import json
import math
import os
from datetime import datetime, timezone
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Locate the repository root relative to the current working directory.
cwd = Path.cwd()
if (cwd / "notebooks").exists():
    # Started from a directory that contains notebooks/: treat it as the root.
    repo_root = cwd
elif cwd.name == "notebooks" and (cwd.parent / "data").exists():
    # Started from inside notebooks/ with a sibling data/ directory.
    repo_root = cwd.parent
else:
    # Otherwise inspect cwd and up to four of its ancestors for a data/
    # directory; if none has one, fall back to cwd itself.
    candidates = [cwd, *cwd.parents][:5]
    repo_root = next((c for c in candidates if (c / "data").exists()), cwd)

# Project folders used throughout the notebook; each one is created if missing.
DATA_DIR = repo_root / "data"
REPORTS  = repo_root / "reports"
ASSETS   = REPORTS / "assets"
for folder in (DATA_DIR, ASSETS, REPORTS):
    folder.mkdir(parents=True, exist_ok=True)

print("repo_root =", repo_root)

# Load upstream JSONs (per the notebook intro, the outputs of Notebooks 02 and 03).
# Both files are required; a missing file raises FileNotFoundError from read_text().
def _dig(obj, *keys, default=None):
    """Follow `keys` through nested dicts.

    Returns `default` when any level is absent, is not a dict, or when the
    final value is JSON null (None).
    """
    for key in keys:
        if not isinstance(obj, dict):
            return default
        obj = obj.get(key)
    return default if obj is None else obj

def _as_float(value):
    """Convert to float, treating a null (None) value as NaN.

    Any other non-convertible value still raises, as a plain float() call would.
    """
    return np.nan if value is None else float(value)

p02 = json.loads((DATA_DIR / "deidentification_scorecard.json").read_text())
p03 = json.loads((DATA_DIR / "privacy_compliance_report.json").read_text())

# Post-de-identification posture; absent or null fields become NaN / empty list.
k_after  = _as_float(_dig(p02, "after", "k"))
l_after  = _as_float(_dig(p02, "after", "l"))
quasi    = _dig(p02, "quasi", "final", default=[])
sensitive= p02.get("sensitive_col")

# Compliance results.
comp_idx = _as_float(_dig(p03, "compliance_index"))
gdpr_risk= _as_float(_dig(p03, "gdpr", "detail", "risk_index"))
hipaa_ok = bool(_dig(p03, "hipaa", "ok", default=False))

print(f"Posture: k={k_after}, l={l_after}, |quasi|={len(quasi)}, sensitive={sensitive}")
print(f"Compliance: index={comp_idx:.3f} HIPAA_OK={hipaa_ok} GDPR_risk={gdpr_risk:.3f}")

# ROI functions (fallbacks)
def expected_loss(prob_incident: float, loss: float) -> float:
    """Return the probability-weighted loss, i.e. `prob_incident * loss`."""
    weighted = prob_incident * loss
    return weighted
def time_benefit_value(months_saved: float, monthly_value: float, years: float, rate: float=0.08, max_years: float=5) -> float:
    """Discounted value of a recurring time benefit.

    A cash flow of `months_saved * monthly_value` is counted once per month
    for `min(years, max_years)` years (rounded to whole months); the flow in
    month `t` is divided by `(1 + rate) ** (t / 12)`.

    Parameters:
        months_saved:  months saved; values <= 0 yield 0.0.
        monthly_value: value per saved month.
        years:         requested horizon in years.
        rate:          annual discount rate.
        max_years:     upper bound applied to `years` (default 5, the
                       previously hard-coded cap).

    Returns:
        The discounted sum as a float (0.0 when the horizon has no months).
    """
    if months_saved <= 0:
        return 0.0
    n_months = int(round(min(years, max_years) * 12))
    cash_flow = months_saved * monthly_value
    # Start the sum at 0.0 so an empty horizon still returns a float.
    return sum((cash_flow / ((1 + rate) ** (t / 12)) for t in range(1, n_months + 1)), 0.0)
def net_benefit(loss_wo, loss_w, time_ben, cost, rate=0.08) -> dict:
    """Combine avoided loss, time benefit and cost into an ROI summary dict.

    Parameters:
        loss_wo:  expected loss without safeguards.
        loss_w:   expected loss with safeguards.
        time_ben: time benefit value.
        cost:     safeguard cost.
        rate:     accepted for signature compatibility; not used in the body.

    Returns:
        dict with keys "avoided_loss", "time_benefit", "cost_npv",
        "net_benefit" and "bcr". "bcr" is infinity when `cost` is not
        strictly positive.
    """
    avoided_loss = loss_wo - loss_w
    gross_benefit = avoided_loss + time_ben
    if cost > 0:
        ratio = gross_benefit / cost
    else:
        ratio = float('inf')
    return {
        "avoided_loss": avoided_loss,
        "time_benefit": time_ben,
        "cost_npv": cost,
        "net_benefit": gross_benefit - cost,
        "bcr": ratio,
    }

# Parameters (edit as needed)
params = dict(
    prob_incident_wo=0.22,
    prob_incident_w=0.06,
    incident_loss_usd=2_000_000,
    months_saved=0,
    monthly_value=500_000,
    safeguard_cost_npv=250_000,
    rate=0.08,
    horizon_years=2,
)

# Base-case ROI: expected losses with/without safeguards, time benefit, net benefit.
incident_loss = params["incident_loss_usd"]
loss_wo = expected_loss(params["prob_incident_wo"], incident_loss)
loss_w  = expected_loss(params["prob_incident_w"], incident_loss)
tval    = time_benefit_value(
    params["months_saved"],
    params["monthly_value"],
    years=params["horizon_years"],
    rate=params["rate"],
)
roi     = net_benefit(loss_wo, loss_w, tval, params["safeguard_cost_npv"], rate=params["rate"])

# Persist ROI summary (consumed by PDF build)
summary = {
    # datetime.utcnow() is deprecated; build a timezone-aware UTC timestamp and
    # swap the "+00:00" offset for "Z" so the stored format is unchanged.
    "generated_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    "inputs": {"k": k_after, "l": l_after, "quasi": quasi, "gdpr_risk": gdpr_risk, "hipaa_ok": hipaa_ok, "compliance_index": comp_idx},
    "params": params,
    "roi": roi
}
summary_path = DATA_DIR / "privacy_roi_summary.json"
summary_path.write_text(json.dumps(summary, indent=2))
print("Wrote →", summary_path)

# ROI bar chart of the four components. The PNG is regenerated on every run,
# overwriting any existing file of the same name.
components = {
    "Expected Loss (WO)": loss_wo,
    "Avoided Loss": -(loss_wo - loss_w),
    "Safeguard Cost": -params["safeguard_cost_npv"],
    "Time Benefit": tval,
}
labels = list(components)
bars = list(components.values())

fig, ax = plt.subplots(figsize=(6, 3.4))
positions = range(len(bars))
ax.bar(positions, bars)
ax.set_xticks(positions)
ax.set_xticklabels(labels, rotation=15)
ax.set_title("ROI Waterfall")
ax.set_ylabel("USD")

waterfall_png = ASSETS / "roi_waterfall.png"
waterfall_png.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(waterfall_png, bbox_inches="tight", dpi=150)
plt.close(fig)

# Simple tornado using ±25% on key drivers
fig2 = plt.figure(figsize=(6,3.4))
drivers = ["prob_incident_wo","incident_loss_usd","safeguard_cost_npv"]

def _scenario_net_benefit(p):
    """Net benefit for a perturbed parameter dict `p`.

    Both expected losses are recomputed from `p`, so a change in
    `incident_loss_usd` scales the with-safeguard loss as well as the
    without-safeguard loss. The time benefit `tval` is reused because none of
    the perturbed drivers feed into it.
    """
    lwo = expected_loss(p["prob_incident_wo"], p["incident_loss_usd"])
    lw  = expected_loss(p["prob_incident_w"],  p["incident_loss_usd"])
    return net_benefit(lwo, lw, tval, p["safeguard_cost_npv"], rate=p["rate"])["net_benefit"]

deltas = []
for d in drivers:
    # Perturb one driver at a time, holding the others at their base values.
    p_up = dict(params); p_up[d] = params[d] * 1.25
    p_dn = dict(params); p_dn[d] = params[d] * 0.75
    nb_up = _scenario_net_benefit(p_up)
    nb_dn = _scenario_net_benefit(p_dn)
    deltas.append((d, min(nb_up, nb_dn), max(nb_up, nb_dn)))

# One horizontal segment per driver, spanning its low-to-high net benefit.
y = np.arange(len(drivers))
plt.hlines(y, [lo for _, lo, hi in deltas], [hi for _, lo, hi in deltas])
plt.yticks(y, drivers); plt.title("Sensitivity (±25%)"); plt.xlabel("Net Benefit USD")
plt.savefig(ASSETS / "sensitivity_tornado.png", bbox_inches="tight", dpi=150); plt.close()
print("Figures saved to", ASSETS)

repo_root = /Users/cmontefusco/Coding projects/RWE-Privacy-and-Compliance-Playbook
Posture: k=97.0, l=7.0, |quasi|=5, sensitive=condition
Compliance: index=0.906 HIPAA_OK=True GDPR_risk=0.235
Wrote → /Users/cmontefusco/Coding projects/RWE-Privacy-and-Compliance-Playbook/data/privacy_roi_summary.json


  "generated_at": datetime.utcnow().isoformat()+"Z",


Figures saved to /Users/cmontefusco/Coding projects/RWE-Privacy-and-Compliance-Playbook/reports/assets


## Build PDFs (full report + 1‑page summary)

In [13]:

# Build corrected PDFs using annexed JSONs and figures from assets/
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors

# Load all JSONs
def _read_json(path):
    """Parse the JSON document stored at `path`."""
    return json.loads(path.read_text())

# privacy_report.json and privacy_sensitivity.json are optional; the other
# three files are required. p01 is not referenced again in the visible code.
privacy_report_path = DATA_DIR / "privacy_report.json"
p01 = _read_json(privacy_report_path) if privacy_report_path.exists() else {}
p02 = _read_json(DATA_DIR / "deidentification_scorecard.json")
p03 = _read_json(DATA_DIR / "privacy_compliance_report.json")
p04 = _read_json(DATA_DIR / "privacy_roi_summary.json")
sens_path = DATA_DIR / "privacy_sensitivity.json"
sens = _read_json(sens_path) if sens_path.exists() else []

# Extract values (NaN stands in for any metric missing from the JSON)
nan = float('nan')
after = p02.get("after", {})
k = after.get("k", nan)
l = after.get("l", nan)
gdpr_detail = p03.get("gdpr", {}).get("detail", {})
gdpr_risk = gdpr_detail.get("risk_index", nan)
hipaa_ok = bool(p03.get("hipaa", {}).get("ok", False))
comp_index = p03.get("compliance_index", nan)
roi = p04.get("roi", {})
params = p04.get("params", {})

# Derived expected losses
incident_loss = params.get("incident_loss_usd", 0)
el_wo = params.get("prob_incident_wo", 0) * incident_loss
el_w  = params.get("prob_incident_w", 0) * incident_loss

# Sensitivity spans (if provided): per parameter, track the smallest and
# largest net benefit seen across the rows of `sens`.
from collections import defaultdict
spans = defaultdict(lambda: {"min": float("inf"), "max": -float("inf")})
for row in sens:
    span = spans[row["param"]]
    value = row["net_benefit"]
    span["min"] = min(span["min"], value)
    span["max"] = max(span["max"], value)

# Rows of (param, min, max, range), widest range first.
sens_rows = sorted(
    ((name, span["min"], span["max"], span["max"] - span["min"]) for name, span in spans.items()),
    key=lambda r: r[3],
    reverse=True,
)

# Images: figure files expected under ASSETS (each is checked with .exists() before use)
img_idmap, img_comp, img_water, img_tornado = (
    ASSETS / filename
    for filename in (
        "identifier_map.png",
        "compliance_bar.png",
        "roi_waterfall.png",
        "sensitivity_tornado.png",
    )
)

# Helper to style tables
def tbl(data, colWidths=None):
    """Build a left-aligned ReportLab Table with the report's shared styling.

    The first row gets a dark background with light text, every cell gets a
    thin grey grid, and cells from the second row/second column onward are
    centered.

    Parameters:
        data:      rows passed straight to `Table`.
        colWidths: optional column widths forwarded to `Table`.

    Returns:
        The styled `Table` flowable.
    """
    header_bg = colors.HexColor("#4b5563")
    style_cmds = [
        ('BACKGROUND', (0, 0), (-1, 0), header_bg),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ALIGN', (1, 1), (-1, -1), 'CENTER'),
    ]
    table = Table(data, hAlign="LEFT", colWidths=colWidths)
    table.setStyle(TableStyle(style_cmds))
    return table

styles = getSampleStyleSheet()

# Thresholds used to choose recommendation wording; they mirror the targets
# printed in the metrics table below.
K_TARGET = 5
L_TARGET = 2
GDPR_RISK_MAX = 0.30

# ---- Full report ----
full_pdf = REPORTS / "privacy_compliance_report_v2.pdf"
doc = SimpleDocTemplate(str(full_pdf), pagesize=A4)
story = []
story.append(Paragraph("Privacy & Compliance Readout — Corrected", styles["Title"]))
story.append(Paragraph(f"Generated: {datetime.now():%Y-%m-%d %H:%M}", styles["BodyText"]))
story.append(Spacer(1, 10))

story.append(Paragraph("Privacy & Compliance Metrics", styles["Heading2"]))
story.append(tbl([
    ["Metric","Value","Target/Note"],
    ["k-anonymity", f"{k:.0f}", "≥ 5"],
    ["l-diversity", f"{l:.0f}", "≥ 2"],
    ["GDPR risk index", f"{gdpr_risk:.3f}", "≤ 0.30 desirable"],
    ["HIPAA Safe Harbor", "PASS" if hipaa_ok else "REVIEW", ""],
    ["Compliance index", f"{comp_index:.3f}", "0–1, higher is better"],
]))
story.append(Spacer(1, 10))

story.append(Paragraph("ROI of Safeguards", styles["Heading2"]))
story.append(tbl([
    ["Component","Value (USD)"],
    ["Expected loss (without)", f"${el_wo:,.0f}"],
    ["Expected loss (with)",    f"${el_w:,.0f}"],
    ["Avoided loss",            f"${roi.get('avoided_loss',0):,.0f}"],
    ["Time benefit",            f"${roi.get('time_benefit',0):,.0f}"],
    ["Safeguard cost (NPV)",    f"${roi.get('cost_npv',0):,.0f}"],
    ["Net benefit",             f"${roi.get('net_benefit',0):,.0f}"],
    ["Benefit-Cost Ratio",      f"{roi.get('bcr',0):.2f}"],
]))
story.append(Spacer(1, 10))

# Optional sensitivity table: only the six widest-ranging parameters are shown.
if sens_rows:
    story.append(Paragraph("Sensitivity (Net Benefit span)", styles["Heading2"]))
    rows = [["Parameter","Min NB","Max NB","Range"]] + [[p, f"${mn:,.0f}", f"${mx:,.0f}", f"${sp:,.0f}"] for p,mn,mx,sp in sens_rows[:6]]
    story.append(tbl(rows, colWidths=[220, 100, 100, 100]))
    story.append(Spacer(1, 10))

story.append(Paragraph("Figures", styles["Heading2"]))
for img in [img_idmap, img_comp, img_water, img_tornado]:
    if img.exists():
        # kind="proportional" fits the image inside the 420x260 box while
        # keeping its aspect ratio, instead of stretching it to exactly 420x260.
        story.append(Image(str(img), width=420, height=260, kind="proportional"))
        story.append(Spacer(1, 8))

# Recommendations: wording follows the metrics loaded above so the report
# never claims a target is met when it is not. NaN metrics fail the
# comparisons and therefore get the "needs attention" wording.
recommendations = [
    "Keep direct identifiers removed; rules validated against HIPAA Safe Harbor."
    if hipaa_ok else
    "HIPAA Safe Harbor check needs review — verify that all direct identifiers are removed before release.",
    "k and l exceed targets — enforce generalization/suppression in ETL and recheck quarterly."
    if (k >= K_TARGET and l >= L_TARGET) else
    "k and/or l do not meet targets (k ≥ 5, l ≥ 2) — strengthen generalization/suppression in ETL and recheck before release.",
    "Maintain GDPR risk index ≤0.30 by monitoring quasi-ID drift."
    if gdpr_risk <= GDPR_RISK_MAX else
    "GDPR risk index is not within the ≤0.30 target — reduce quasi-identifier granularity and monitor quasi-ID drift.",
    "Tune ROI (costs, probabilities, time benefit) per program to reflect real value.",
]
story.append(Paragraph("Recommendations", styles["Heading2"]))
for r in recommendations:
    story.append(Paragraph(f"• {r}", styles["BodyText"]))

doc.build(story)
print("PDF written →", full_pdf)

# ---- 1‑page summary ----
sum_pdf = REPORTS / "privacy_compliance_summary_v2.pdf"
doc2 = SimpleDocTemplate(str(sum_pdf), pagesize=A4)
s = []
s.append(Paragraph("Privacy & Compliance Executive Summary — Corrected", styles["Title"]))
s.append(Spacer(1, 8))
s.append(tbl([
    ["Metric","Value","Target"],
    ["k-anonymity", f"{k:.0f}", "≥5"],
    ["l-diversity", f"{l:.0f}", "≥2"],
    ["HIPAA", "PASS" if hipaa_ok else "REVIEW", "Safe Harbor"],
    ["GDPR risk index", f"{gdpr_risk:.2f}", "≤0.30"],
    ["Compliance Index", f"{comp_index:.2f}", "≥0.70"],
    ["ROI — Net Benefit", f"${roi.get('net_benefit',0):,.0f}", "Positive"],
]))
s.append(Spacer(1, 8))
for summary_img in (img_comp, img_water):
    if summary_img.exists():
        # kind="proportional" fits the image inside the 320x180 box while
        # keeping its aspect ratio, instead of stretching it to exactly 320x180.
        s.append(Image(str(summary_img), width=320, height=180, kind="proportional"))
s.append(Spacer(1, 6))
s.append(Paragraph("<b>Immediate Actions</b>", styles["Heading2"]))
for a in [
    "Codify generalization/suppression rules into pipelines; set quarterly reassessment.",
    "Attach this report to DUA approvals and payer evidence packages.",
    "Quantify time-to-approval benefit (months saved) and rerun ROI."
]:
    s.append(Paragraph(f"• {a}", styles["BodyText"]))
doc2.build(s)
print("Summary PDF written →", sum_pdf)

PDF written → /Users/cmontefusco/Coding projects/RWE-Privacy-and-Compliance-Playbook/reports/privacy_compliance_report_v2.pdf
Summary PDF written → /Users/cmontefusco/Coding projects/RWE-Privacy-and-Compliance-Playbook/reports/privacy_compliance_summary_v2.pdf
