In [None]:
import json
import requests
import os
import time
import re
import csv
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np

# =========================
# CONFIG
# =========================
# Model name sent in the "model" field of every request built by ask_chatgpt().
MODEL = "gpt-5.1"
# JSON file loaded by main(); iterated as a sequence of batches, each of which
# may carry a "results" list.
INPUT_FILE = "history.json"
# CSV file main() writes with one row per evaluated item.
OUTPUT_CSV = "results.csv"
# Presumably the output path for draw_heatmap().
# NOTE(review): not referenced anywhere else in the visible code — confirm.
HEATMAP_FILE = "heatmap.png"

# Endpoint that ask_chatgpt() POSTs to.
API_URL = "https://api.openai.com/v1/chat/completions"
# Read the key from the environment so it is not stored in the source.
API_KEY = os.getenv("OPENAI_API_KEY")

# Fail immediately when the key is missing or empty, before any request is made.
if not API_KEY:
    raise RuntimeError("❌ Set OPENAI_API_KEY as an environment variable")

# =========================
# PROMPT
# =========================
# System message sent with every request by ask_chatgpt(). It instructs the
# model to reply with a single word, YES or NO; extract_yes_no() then pulls
# that word out of the reply.
PROMPT = """
You are a legal-compliance evaluation assistant.

You will receive a JSON object that represents the output of a GDPR contradiction-checking system. Your role is to EVALUATE whether the system’s conclusion is logically and legally supported by its own explanation and evidence.

This is not an adversarial task. Do not try to invent flaws. Only answer NO if you can identify a clear, concrete error in the system’s reasoning.

Evaluation method:

1. Identify the system’s main conclusion:
   - Whether it claims a direct contradiction exists or not.

2. Trace the reasoning chain:
   - GDPR rule or principle cited
   - Claim made about the document
   - Evidence (quotes or references)

3. Check internal validity:
   - Does the cited GDPR rule actually support the claim made?
   - Does the evidence directly support the claim, or is it irrelevant/misaligned?
   - Are there logical leaps (e.g., claim goes beyond what the evidence shows)?

4. Acceptable cases for YES:
   - The reasoning is coherent and the evidence reasonably supports the conclusion, even if details are missing or the explanation is high-level.
   - The system correctly treats missing or vague information as “not a contradiction.”

5. Required cases for NO (must find at least one):
   - A misinterpretation of a GDPR rule or principle
   - Evidence that does not support the stated legal claim
   - A logical inconsistency between conclusion and explanation
   - A conclusion that contradicts the system’s own evidence or raw_data

Final output rule:
You MUST output ONLY ONE WORD:
YES
or
NO

Meaning:
- YES = The system’s conclusion is logically and legally supported by its own explanation and evidence.
- NO = You found a concrete logical or legal error in the system’s reasoning.

Do not include any explanations, formatting, or additional text.
"""

# =========================
# HELPERS
# =========================
def extract_yes_no(text: str) -> str:
    """Return the first standalone YES or NO found in *text*.

    Matching is case-insensitive and whole-word only, so substrings such as
    "NOT" or "YESTERDAY" do not count.

    Args:
        text: Raw reply text to scan.

    Returns:
        "YES" or "NO" for the first whole-word hit, or "INVALID" when
        neither word occurs.
    """
    found = re.search(r"\b(YES|NO)\b", text.upper())
    return found.group(1) if found else "INVALID"


def ask_chatgpt(json_item, retries=3, delay=3):
    """Send one item to the chat-completions endpoint and return its verdict.

    The item is serialized to indented JSON and sent as the user message,
    with PROMPT as the system message. The reply text is reduced to a single
    word by extract_yes_no().

    Transient failures (network errors, malformed responses, HTTP 408/409/429
    and 5xx) are retried. Other 4xx responses, such as an invalid API key or
    a rejected request body, cannot succeed on a resend, so the function
    gives up immediately instead of sleeping through the remaining attempts.

    Args:
        json_item: JSON-serializable object to evaluate.
        retries: Maximum number of request attempts.
        delay: Seconds to wait between attempts.

    Returns:
        "YES", "NO" or "INVALID" as produced by extract_yes_no(), or "ERROR"
        when no attempt yielded a usable response.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": PROMPT},
            {"role": "user", "content": json.dumps(json_item, indent=2)}
        ],
        "temperature": 0
    }

    # Client-error statuses that are still worth retrying.
    transient_client_statuses = (408, 409, 429)

    for attempt in range(1, retries + 1):
        try:
            response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
            response.raise_for_status()

            raw_output = response.json()["choices"][0]["message"]["content"].strip()
            return extract_yes_no(raw_output)

        except requests.HTTPError as e:
            print(f"⚠️ Attempt {attempt} failed:", e)
            status = e.response.status_code if e.response is not None else None
            if (
                status is not None
                and 400 <= status < 500
                and status not in transient_client_statuses
            ):
                # Resending the same request would fail the same way.
                print(f"⚠️ HTTP {status} is not retryable; giving up on this item.")
                break

        except Exception as e:
            # Deliberate best-effort boundary: any other failure (timeout,
            # connection error, unexpected response shape) is logged and
            # retried so that one bad item cannot abort the whole run.
            print(f"⚠️ Attempt {attempt} failed:", e)

        if attempt < retries:
            time.sleep(delay)

    return "ERROR"


# =========================
# HEATMAP
# =========================
def draw_heatmap(percentages, output_file):
    """Render a single-row heatmap of percentage values and save it to a file.

    Args:
        percentages: Mapping of label -> numeric percentage. Keys become the
            x-axis tick labels in iteration order; each value colors one cell
            and is printed inside it with one decimal place.
        output_file: Destination passed to ``Figure.savefig``.
    """
    labels = list(percentages.keys())
    values = list(percentages.values())

    # imshow expects a 2-D array, so wrap the values as a single row.
    data = np.array([values])

    fig, ax = plt.subplots(figsize=(8, 2.5))
    try:
        ax.imshow(data)

        ax.set_xticks(np.arange(len(labels)))
        ax.set_xticklabels(labels)
        ax.set_yticks([0])
        ax.set_yticklabels(["Distribution"])

        # Write each percentage inside its cell.
        for col, value in enumerate(values):
            ax.text(col, 0, f"{value:.1f}%",
                    ha="center", va="center", color="black", fontsize=12, fontweight="bold")

        ax.set_title("Compliance Status Heatmap (%)")
        fig.tight_layout()
        # Save via the figure object rather than pyplot's implicit
        # "current figure".
        fig.savefig(output_file, dpi=200)
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

# =========================
# MAIN
# =========================
def main():
    """Evaluate every result in INPUT_FILE with the model and report tallies.

    Loads INPUT_FILE as JSON and iterates it as a sequence of batches. Each
    entry of a batch's "results" list is sent to ask_chatgpt(). One row per
    entry (regulation id, verdict, compliance status) is written to
    OUTPUT_CSV, and a summary of the verdicts and of the "compliance_status"
    distribution is printed.

    When no entries are found, a message is printed and no CSV is written.
    """
    with open(INPUT_FILE, "r", encoding="utf-8") as f:
        data = json.load(f)

    verdict_counter = Counter()
    status_counter = Counter()
    rows = []

    for batch in data:
        for item in batch.get("results", []):
            reg_id = item.get("regulation_id", "UNKNOWN")

            compliance_status = item.get("compliance_status", "UNKNOWN")
            status_counter[compliance_status] += 1

            print(f"Checking {reg_id}...")

            verdict = ask_chatgpt(item)
            print("Model verdict:", verdict)
            verdict_counter[verdict] += 1

            rows.append({
                "regulation_id": reg_id,
                "verdict": verdict,
                "compliance_status": compliance_status
            })

            # Fixed pause between consecutive requests
            # (presumably to stay under the API rate limit — confirm).
            time.sleep(1.2)

    total = len(rows)
    if total == 0:
        print("❌ No items found.")
        return

    yes_count = verdict_counter["YES"]
    no_count = verdict_counter["NO"]
    # Anything that is neither YES nor NO is an "ERROR" or "INVALID" verdict.
    # These count toward the total, so they are reported explicitly;
    # otherwise YES% + NO% would fall short of 100 with no explanation.
    unusable_count = total - yes_count - no_count

    yes_percent = (yes_count / total) * 100
    no_percent = (no_count / total) * 100
    unusable_percent = (unusable_count / total) * 100

    # =========================
    # STATUS PERCENTAGES
    # =========================
    categories = [
        "COMPLIANT",
        "NON_COMPLIANT",
        "INSUFFICIENT_INFORMATION",
        "HUMAN_REQUIRED"
    ]

    # The expected categories are always listed, in this fixed order.
    status_percentages = {
        cat: (status_counter.get(cat, 0) / total) * 100 for cat in categories
    }
    # Statuses outside the expected set (including the "UNKNOWN" default) are
    # appended so the printed distribution accounts for every item.
    unexpected = sorted((s for s in status_counter if s not in categories), key=str)
    for status in unexpected:
        status_percentages[status] = (status_counter[status] / total) * 100

    # NOTE(review): draw_heatmap() and HEATMAP_FILE are defined in this file
    # but nothing here calls them — confirm whether the heatmap should be
    # rendered from status_percentages.

    # =========================
    # SAVE CSV
    # =========================
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
        fieldnames = ["regulation_id", "verdict", "compliance_status"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    # =========================
    # FINAL OUTPUT
    # =========================
    print("\n======================")
    print("FINAL RESULTS")
    print("======================")
    print(f"Total checks: {total}")
    print(f"YES: {yes_count} ({yes_percent:.2f}%)")
    print(f"NO: {no_count} ({no_percent:.2f}%)")
    if unusable_count:
        print(f"ERROR/INVALID: {unusable_count} ({unusable_percent:.2f}%)")
    print("\nCompliance Distribution:")
    for k, v in status_percentages.items():
        print(f"{k}: {v:.2f}%")
    print(f"\nCSV saved as: {OUTPUT_CSV}")


# Run the evaluation only when executed directly (script or notebook cell),
# so importing this module does not trigger a full API sweep.
if __name__ == "__main__":
    main()

Checking Regulation (EU) 2016/679...
Model verdict: YES
Checking GDPR Article 1...
Model verdict: YES
Checking GDPR Article 2...
Model verdict: YES
Checking GDPR Article 3...
Model verdict: YES
Checking GDPR Article 4...
Model verdict: YES
Checking GDPR Article 5...
Model verdict: YES
Checking GDPR Article 6...
Model verdict: YES
Checking GDPR Article 7...
Model verdict: YES
Checking GDPR Article 8...
Model verdict: YES
Checking GDPR Article 9...
Model verdict: YES
Checking Regulation (EU) 2016/679...
Model verdict: YES
Checking GDPR Article 1...
Model verdict: YES
Checking GDPR Article 2...
Model verdict: YES
Checking GDPR Article 3...
Model verdict: YES
Checking GDPR Article 4...
Model verdict: YES
Checking GDPR Article 5...
Model verdict: YES
Checking GDPR Article 6...
Model verdict: YES
Checking GDPR Article 7...
Model verdict: YES
Checking GDPR Article 8...
Model verdict: YES
Checking GDPR Article 9...
Model verdict: YES
Checking Regulation (EU) 2016/679...
Model verdict: YES
Chec