## 1: Setup

In [None]:
import sys
import os
import json
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output, Markdown

# Load Enriched Data
RUNS_ROOT = "../outputs/runs"

# Only sub-directories count as runs: a stray file (e.g. .DS_Store, .gitkeep)
# could otherwise sort last and be mistaken for the latest run.
_run_dirs = sorted(
    name
    for name in os.listdir(RUNS_ROOT)
    if os.path.isdir(os.path.join(RUNS_ROOT, name))
)
if not _run_dirs:
    raise FileNotFoundError(f"No run directories found under {RUNS_ROOT!r}")

# Latest run. NOTE(review): relies on run names sorting chronologically
# (e.g. a timestamp right after the "run_" prefix) — confirm against the
# pipeline that creates them.
RUN_DIR = _run_dirs[-1]
INPUT_FILE = f"{RUNS_ROOT}/{RUN_DIR}/enriched_anomalies.json"
OUTPUT_FILE = "human_annotations.jsonl"

# Decode explicitly as UTF-8 so non-ASCII text in the file is not garbled by
# the platform's default encoding.
with open(INPUT_FILE, "r", encoding="utf-8") as f:
    anomalies = json.load(f)

print(f"Loaded {len(anomalies)} anomalies from {RUN_DIR}")

Loaded 5 anomalies from run_20251126T133741Z_d9f6e3


## 2: Rating Interface

In [None]:
# Simple Rating UI — module-level state shared by the functions in this cell.
annotations = []  # one dict per submitted review, in submission order
current_idx = 0  # index into `anomalies` of the record currently shown


def save_annotations():
    """Write every collected annotation to ``OUTPUT_FILE`` as JSON Lines.

    The file is opened in write mode, so any previous content is replaced by
    the current contents of the module-level ``annotations`` list (one JSON
    object per line). A short confirmation is printed afterwards.
    """
    # Explicit UTF-8 keeps the output identical across platforms instead of
    # depending on the locale's default encoding.
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        for a in annotations:
            f.write(json.dumps(a) + "\n")
    print(f"\n✅ Saved {len(annotations)} annotations to {OUTPUT_FILE}")


def show_next(b=None):
    """Render the anomaly at ``current_idx`` together with its rating widgets.

    Clears the cell output first. If every anomaly has already been rated, a
    completion message is printed and the annotations are written through
    ``save_annotations``. Otherwise the record's context is shown as markdown,
    followed by a quality slider, a factual-correctness checkbox and a submit
    button whose handler stores the rating and advances to the next record.

    Args:
        b: Ignored; accepted so the function can also serve as a widget
            click callback.
    """
    global current_idx
    clear_output(wait=True)

    if current_idx >= len(anomalies):
        print("🎉 All anomalies reviewed!")
        save_annotations()
        return

    rec = anomalies[current_idx]
    # Remember which record this card belongs to so the submit handler can
    # recognise clicks that arrive after the card was already submitted.
    shown_idx = current_idx

    # Display Context
    display(
        Markdown(
            f"### Anomaly {current_idx+1}/{len(anomalies)}: {rec.get('entity_id')}"
        )
    )
    display(
        Markdown(f"**Score:** {rec.get('score')} | **Metric:** {rec.get('metric')}")
    )

    # Prefix every line so a multi-line explanation stays inside the blockquote.
    explanation = str(rec.get("explanation_full")).replace("\n", "\n> ")
    display(Markdown(f"**AI Explanation:**\n> {explanation}"))

    # Show a list of actions as markdown bullets instead of a Python list repr;
    # any other value (None, plain string, empty list) is rendered as before.
    actions = rec.get("suggested_actions")
    if isinstance(actions, (list, tuple)) and actions:
        bullets = "\n".join(f"- {action}" for action in actions)
        display(Markdown(f"**Actions:**\n\n{bullets}"))
    else:
        display(Markdown(f"**Actions:** {actions}"))

    # Widgets
    w_score = widgets.IntSlider(value=3, min=1, max=5, description="Quality (1-5):")
    w_fact = widgets.Checkbox(description="Factually Correct?")
    w_btn = widgets.Button(description="Submit & Next", button_style="success")

    def on_submit(b):
        """Record the rating for the displayed anomaly and move to the next."""
        global current_idx
        if current_idx != shown_idx:
            # Stale click (e.g. a double-click): this card was already
            # submitted, so recording again would duplicate the annotation
            # and skip the following anomaly.
            return
        w_btn.disabled = True
        annotations.append(
            {
                "anomaly_id": rec.get("anomaly_id"),
                "human_score": w_score.value,
                "factual": w_fact.value,
                "reviewer": "user",
            }
        )
        current_idx += 1
        show_next()

    w_btn.on_click(on_submit)
    display(w_score, w_fact, w_btn)


# Start the review: render the anomaly at the current index (or the completion
# message if there is nothing left to review).
show_next()

### Anomaly 2/5: South

**Score:** 22.22 | **Metric:** Sales

**AI Explanation:**
> The South region has reported sales of 8,805.04, which is substantially higher than the expected value of 579.93. This value is 22.22 standard deviations above the expected value, indicating a severe anomaly. There is no historical context to explain this drastic increase.

**Actions:** ['Investigate the source of the inflated sales in the South region.', 'Verify data integrity and reporting mechanisms for the South region.', 'Analyze if this anomaly represents a genuine surge or a data error.']

IntSlider(value=3, description='Quality (1-5):', max=5, min=1)

Checkbox(value=False, description='Factually Correct?')

Button(button_style='success', description='Submit & Next', style=ButtonStyle())