In [130]:
## THIS ONE?

In [226]:
import requests
import pandas as pd
import time
import json
import os
import re 
import re

import json# <-- Make sure this is included at the top!

# ========== CONFIG ==========
LLM_ENDPOINT = "http://localhost:11434/api/chat"
LLM_MODEL_NAME = "llama3:70b"
TEMP_OUTPUT_PATH = "annotated_temp_output.csv"
FINAL_OUTPUT_PATH = "news_sample_annotated.csv"
SLEEP_BETWEEN_REQUESTS = 1
PROMPT_DIR = "prompts"
TEMPERATURE = 0.0

# ========== FRAME ORDER ==========
FRAME_ORDER = [
    "Foreign influence threat",
    "Systemic institutional corruption",
    "Elite collusion",
    "Politicized investigations",
    "Authoritarian reformism",
    "Judicial and institutional accountability failures",
    "Mobilizing anti-corruption"
]

# --- Helper Functions ---

def strip_markdown_fences(output):
    """Remove markdown-style code fences from model output."""
    return re.sub(r"```(?:json)?|```", "", output).strip()


def extract_json_from_output(output):
    """
    Safely extract a JSON array from model output.
    Handles extra text, markdown, and formatting errors.
    """
    output = strip_markdown_fences(output)

    try:
        # Try parsing directly
        return json.loads(output)
    except json.JSONDecodeError:
        try:
            # Try extracting just the first JSON array
            json_str = re.search(r'\[\s*{.*?}\s*\]', output, re.DOTALL).group()
            return json.loads(json_str)
        except Exception as e:
            print(f"❌ JSON decode failed: {e}")
            print("🔍 Raw output that caused error (truncated):")
            print(output[:500])
            return None
            
# ========== LOAD PROMPTS PER FRAME ==========
def load_frame_prompt(index: int, frame_name: str) -> str:
    filename = f"frame_{index}_{frame_name.lower().replace(' ', '_').replace('-', '')}.txt"
    path = os.path.join(PROMPT_DIR, filename)
    with open(path, "r", encoding="utf-8") as f:
        return f.read()

# ========== PROMPT CONSTRUCTION ==========
def build_prompt(article_text: str, frame_index: int, frame_name: str) -> str:
    frame_prompt = load_frame_prompt(frame_index, frame_name)
    return f"{frame_prompt}\n\n---\n\nArticle:\n{article_text}"

def sanitize_double_quotes(json_str):
    """
    Fix common LLM quote formatting issues like nested double quotes.
    Turns:  "evidence": ""This will..." → "evidence": "This will..."
    """
    return re.sub(r'"\s*"\s*([^"]+)"', r'"\1"', json_str)

def clean_llm_response(content):
    """
    Cleans and parses LLM response content into a JSON object.
    Handles:
    - Markdown code fences
    - Overquoted fields
    - Stray text around JSON array
    - Common decode errors
    """
    content = content.strip()
    content = re.sub(r"```(?:json)?|```", "", content).strip()

    # First try: parse whole response as JSON
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        pass

    # Second try: extract the first array-like JSON block
    match = re.search(r'\[\s*{.*?}\s*\]', content, re.DOTALL)
    if match:
        json_str = match.group()
        json_str = sanitize_double_quotes(json_str)

        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            print(f"⚠️ Still couldn't parse extracted JSON: {e}")
            print("🔍 JSON candidate (truncated):")
            print(json_str[:500])
            return None
    else:
        print("⚠️ No JSON array found in LLM output.")
        print("🔍 Raw LLM output (truncated):")
        print(content[:1000])
        return None


def query_frame_llm(article_text: str, frame_index: int, frame_name: str) -> dict:
    prompt = build_prompt(article_text, frame_index, frame_name)
    try:
        response = requests.post(
            LLM_ENDPOINT,
            json={
                "model": LLM_MODEL_NAME,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "temperature": TEMPERATURE
            },
            timeout=120
        )
        response.raise_for_status()
        result = response.json()
        content = result.get("message", {}).get("content", "").strip()

        if not content:
            raise ValueError("Empty response from LLM")

        parsed = clean_llm_response(content)
        if not parsed or not isinstance(parsed, list):
            raise ValueError("No valid JSON array found or parsed content is not a list")

        return parsed[0] if parsed else {}

    except Exception as e:
        print(f"❌ Error querying frame '{frame_name}': {e}")
        return {
            "frame": frame_name,
            "rationale": f"⚠️ Error: {str(e)}",
            "confidence": None,
            "evidence": ""
        }


# ========== ANNOTATION LOOP ==========
def annotate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    for i in range(1, len(FRAME_ORDER) + 1):
        for field in ["name", "rationale", "confidence", "evidence"]:
            col = f"frame_{i}_{field}"
            if col not in df.columns:
                df[col] = ""

    for idx, row in df.iterrows():
        if pd.notna(row.get("frame_1_name")) and row.get("frame_1_name") != "":
            print(f"⏭️ Article {idx} already annotated. Skipping.")
            continue

        print(f"\n🔍 Annotating article {idx}...")
        article_text = row.get("translated_text", "")

        for i, frame_name in enumerate(FRAME_ORDER, 1):
            result = query_frame_llm(article_text, i, frame_name)

            # Only log frames with rationale and frame label
            if result.get("frame") and result.get("rationale"):
                confidence = result.get("confidence", "")
                rationale = result.get("rationale", "")
                if confidence != "" and isinstance(confidence, (int, float)) and confidence < 80:
                    rationale += f"\n\n⚠️ Model confidence is only {confidence}%. Please verify carefully."

                df.at[idx, f"frame_{i}_name"] = result.get("frame", "")
                df.at[idx, f"frame_{i}_rationale"] = rationale
                df.at[idx, f"frame_{i}_confidence"] = confidence
                df.at[idx, f"frame_{i}_evidence"] = result.get("evidence", "")


        df.to_csv(TEMP_OUTPUT_PATH, index=False)
        print(f"✅ Saved progress after article {idx}.")
        time.sleep(SLEEP_BETWEEN_REQUESTS)

    return df

In [206]:
import pandas as pd
import os

# Path to your existing raw ICR2 sample
icr2_sample_path = os.path.expanduser(
    "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/icr2_sample_raw.csv"
)

# Load the already drawn raw sample
df_icr2 = pd.read_csv(icr2_sample_path)
print(f"📂 Loaded existing raw ICR2 sample from: {icr2_sample_path}")

#Annotate
df_icr2 = annotate_dataframe(df_icr2)

# Save annotated ICR 2 sample
icr2_annotated_path = os.path.expanduser(
    "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/icr2_sample_LLM_annotated.csv"
)
df_icr2.to_csv(icr2_annotated_path, index=False)
print(f"✅ Annotated ICR 2 saved to: {icr2_annotated_path}")

# Save Excel version of ICR 2
icr2_excel_path = icr2_annotated_path.replace(".csv", ".xlsx")
df_icr2.to_excel(icr2_excel_path, index=False)
print(f"✅ Excel-bestand ICR 2 opgeslagen als: {icr2_excel_path}")

📂 Loaded existing raw ICR2 sample from: /home/akroon/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/icr2_sample_raw.csv

🔍 Annotating article 0...
✅ Saved progress after article 0.

🔍 Annotating article 1...
✅ Saved progress after article 1.

🔍 Annotating article 2...
✅ Saved progress after article 2.

🔍 Annotating article 3...
✅ Saved progress after article 3.

🔍 Annotating article 4...
✅ Saved progress after article 4.

🔍 Annotating article 5...
✅ Saved progress after article 5.

🔍 Annotating article 6...
✅ Saved progress after article 6.

🔍 Annotating article 7...
✅ Saved progress after article 7.

🔍 Annotating article 8...
✅ Saved progress after article 8.

🔍 Annotating article 9...
✅ Saved progress after article 9.

🔍 Annotating article 10...
✅ Saved progress after article 10.

🔍 Annotating article 11...
✅ Saved progress after article 11.

🔍 Annotating article 12...
✅ Saved progress after article 12.

🔍 Annotating article 13...
✅ Sa

In [228]:
csv_files = [
    "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/output/data-deductive-analysis/sample-manual-content-analysis/Bulgaria_Alexander_sample_250.csv"
]


In [None]:
icr3_sample_path = os.path.expanduser(
    "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test3/icr3_sample_raw.csv"
)
df_icr3 = pd.read_csv(icr3_sample_path)
print(f"📂 Loaded existing raw ICR2 sample from: {icr3_sample_path}")

df_icr3 = annotate_dataframe(df_icr3)

df_icr3.to_csv(icr3_annotated_path, index=False)
print(f"✅ Annotated ICR 3 saved to: {icr3_annotated_path}")

# Save Excel version of ICR 2
icr3_excel_path = icr3_annotated_path.replace(".csv", ".xlsx")
df_icr3.to_excel(icr3_excel_path, index=False)
print(f"✅ Excel-bestand ICR 3opgeslagen als: {icr3_excel_path}")

In [216]:
import pandas as pd
import os

def generate_annotation_html(df, output_path):
    html_content = """
    <html>
    <head>
        <meta charset="UTF-8">
        <title>Annotated Articles Report</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; line-height: 1.6; }
            .article { margin-bottom: 50px; padding-bottom: 20px; border-bottom: 1px solid #ccc; }
            .frame { margin-left: 20px; margin-top: 10px; }
            .header { font-size: 20px; font-weight: bold; color: #333; }
            .label { font-weight: bold; color: #004080; }
            .section { margin-top: 10px; }
            .meta { color: #555; font-size: 14px; }
        </style>
    </head>
    <body>
    <h1>📝 Annotated Articles with Frames & Rationales</h1>
    """

    for idx, row in df.iterrows():
        html_content += f'<div class="article">'
        html_content += f'<div class="header">📄 Article #{idx + 1}</div>'
        html_content += f'<div class="meta">🌍 <b>Country:</b> {row.get("country", "N/A")} &nbsp;&nbsp; 🕒 <b>DateTime:</b> {row.get("dateTime", "N/A")} &nbsp;&nbsp; 🔗 <b>Source:</b> {row.get("source.uri", "N/A")}</div>'
        html_content += f'<div class="section"><span class="label">📰 Text:</span><br>{row.get("translated_text", "N/A")}</div>'

        for n in range(1, 8):
            frame_name = row.get(f'frame_{n}_name', '')
            rationale = row.get(f'frame_{n}_rationale', '')
            evidence = row.get(f'frame_{n}_evidence', '')
            confidence = row.get(f'frame_{n}_confidence', 'N/A')

            # Skip if frame is None, empty, or evidence is missing
            if (
                pd.notna(frame_name) and frame_name.strip().lower() != "none" and frame_name.strip() and
                pd.notna(evidence) and evidence.strip()
            ):
                html_content += f'<div class="frame">'
                html_content += f'<div><span class="label">🗂️ Frame {n}:</span> {frame_name}</div>'
                if pd.notna(rationale) and rationale.strip():
                    html_content += f'<div><span class="label">💡 Rationale:</span> {rationale}</div>'
                html_content += f'<div><span class="label">🔍 Evidence:</span> {evidence}</div>'
                html_content += f'<div><span class="label">📈 Confidence:</span> {confidence}</div>'
                html_content += f'</div>'

        html_content += '</div>'

    html_content += """
    </body>
    </html>
    """

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(html_content)

    print(f"✅ HTML report saved to: {output_path}")


# Example usage (adjust paths and dataframes accordingly)
output_file = os.path.expanduser("/home/akroon/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/icr2_annotated_report.html")
generate_annotation_html(df_icr2, output_file)

output_file = os.path.expanduser("/home/akroon/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test3/icr3_annotated_report.html")
generate_annotation_html(df_icr3, output_file)


✅ HTML report saved to: /home/akroon/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/icr2_annotated_report.html
✅ HTML report saved to: /home/akroon/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test3/icr3_annotated_report.html


In [15]:
# ======== COMPARE TO YARA ========
print("\n=== Evaluation ===")
gold = pd.read_csv(GOLD_STANDARD_PATH).set_index("uri")
updated = pd.read_csv(UPDATED_OUTPUT_PATH).set_index("uri")


=== Evaluation ===


## ERROR ANALYSIS

In [194]:
import os
import json
import pandas as pd
import requests
from sklearn.metrics import classification_report

# ======== CONFIG ========
LLM_ENDPOINT = "http://localhost:11434/api/chat"
LLM_MODEL_NAME = "llama3:70b"
#FRAME_INDEX = 2
#FRAME_INDEX = 3
#FRAME_INDEX = 4
#FRAME_INDEX = 5
FRAME_INDEX = 6
#FRAME_NAME = "Systemic institutional corruption"
#FRAME_NAME = "Elite collusion"
#FRAME_NAME = "Politicized investigations"
#FRAME_NAME = "Authoritarian reformism"
FRAME_NAME = 'Judicial and institutional accountability failures'
PROMPT_DIR = "prompts"
TEMPERATURE = 0.0

SESSION_FOLDER = os.path.expanduser("~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/sessions/")

INPUT_PATH = os.path.expanduser(
    "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/icr2_sample_raw.csv"
)
UPDATED_OUTPUT_PATH = os.path.expanduser(
    "~/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/single_frame_updated.csv"
)

# ======== FRAME COLUMN MAPPING ========
FRAME_COLUMN = {
    "Foreign influence threat": "Foreign influence threat_present",
    "Systemic institutional corruption": "Systemic institutional corruption_present",
    "Elite collusion": "Elite collusion_present",
    "Politicized investigations": "Politicized investigations_present",
    "Authoritarian reformism": "Authoritarian reformism_present",
    "Judicial and institutional accountability failures": "Judicial and institutional accountability failures_present",
    "Mobilizing anti-corruption": "Mobilizing anti-corruption_present"
}[FRAME_NAME]

# ======== HELPERS ========
def encode_label(val):
    return 1 if val == "Present" else 0 if val == "Not Present" else None

def load_prompt(index, name):
    filename = f"frame_{index}_{name.lower().replace(' ', '_').replace('-', '')}.txt"
    path = os.path.join(PROMPT_DIR, filename)
    with open(path, "r", encoding="utf-8") as f:
        return f.read()

def build_prompt(article_text):
    prompt_text = load_prompt(FRAME_INDEX, FRAME_NAME)
    return f"{prompt_text}\n\n---\n\nArticle:\n{article_text}"

def query_llm(article_text):
    prompt = build_prompt(article_text)
    try:
        response = requests.post(
            LLM_ENDPOINT,
            json={
                "model": LLM_MODEL_NAME,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "temperature": TEMPERATURE
            },
            timeout=120
        )
        response.raise_for_status()
        content = response.json().get("message", {}).get("content", "").strip()
        json_start = content.find("[")
        json_end = content.rfind("]") + 1
        parsed = json.loads(content[json_start:json_end])
        return parsed[0] if parsed else {}
    except Exception as e:
        print(f"❌ Query failed: {e}")
        return {"frame": "None", "rationale": str(e), "confidence": None}

# ======== LLM ANNOTATION ========
df = pd.read_csv(INPUT_PATH)
col_name = f"frame_{FRAME_INDEX}_name"
df[col_name] = ""
df[f"frame_{FRAME_INDEX}_rationale"] = ""
df[f"frame_{FRAME_INDEX}_confidence"] = ""

for idx, row in df.iterrows():
    print(f"🔍 Annotating row {idx}")
    result = query_llm(row["translated_text"])
    df.at[idx, col_name] = result.get("frame", "")
    df.at[idx, f"frame_{FRAME_INDEX}_rationale"] = result.get("rationale", "")
    df.at[idx, f"frame_{FRAME_INDEX}_confidence"] = result.get("confidence", "")

df.to_csv(UPDATED_OUTPUT_PATH, index=False)
print(f"✅ Saved LLM results to {UPDATED_OUTPUT_PATH}")

🔍 Annotating row 0
🔍 Annotating row 1
🔍 Annotating row 2
🔍 Annotating row 3
🔍 Annotating row 4
🔍 Annotating row 5
🔍 Annotating row 6
🔍 Annotating row 7
🔍 Annotating row 8
🔍 Annotating row 9
🔍 Annotating row 10
🔍 Annotating row 11
🔍 Annotating row 12
🔍 Annotating row 13
🔍 Annotating row 14
✅ Saved LLM results to /home/akroon/webdav/ASCOR-FMG-5580-RESPOND-news-data (Projectfolder)/annotations/coding_frames/ICR/ICR_test2/single_frame_updated.csv


In [195]:
# ======== LOAD YARA'S GOLD STANDARD FROM JSON ========
def load_all_annotations():
    data = []

    allowed_annotators = ["Assia", "Alexander", "Elisa", "Luigia", "Yara"]
    allowed_set = set(a.lower() for a in allowed_annotators)

    for filename in os.listdir(SESSION_FOLDER):
        if filename.endswith("_session_icr2.json"):
            user_id = filename.replace("_session_icr2.json", "")
            user_id_lower = user_id.lower()

            if user_id_lower not in allowed_set:
                print(f"⚠️ Skipping unknown annotator: {user_id}")
                continue

            path = os.path.join(SESSION_FOLDER, filename)
            with open(path, "r", encoding="utf-8") as f:
                session_data = json.load(f)
                annotations = session_data.get("annotations", [])
                for ann in annotations:
                    ann["user_id"] = user_id
                    data.append(ann)

    df = pd.DataFrame(data)
    annotator_counts = df.groupby("user_id")["uri"].nunique()
    complete_annotators = annotator_counts[annotator_counts == 15].index.tolist()
    df = df[df["user_id"].isin(complete_annotators)]

    print(f"✅ Annotators who coded all 15 articles: {complete_annotators}")
    return df

# ======== COMPARE TO YARA ========
annotations_df = load_all_annotations()
yara_df = annotations_df[annotations_df["user_id"].str.lower() == "yara"]

# Get Yara's label for this frame
yara_df["label"] = yara_df[FRAME_COLUMN].apply(encode_label)
yara_gold = yara_df[["uri", "label"]].dropna().set_index("uri")

# Load LLM predictions
llm_df = pd.read_csv(UPDATED_OUTPUT_PATH).set_index("uri")
llm_pred = llm_df[f"frame_{FRAME_INDEX}_name"].fillna("").apply(
    lambda x: 1 if x.strip().lower() == FRAME_NAME.lower() else 0
)

# Align and compare
aligned = yara_gold.join(llm_pred.rename("llm_pred"), how="inner")

print("\n=== Evaluation against Yara ===")
if aligned.empty:
    print("❌ No overlapping URIs found between LLM and Yara annotations.")
else:
    print(classification_report(aligned["label"], aligned["llm_pred"], target_names=["Not Present", "Present"]))

⚠️ Skipping unknown annotator: Anne
✅ Annotators who coded all 15 articles: ['Yara']

=== Evaluation against Yara ===
              precision    recall  f1-score   support

 Not Present       1.00      0.92      0.96        12
     Present       0.75      1.00      0.86         3

    accuracy                           0.93        15
   macro avg       0.88      0.96      0.91        15
weighted avg       0.95      0.93      0.94        15



In [196]:

# ======== Extract disagreement cases for prompt improvement ========
import pandas as pd

# Reload all to ensure synced state
llm_df = pd.read_csv(UPDATED_OUTPUT_PATH)
llm_df.set_index("uri", inplace=True)

# Add prediction and confidence fields
llm_df["llm_pred"] = llm_df[f"frame_{FRAME_INDEX}_name"].fillna("").apply(
    lambda x: 1 if x.strip().lower() == FRAME_NAME.lower() else 0
)
llm_df["confidence"] = pd.to_numeric(llm_df[f"frame_{FRAME_INDEX}_confidence"], errors="coerce")

# Gold labels from Yara
annotations_df = load_all_annotations()
yara_df = annotations_df[annotations_df["user_id"].str.lower() == "yara"]
yara_df["label"] = yara_df[FRAME_COLUMN].apply(encode_label)
yara_gold = yara_df[["uri", "label"]].dropna().set_index("uri")

# Align LLM + Yara labels
aligned = yara_gold.join(llm_df, how="inner")

# Extract mismatches
false_positives = aligned[(aligned["label"] == 0) & (aligned["llm_pred"] == 1)]
false_negatives = aligned[(aligned["label"] == 1) & (aligned["llm_pred"] == 0)]

# Combine for export
mismatches = pd.concat([false_positives, false_negatives])
mismatches["error_type"] = mismatches.apply(
    lambda row: "False Positive" if row["label"] == 0 else "False Negative", axis=1
)

# Rename columns for readability
mismatches_export = mismatches[[
    "translated_text",
    f"frame_{FRAME_INDEX}_rationale",
    "confidence",
    "llm_pred",
    "label",
    "error_type"
]].rename(columns={
    "translated_text": "article",
    f"frame_{FRAME_INDEX}_rationale": "llm_rationale",
    "confidence": "llm_confidence",
    "label": "yara_label",
    "llm_pred": "llm_prediction"
})
# Print mismatches for manual inspection
def print_disagreements(df, num=7):
    def pretty_print(row):
        print("="*80)
        print(f"🔎 Error Type:        {row['error_type']}")
        print(f"📄 Article:\n{row['article'][:1000]}")  # truncate if long
        print(f"\n🤖 LLM Prediction:   {row['llm_prediction']} (Confidence: {row['llm_confidence']})")
        print(f"📝 LLM Rationale:\n{row['llm_rationale']}")
        print(f"✅ Yara Label:        {row['yara_label']}")
        print("="*80 + "\n")

    print("\n📊 False Positives:\n")
    for _, row in mismatches_export[mismatches_export["error_type"] == "False Positive"].head(num).iterrows():
        pretty_print(row)

    print("\n📊 False Negatives:\n")
    for _, row in mismatches_export[mismatches_export["error_type"] == "False Negative"].head(num).iterrows():
        pretty_print(row)

# Call it
print_disagreements(mismatches_export, num=5)

⚠️ Skipping unknown annotator: Anne
✅ Annotators who coded all 15 articles: ['Yara']

📊 False Positives:

🔎 Error Type:        False Positive
📄 Article:
They detained the father of an associate of Navalny – Information Agency PIK

An associate of Russian oppositionist Alexei Navalny accused the Russian authorities of detaining his father in an attempt to exert pressure on him.

Ivan Zhdanov, director of the Anti-Corruption Foundation founded by Navalny, announced that the police conducted a midnight raid on his father’s home in Rostov-on-Don on suspicion of abuse of power and took him for questioning.

“I have no doubts that this criminal case is connected to me and what I do,” Zhdanov, who is outside Russia as part of a group of opponents of the Kremlin, who are trying to gather support for new protests this spring against the regime in Russia, said.

According to Zhdanov, his 66-year-old father is a pensioner with various health problems and will find it difficult to survive in pre-t

In [33]:
# ======== COMPARE TO YARA WITH CONFIDENCE ANALYSIS ========
import numpy as np

annotations_df = load_all_annotations()
yara_df = annotations_df[annotations_df["user_id"].str.lower() == "yara"]

# Get Yara's label for this frame
yara_df["label"] = yara_df[FRAME_COLUMN].apply(encode_label)
yara_gold = yara_df[["uri", "label"]].dropna().set_index("uri")

# Load LLM predictions and confidence
llm_df = pd.read_csv(UPDATED_OUTPUT_PATH).set_index("uri")
llm_df["llm_pred"] = llm_df[f"frame_{FRAME_INDEX}_name"].fillna("").apply(
    lambda x: 1 if x.strip().lower() == FRAME_NAME.lower() else 0
)
llm_df["confidence"] = pd.to_numeric(llm_df[f"frame_{FRAME_INDEX}_confidence"], errors="coerce")

# Align on shared URIs
aligned = yara_gold.join(llm_df[["llm_pred", "confidence"]], how="inner")

print("\n=== Evaluation against Yara ===")
if aligned.empty:
    print("❌ No overlapping URIs found between LLM and Yara annotations.")
else:
    # Classification report
    print(classification_report(aligned["label"], aligned["llm_pred"], target_names=["Not Present", "Present"]))

    # Confidence analysis
    print("\n=== Confidence Analysis ===")

    # True Positives
    tp = aligned[(aligned["label"] == 1) & (aligned["llm_pred"] == 1)]
    # True Negatives
    tn = aligned[(aligned["label"] == 0) & (aligned["llm_pred"] == 0)]
    # False Positives
    fp = aligned[(aligned["label"] == 0) & (aligned["llm_pred"] == 1)]
    # False Negatives
    fn = aligned[(aligned["label"] == 1) & (aligned["llm_pred"] == 0)]

    def avg_conf(df):
        return round(df["confidence"].mean(), 1) if not df.empty else "n/a"

    print(f"✅ True Positives: {len(tp)} | Avg Confidence: {avg_conf(tp)}")
    print(f"✅ True Negatives: {len(tn)} | Avg Confidence: {avg_conf(tn)}")
    print(f"❌ False Positives: {len(fp)} | Avg Confidence: {avg_conf(fp)}")
    print(f"❌ False Negatives: {len(fn)} | Avg Confidence: {avg_conf(fn)}")

    # Print mismatches with confidence and optionally rationale
    if not fp.empty:
        print("\n🔍 False Positives:")
        print(fp[["llm_pred", "label", "confidence"]])

    if not fn.empty:
        print("\n🔍 False Negatives:")
        print(fn[["llm_pred", "label", "confidence"]])


⚠️ Skipping unknown annotator: Anne
⚠️ Skipping unknown annotator: anne
⚠️ Skipping unknown annotator: code1
⚠️ Skipping unknown annotator: coder1
⚠️ Skipping unknown annotator: frozen
⚠️ Skipping unknown annotator: simone
✅ Annotators who coded all 15 articles: ['Alexander', 'Assia', 'Elisa', 'Luigia', 'Yara']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yara_df["label"] = yara_df[FRAME_COLUMN].apply(encode_label)



=== Evaluation against Yara ===
              precision    recall  f1-score   support

 Not Present       0.90      0.75      0.82        12
     Present       0.40      0.67      0.50         3

    accuracy                           0.73        15
   macro avg       0.65      0.71      0.66        15
weighted avg       0.80      0.73      0.75        15


=== Confidence Analysis ===
✅ True Positives: 2 | Avg Confidence: 95.0
✅ True Negatives: 9 | Avg Confidence: 87.1
❌ False Positives: 3 | Avg Confidence: 90.7
❌ False Negatives: 1 | Avg Confidence: 85.0

🔍 False Positives:
            llm_pred  label  confidence
uri                                    
6571925780         1      0        95.0
7931240488         1      0        92.0
7940552433         1      0        85.0

🔍 False Negatives:
            llm_pred  label  confidence
uri                                    
6816585638         0      1        85.0


In [16]:
llm_labels = updated[col_name].map(lambda x: 1 if isinstance(x, str) and x.strip() else 0)

report = classification_report(gold_labels, llm_labels, target_names=["Not Present", "Present"])
print(report)

NameError: name 'gold_labels' is not defined