In [None]:
#select 100 random files from data/annotations folder
import os
import random
import shutil
import glob
import argparse
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tkinter as tk
from tkinter import simpledialog, messagebox


# Seed Python's RNG so the sampled files and annotations are reproducible.
random.seed(42)

NUM_FILES = 100  # number of annotation files (one annotation is drawn from each)


def _annotations_with_chunk(path):
    """Return every (person, annotation) pair in the JSON file at `path` whose
    annotation contains a 'chunk' key.

    The file is expected to map a person name to a list of annotation dicts
    (see the structure sketch below). An empty list means the file has no
    annotation that can be linked back to a text chunk.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return [
        (person, annotation)
        for person, annotations in data.items()
        for annotation in annotations
        if 'chunk' in annotation
    ]


# glob returns paths in arbitrary order, so sort them: otherwise the seeded
# sample would still depend on filesystem ordering.
all_files = sorted(
    path for path in glob.glob(os.path.join("data/annotations", '*'))
    if os.path.isfile(path)
)

# Annotation file structure:
# {
#   "Dan Evans": [
#     {
#       "trait": "warmth",
#       "action": "Wakes up in the middle of the night, loads his pistol, and stays alert to protect his family from potential danger.",
#       "assessment": "Dan's vigilance and protective behavior toward his family indicate a sincere and responsible nature, showing care and concern for their safety.",
#       "rating": 2,
#       "chunk": 1
#     },
#     {
#       "trait": "competence",
#       "action": "Wakes up in the middle of the night, tense and sweaty, and immediately loads his pistol while listening carefully for any threat.",
#       "assessment": "This action shows Dan's alertness, preparedness, and ability to respond quickly to potential danger, indicating a high level of situational awareness and readiness.",
#       "rating": 2,
#       "chunk": 1
#     },
#     ...
#   ],
#   ...
# }

# Walk the files in a random order (sampling without replacement) and keep the
# first NUM_FILES that contain at least one annotation with a chunk number.
# Each file is visited at most once, so no file can be selected twice and the
# loop always terminates, even when some files have to be skipped.
confirmed_files = []
random_annotations = []
for file in random.sample(all_files, len(all_files)):
    candidates = _annotations_with_chunk(file)
    if not candidates:
        continue  # no chunk reference anywhere in this file; try the next one

    # Choose among chunk-bearing annotations only, across all persons, so the
    # next cell can always look up the referenced chunk.
    person, selected_annotation = random.choice(candidates)
    confirmed_files.append(file)
    random_annotations.append({
        "file": file,
        "person": person,
        "annotation": selected_annotation
    })
    if len(confirmed_files) == NUM_FILES:
        break

if len(confirmed_files) < NUM_FILES:
    raise ValueError(
        f"Only {len(confirmed_files)} of {len(all_files)} files in data/annotations "
        f"contain an annotation with a 'chunk' key; {NUM_FILES} are required."
    )

selected_files = confirmed_files

len(random_annotations)

In [None]:
# Construct one combined annotation file and one combined chunk file from the
# sampled annotations. For every sampled annotation, look up the chunk number it
# refers to in data/chunks/<same file name> and copy that chunk's text into a
# single chunk file. Chunk numbers are only unique within one source file, so
# keys in both outputs are suffixed with the source file name.
annotation_output = {}
chunk_output = {}
for item in random_annotations:
    source_name = os.path.basename(item["file"])
    annotation = item["annotation"]
    chunk_id = str(annotation["chunk"])
    # Same key is stored in the annotation and used in the chunk file, so the
    # evaluator can resolve the annotation's "chunk" field directly.
    combined_chunk_key = chunk_id + source_name

    annotation_output[item["person"] + source_name] = [{
        "trait": annotation["trait"],
        "action": annotation["action"],
        "assessment": annotation["assessment"],
        "rating": annotation["rating"],
        "chunk": combined_chunk_key
    }]

    # The chunk file is looked up by the annotation file's base name and is
    # read as a mapping keyed by the stringified chunk number.
    chunk_file = os.path.join("data/chunks", source_name)
    with open(chunk_file, 'r', encoding='utf-8') as f:
        chunks = json.load(f)
    # A chunk id missing from the chunk file is stored as an empty string.
    chunk_output[combined_chunk_key] = chunks.get(chunk_id, "")

# Write to data/random_annotations/annotations.json and data/random_annotations/chunks.json.
# UTF-8 is explicit because score_annotations2 reads both files back as UTF-8.
os.makedirs("data/random_annotations", exist_ok=True)
with open("data/random_annotations/annotations.json", 'w', encoding='utf-8') as f:
    json.dump(annotation_output, f, indent=4)
with open("data/random_annotations/chunks.json", 'w', encoding='utf-8') as f:
    json.dump(chunk_output, f, indent=4)

In [None]:

def _load_previous_results(outputfile):
    """Read previously saved evaluations (one JSON object per line) from `outputfile`.

    Lines that are blank are ignored; lines that are not valid JSON are reported
    and skipped. Returns the list of parsed entries, or None if the file could
    not be read at all.
    """
    entries = []
    try:
        with open(outputfile, "r", encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                if not line.strip():
                    continue
                try:
                    entries.append(json.loads(line.strip()))
                except json.JSONDecodeError as e:
                    print(f"Warning: Could not parse line {line_num} in {outputfile}: {e}")
    except (OSError, UnicodeError) as e:
        print(f"Error reading existing output file: {e}")
        print("Starting from the beginning...")
        return None
    return entries


def _summarize_labels(result, labels):
    """Return count, proportion and a 95% interval for each label in `labels`.

    The interval bounds are the 2.5% and 97.5% quantiles of a
    Beta(1 + n_label, 1 + n_total - n_label) distribution.
    """
    # Imported locally: the notebook's import cell does not bring scipy into scope.
    from scipy.stats import beta

    n_total = len(result)
    summary_stats = {
        "n_total": n_total,
        "labels": labels
    }

    for label in labels:
        n_label = sum(1 for r in result if r.get("human_label") == label)
        prop_label = n_label / n_total if n_total > 0 else 0

        alpha = 1 + n_label
        beta_param = 1 + n_total - n_label
        # float() turns the numpy scalars into plain floats for JSON output.
        lower = float(beta.ppf(0.025, alpha, beta_param))
        upper = float(beta.ppf(0.975, alpha, beta_param))

        label_key = label.replace(' ', '_')
        summary_stats[f"n_{label_key}"] = n_label
        summary_stats[f"proportion_{label_key}"] = prop_label
        summary_stats[f"95%_CI_lower_{label_key}"] = lower
        summary_stats[f"95%_CI_upper_{label_key}"] = upper

    return summary_stats


def _run_labeling_gui(annotations, chunks, labels, result, start_idx, outputfile):
    """Show the Tkinter labelling window and block until it is closed.

    Starting at `annotations[start_idx]`, each chosen label is appended to
    `result` (mutated in place) and immediately appended to `outputfile` as one
    JSON line, so progress survives closing the window early.
    """
    # --- Enhanced Tkinter GUI with simplified styling ---
    root = tk.Tk()
    root.title("üìö Character Annotation Evaluator")
    root.geometry("1200x900")
    root.configure(bg='#f8f9fa')

    # Configure style
    root.option_add('*Font', 'Segoe\\ UI 10')

    # One-element list so the nested callbacks can advance the position.
    idx = [start_idx]

    # Header frame with title and progress
    header_frame = tk.Frame(root, bg='#495057', height=80)
    header_frame.pack(fill=tk.X, padx=0, pady=0)
    header_frame.pack_propagate(False)

    title_label = tk.Label(
        header_frame,
        text="üìö Character Annotation Evaluator",
        font=("Segoe UI", 18, "bold"),
        fg='white',
        bg='#495057'
    )
    title_label.pack(pady=(15, 5))

    progress_label = tk.Label(
        header_frame,
        text=f"Annotation 1 of {len(annotations)}",
        font=("Segoe UI", 12),
        fg='#e9ecef',
        bg='#495057'
    )
    progress_label.pack()

    # Main content frame
    content_frame = tk.Frame(root, bg='#f8f9fa')
    content_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=(10, 20))

    # Combined character and annotation info frame
    annot_frame = tk.Frame(content_frame, bg='#6c757d', relief=tk.RAISED, bd=2)
    annot_frame.pack(fill=tk.X, pady=(0, 15))

    annot_header = tk.Label(
        annot_frame,
        text="üìù AI annotation",
        font=("Segoe UI", 12, "bold"),
        fg='white',
        bg='#6c757d',
        pady=5
    )
    annot_header.pack()

    annot_text = tk.Text(
        annot_frame,
        height=6,
        font=("Consolas", 12),
        bg='#ffffff',
        fg='#212529',
        relief=tk.FLAT,
        padx=10,
        pady=5
    )
    annot_text.pack(fill=tk.X, padx=10, pady=(0, 10))
    annot_text.tag_configure("bold", font=("Consolas", 12, "bold"))

    # Chunk content frame - simplified colors
    chunk_frame = tk.Frame(content_frame, bg='#6c757d', relief=tk.RAISED, bd=2)
    chunk_frame.pack(fill=tk.BOTH, expand=True, pady=(0, 15))

    chunk_header = tk.Label(
        chunk_frame,
        text="üìñ Original Text Context",
        font=("Segoe UI", 12, "bold"),
        fg='white',
        bg='#6c757d',
        pady=5
    )
    chunk_header.pack()

    # Search frame
    search_frame = tk.Frame(chunk_frame, bg='#6c757d')
    search_frame.pack(fill=tk.X, padx=10, pady=(0, 5))

    search_var = tk.StringVar()
    search_entry = tk.Entry(
        search_frame,
        textvariable=search_var,
        font=("Segoe UI", 10),
        bg='#ffffff',
        fg='#212529',
        relief=tk.FLAT,
        width=30
    )
    search_entry.pack(side=tk.RIGHT, padx=(10, 0))

    search_label = tk.Label(
        search_frame,
        text="üîç Search:",
        font=("Segoe UI", 10),
        fg='white',
        bg='#6c757d'
    )
    search_label.pack(side=tk.RIGHT, padx=(0, 5))

    # Scrollable text widget for chunk content
    text_container = tk.Frame(chunk_frame, bg='#6c757d')
    text_container.pack(fill=tk.BOTH, expand=True, padx=10, pady=(0, 10))

    text_widget = tk.Text(
        text_container,
        wrap=tk.WORD,
        font=("Georgia", 13),
        bg='#ffffff',
        fg='#212529',
        relief=tk.FLAT,
        padx=15,
        pady=10,
        selectbackground='#007bff',
        selectforeground='white'
    )
    scrollbar = tk.Scrollbar(text_container, orient=tk.VERTICAL, command=text_widget.yview)
    text_widget.configure(yscrollcommand=scrollbar.set)

    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    text_widget.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    # Configure text widget tags for highlighting
    text_widget.tag_configure("char_bold", font=("Georgia", 13, "bold"), foreground="#007bff")
    text_widget.tag_configure("search_highlight", background="#ffff00", foreground="#000000")

    def search_text():
        """Highlight every case-insensitive match of the search box content."""
        search_term = search_var.get().strip()

        # Remove previous search highlights
        text_widget.tag_remove("search_highlight", "1.0", tk.END)

        if not search_term:
            return

        start_pos = "1.0"
        while True:
            pos = text_widget.search(search_term, start_pos, tk.END, nocase=True)
            if not pos:
                break

            text_widget.tag_add("search_highlight", pos, f"{pos}+{len(search_term)}c")

            # Continue one character further so overlapping matches are found too.
            start_pos = f"{pos}+1c"

    # Re-run the search whenever the entry content changes.
    # trace_add replaces the deprecated Variable.trace('w', ...).
    search_var.trace_add('write', lambda *args: search_text())

    # Bind Enter key to search entry for convenience
    search_entry.bind('<Return>', lambda e: search_text())

    # Buttons frame with enhanced styling
    btn_frame = tk.Frame(root, bg='#495057', height=80)
    btn_frame.pack(fill=tk.X, pady=0)
    btn_frame.pack_propagate(False)

    btn_container = tk.Frame(btn_frame, bg='#495057')
    btn_container.pack(expand=True)

    def update_annotation():
        """Display annotations[idx[0]], or close the window when none are left."""
        if idx[0] >= len(annotations):
            root.destroy()
            return

        character, action = annotations[idx[0]]

        # Update progress
        progress_label.config(text=f"Annotation {idx[0] + 1} of {len(annotations)}")

        # Clear the search and hand keyboard focus back to the window so the
        # number shortcuts work again after the search box has been used.
        search_var.set("")
        root.focus_set()

        # Annotation details: character name first, then every field except
        # the chunk reference and the evidence fields (shown separately below).
        annot_text.config(state=tk.NORMAL)
        annot_text.delete("1.0", tk.END)

        annot_text.insert(tk.END, "Character: ", "bold")
        annot_text.insert(tk.END, f"{character}\n")

        for key, value in action.items():
            if key in ["chunk", "evidence_raw", "evidence_thoughts", "evidence_label"]:
                continue
            annot_text.insert(tk.END, f"{key}: ", "bold")
            annot_text.insert(tk.END, f"{value}\n")

        # Evidence section (only the non-empty fields)
        if action.get("evidence_label"):
            annot_text.insert(tk.END, "evidence rating: ", "bold")
            annot_text.insert(tk.END, f"{action.get('evidence_label', '')}\n")
        if action.get("evidence_thoughts"):
            annot_text.insert(tk.END, "thoughts: ", "bold")
            annot_text.insert(tk.END, f"{action.get('evidence_thoughts', '')}\n")

        annot_text.config(state=tk.DISABLED)

        # Get chunk content
        chunk_num = action.get("chunk", "unknown")
        chunk_content = chunks.get(str(chunk_num), "Chunk content not found")

        # Update chunk content with character name highlighting
        text_widget.config(state=tk.NORMAL)
        text_widget.delete("1.0", tk.END)

        # Highlight each part of the name (first name, last name, ...) on its
        # own; parts shorter than 2 characters are not highlighted.
        char_parts = [part for part in character.split() if len(part) >= 2]

        remaining_text = chunk_content
        while remaining_text:
            # Find the earliest occurrence of any name part (case-insensitive)
            lowered = remaining_text.lower()
            earliest_pos = len(remaining_text)
            matched_part = None
            for part in char_parts:
                pos = lowered.find(part.lower())
                if pos != -1 and pos < earliest_pos:
                    earliest_pos = pos
                    matched_part = part

            if matched_part is None:
                # No more matches, insert remaining text
                text_widget.insert(tk.END, remaining_text)
                break

            match_end = earliest_pos + len(matched_part)
            # Text before the match, then the match itself in the highlight style
            text_widget.insert(tk.END, remaining_text[:earliest_pos])
            text_widget.insert(tk.END, remaining_text[earliest_pos:match_end], "char_bold")
            remaining_text = remaining_text[match_end:]

        text_widget.config(state=tk.DISABLED)

    def make_label_handler(label):
        """Return a callback that records `label` for the current annotation."""
        def handler():
            if idx[0] >= len(annotations):
                return  # nothing left to label
            character, action = annotations[idx[0]]
            new_entry = {
                "character": character,
                "annotation": action,
                "human_label": label
            }
            result.append(new_entry)

            # Immediately save the new entry to file (append mode)
            with open(outputfile, "a", encoding="utf-8") as f:
                f.write(json.dumps(new_entry) + "\n")

            idx[0] += 1
            update_annotation()
        return handler

    # (normal, hover) colour pairs, reused cyclically if there are more labels
    button_colors = [
        ("#dc3545", "#c82333"),  # Red
        ("#fd7e14", "#e8620c"),  # Orange
        ("#ffc107", "#e0a800"),  # Yellow
        ("#28a745", "#218838"),  # Green
        ("#007bff", "#0056b3"),  # Blue
        ("#6f42c1", "#5a32a3"),  # Purple
        ("#17a2b8", "#138496"),  # Teal
        ("#343a40", "#23272b")   # Dark
    ]

    label_handlers = [make_label_handler(label) for label in labels]

    for i, label in enumerate(labels):
        bg_color, hover_color = button_colors[i % len(button_colors)]

        btn = tk.Button(
            btn_container,
            text=f"  {label.title()}  ",
            command=label_handlers[i],
            font=("Segoe UI", 11, "bold"),
            bg=bg_color,
            fg='black',
            activebackground=hover_color,
            activeforeground='black',
            relief=tk.FLAT,
            padx=20,
            pady=10,
            cursor='hand2'
        )
        btn.pack(side=tk.LEFT, padx=8, pady=15)

        # Hover effects; default arguments pin this iteration's button and colours.
        def on_enter(e, btn=btn, color=hover_color):
            btn.config(bg=color)
        def on_leave(e, btn=btn, color=bg_color):
            btn.config(bg=color)

        btn.bind("<Enter>", on_enter)
        btn.bind("<Leave>", on_leave)

    def on_key(event):
        """Number keys 1..len(labels) pick the corresponding label."""
        # A binding on the root window also fires for keys typed into child
        # widgets; digits typed into the search box are search text, not labels.
        if event.widget is search_entry:
            return
        key = event.keysym
        if key.isdigit():
            idx_key = int(key) - 1
            if 0 <= idx_key < len(labels):
                label_handlers[idx_key]()

    root.bind('<Key>', on_key)
    root.focus_set()

    # Instructions label
    instructions = tk.Label(
        root,
        text=f"üí° Use number keys 1-{len(labels)} for quick selection or click buttons above",
        font=("Segoe UI", 9, "italic"),
        fg='#6c757d',
        bg='#f8f9fa',
        pady=5
    )
    instructions.pack()

    update_annotation()
    root.mainloop()


def score_annotations2(annotation_file, outputfile, chunkfile, summaryfile=None, num_annotations=100, labels=None, seed=42):
    """GUI for human evaluation of annotation quality with customizable labels.

    Annotations are flattened to (character, annotation) pairs, sampled with the
    given seed, and shown one at a time next to the text chunk they refer to.
    Each label is appended to `outputfile` as one JSON line as soon as it is
    chosen; if `outputfile` already exists, labelling resumes after the entries
    it contains. Summary statistics over all saved labels are written to
    `summaryfile` when the window closes, and also when every annotation was
    already labelled in an earlier session (the window is then not opened).

    Args:
        annotation_file: Path to annotations JSON (character -> list of annotations)
        outputfile: Path to save evaluation results (JSON lines, appended)
        chunkfile: Path to text chunks JSON for context
        summaryfile: Path to save summary statistics; defaults to `outputfile`
            with ".json" replaced by "_summary.json"
        num_annotations: Number of annotations to evaluate
        labels: Custom label options (default: Correct/Questionable/Incorrect)
        seed: Random seed for sampling (sets NumPy's global seed)

    Returns:
        None
    """
    if labels is None:
        labels = ["Correct", "Questionable", "Incorrect"]

    if summaryfile is None:
        if ".json" in outputfile:
            summaryfile = outputfile.replace(".json", "_summary.json")
        else:
            # Without a ".json" to replace, the summary path would equal the
            # results path and the summary would overwrite the saved labels.
            summaryfile = outputfile + "_summary.json"

    with open(annotation_file, "r", encoding="utf-8") as f:
        action_annotations_per_character = json.load(f)

    # Load chunks for context display
    with open(chunkfile, "r", encoding="utf-8") as f:
        chunks = json.load(f)

    # Flatten annotations
    all_annotations = [
        (character, action)
        for character, actions in action_annotations_per_character.items()
        for action in actions
    ]

    print(f"Scoring {num_annotations} out of {len(all_annotations)} total annotations.")

    # Randomly sample annotations; the same seed gives the same sample and
    # order, which is what makes resuming by position valid.
    np.random.seed(seed)
    if len(all_annotations) > num_annotations:
        sampled_indices = np.random.choice(len(all_annotations), size=num_annotations, replace=False)
        annotations = [all_annotations[i] for i in sampled_indices]
    else:
        annotations = all_annotations

    # Check if output file exists and load previous results
    result = []
    if os.path.exists(outputfile):
        print(f"Found existing output file: {outputfile}")
        previous = _load_previous_results(outputfile)
        if previous is not None:
            result = previous
            if len(result) < len(annotations):
                print(f"Resuming from annotation {len(result) + 1} of {len(annotations)}")
    start_idx = len(result)

    if start_idx >= len(annotations):
        # Nothing left to label: skip the GUI but still write the summary, so a
        # session whose summary step failed earlier can be completed by re-running.
        print("All annotations have already been completed!")
    else:
        _run_labeling_gui(annotations, chunks, labels, result, start_idx, outputfile)

    # Save summary statistics over everything labelled so far
    summary_stats = _summarize_labels(result, labels)
    with open(summaryfile, "w", encoding="utf-8") as f:
        json.dump(summary_stats, f, indent=2)


In [None]:
# Launch the evaluation GUI on the combined annotation/chunk files, using a
# four-level accuracy scale instead of the default labels.
score_annotations2(
    annotation_file="data/random_annotations/annotations.json",
    chunkfile="data/random_annotations/chunks.json",
    outputfile="data/random_annotations/scored_annotations.json",
    labels=["Accurate", "Somewhat Accurate", "Somewhat Inaccurate", "Inaccurate"],
)