# Human-in-the-Loop Annotation Interface

Interactive notebook for reviewing and refining ambiguous ontological annotations.

## Usage
1. Run cells in order
2. Use the widgets to review samples
3. Modify soft targets as needed
4. Export refined annotations

In [None]:
import pandas as pd
import numpy as np
import json
from pathlib import Path
from IPython.display import display, HTML, clear_output

# Optional: ipywidgets for interactive UI
try:
    import ipywidgets as widgets
    HAS_WIDGETS = True
except ImportError:
    HAS_WIDGETS = False
    print("Note: Install ipywidgets for interactive UI: pip install ipywidgets")

In [None]:
# Configuration: input manifest and where refined annotations are written
PARQUET_PATH = "../data/base_manifest_db.parquet"
OUTPUT_PATH = "../data/refined_annotations.json"

# Mode definitions (order fixes the position of each mode in a soft target)
TEMPORAL_MODES = ["past", "present", "future"]
SPATIAL_MODES = ["thing", "place", "person"]
ONTOLOGICAL_MODES = ["imagined", "forgotten", "known"]

# Load data
df = pd.read_parquet(PARQUET_PATH)
print(f"Loaded {len(df)} samples")


def _first_column_containing(*keywords):
    """Return the first column of ``df`` whose lower-cased name contains any keyword, else None."""
    for column in df.columns:
        lowered = column.lower()
        if any(keyword in lowered for keyword in keywords):
            return column
    return None


# Detect the label columns by substring match on their names
t_col = _first_column_containing('temporal')
s_col = _first_column_containing('objectional', 'spatial')
o_col = _first_column_containing('ontological')

# Prefer the 'concept' column for display text; otherwise use 'lyric_text'
text_col = 'lyric_text' if 'concept' not in df.columns else 'concept'

print(f"Detected columns: temporal={t_col}, spatial={s_col}, ontological={o_col}")

In [None]:
# Helper functions
def to_soft_target(label, modes, smoothing=0.1):
    """Convert a discrete label into a soft target distribution over ``modes``.

    Args:
        label: Raw label value. ``None``/NaN, the placeholder strings
            ``'none'``, ``'null'``, ``''`` and ``'black'``, and any label that
            is not found in ``modes`` are all treated as "unknown".
        modes: Ordered sequence of mode names. The label is lower-cased and
            stripped before lookup, so entries must be lower-case to match.
        smoothing: Weight of the uniform distribution mixed into the one-hot
            target. Values ``<= 0`` disable smoothing.

    Returns:
        A NumPy array of length ``len(modes)``: the uniform distribution for
        unknown labels, otherwise the (optionally smoothed) one-hot vector.
    """
    n_modes = len(modes)
    if n_modes == 0:
        # No modes means there is nothing to distribute probability over.
        return np.zeros(0)

    # The fallback is sized from ``modes`` rather than hard-coded to three
    # entries, so the result always lines up with the mode list.
    uniform = np.full(n_modes, 1 / n_modes)

    if label is None or pd.isna(label):
        return uniform

    label = str(label).lower().strip()
    if label in ('none', 'null', '', 'black'):
        return uniform

    try:
        idx = modes.index(label)
    except ValueError:
        return uniform

    target = np.zeros(n_modes)
    target[idx] = 1.0

    if smoothing > 0:
        target = (1 - smoothing) * target + smoothing * (1 / n_modes)

    return target

def get_album(temporal, spatial, ontological):
    """Return the album name for a (temporal, spatial, ontological) mode triple.

    Each argument is converted with ``str``, lower-cased and stripped of
    surrounding whitespace before lookup, matching the normalization used by
    ``to_soft_target``. Combinations that are not in the table (including
    missing labels) map to ``'Other'``.
    """
    mapping = {
        ('past', 'thing', 'imagined'): 'Orange',
        ('past', 'thing', 'forgotten'): 'Red',
        ('past', 'person', 'known'): 'Violet',
        ('present', 'place', 'imagined'): 'Yellow',
        ('present', 'person', 'known'): 'Green',
        ('present', 'person', 'forgotten'): 'Indigo',
        ('future', 'place', 'known'): 'Blue',
    }
    # Strip as well as lower-case so padded labels such as ' past' still match.
    key = tuple(
        str(value).lower().strip()
        for value in (temporal, spatial, ontological)
    )
    return mapping.get(key, 'Other')

In [None]:
# Flag samples where any of the three label columns is null
ambiguous_mask = df[t_col].isna()
ambiguous_mask = ambiguous_mask | df[s_col].isna()
ambiguous_mask = ambiguous_mask | df[o_col].isna()

# Index labels of the rows on each side of the split
ambiguous_indices = df.loc[ambiguous_mask].index.tolist()
labeled_indices = df.loc[~ambiguous_mask].index.tolist()

print(f"Ambiguous (null labels): {len(ambiguous_indices)}")
print(f"Labeled: {len(labeled_indices)}")

In [None]:
# In-memory store of reviewed samples, keyed by integer sample index
annotations = {}


def save_annotation(idx, temporal_dist, spatial_dist, onto_dist, note=""):
    """Record the reviewed distributions for one sample in ``annotations``.

    NumPy arrays are converted to plain lists; any other value is stored as
    given. The entry is keyed by ``int(idx)`` and replaces any earlier entry
    for the same sample.
    """
    def _as_plain(dist):
        # Only ndarrays need converting; everything else passes through.
        if isinstance(dist, np.ndarray):
            return dist.tolist()
        return dist

    record = {}
    record['temporal'] = _as_plain(temporal_dist)
    record['spatial'] = _as_plain(spatial_dist)
    record['ontological'] = _as_plain(onto_dist)
    record['note'] = note

    annotations[int(idx)] = record
    print(f"Saved annotation for sample {idx}")

## Simple Review Interface

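Run the cell below to define `display_sample(idx)` and preview the first ambiguous sample. Call `display_sample(idx)` with another index to review further samples one at a time.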

In [None]:
def display_sample(idx):
    """Render one sample as an HTML card and return its current soft targets.

    Args:
        idx: Index label of the row in ``df``, i.e. a value taken from
            ``df.index`` (such as the entries of ``ambiguous_indices``).

    Returns:
        Tuple ``(t_target, s_target, o_target)`` of soft targets computed by
        ``to_soft_target`` from the row's current temporal, spatial and
        ontological labels.
    """
    # Local import so the cell does not depend on an edit to the import cell.
    from html import escape

    # Callers pass index labels (from df.index), so look the row up by label;
    # positional lookup is only equivalent for a default 0..n-1 index.
    row = df.loc[idx]

    # Text, falling back to the lyric column when the primary text is missing
    text = row.get(text_col, '')
    if pd.isna(text):
        text = row.get('lyric_text', '')
    if pd.isna(text):
        text = ''
    text = str(text)
    # Truncate first, then escape, so an HTML entity is never cut in half.
    shown_text = escape(text[:500]) + ('...' if len(text) > 500 else '')

    # Current labels
    t_val = row.get(t_col)
    s_val = row.get(s_col)
    o_val = row.get(o_col)

    def _label_cell(value):
        # Missing values (None, NaN, pd.NA) and empty labels are shown as 'None'.
        if pd.isna(value) or not value:
            return 'None'
        return escape(str(value))

    def _vector_cell(target):
        return ', '.join(f'{v:.2f}' for v in target)

    # Soft targets
    t_target = to_soft_target(t_val, TEMPORAL_MODES)
    s_target = to_soft_target(s_val, SPATIAL_MODES)
    o_target = to_soft_target(o_val, ONTOLOGICAL_MODES)

    markup = f"""
    <div style="padding: 15px; border: 1px solid #ddd; border-radius: 5px; margin: 10px 0;">
        <h3>Sample {idx}</h3>
        <p><strong>Text:</strong> {shown_text}</p>
        <hr>
        <table style="width: 100%;">
            <tr>
                <th>Dimension</th>
                <th>Current Label</th>
                <th>Soft Target</th>
            </tr>
            <tr>
                <td>Temporal</td>
                <td>{_label_cell(t_val)}</td>
                <td>[{_vector_cell(t_target)}] (past/present/future)</td>
            </tr>
            <tr>
                <td>Spatial</td>
                <td>{_label_cell(s_val)}</td>
                <td>[{_vector_cell(s_target)}] (thing/place/person)</td>
            </tr>
            <tr>
                <td>Ontological</td>
                <td>{_label_cell(o_val)}</td>
                <td>[{_vector_cell(o_target)}] (imagined/forgotten/known)</td>
            </tr>
        </table>
    </div>
    """
    display(HTML(markup))
    return t_target, s_target, o_target

# Example: display first ambiguous sample
if ambiguous_indices:
    display_sample(ambiguous_indices[0])

## Interactive Widget Interface

If ipywidgets is installed, use this more interactive interface.

In [None]:
if HAS_WIDGETS:
    # State: a one-element list so the nested callbacks can mutate the position
    current_idx = [0]
    # Review up to 50 ambiguous samples; fall back to labeled ones if none exist
    indices_to_review = ambiguous_indices[:50] if ambiguous_indices else labeled_indices[:50]

    # Widgets
    output = widgets.Output()

    temporal_sliders = [
        widgets.FloatSlider(value=0.33, min=0, max=1, step=0.01, description=m.capitalize())
        for m in TEMPORAL_MODES
    ]
    spatial_sliders = [
        widgets.FloatSlider(value=0.33, min=0, max=1, step=0.01, description=m.capitalize())
        for m in SPATIAL_MODES
    ]
    onto_sliders = [
        widgets.FloatSlider(value=0.33, min=0, max=1, step=0.01, description=m.capitalize())
        for m in ONTOLOGICAL_MODES
    ]

    note_input = widgets.Text(description='Note:')

    prev_btn = widgets.Button(description='Previous')
    next_btn = widgets.Button(description='Next')
    save_btn = widgets.Button(description='Save', button_style='success')
    skip_btn = widgets.Button(description='Skip')

    def update_display():
        """Redraw the current sample and load its soft targets into the sliders."""
        with output:
            clear_output(wait=True)
            if current_idx[0] < len(indices_to_review):
                idx = indices_to_review[current_idx[0]]
                t, s, o = display_sample(idx)

                # Update sliders
                for i, slider in enumerate(temporal_sliders):
                    slider.value = t[i]
                for i, slider in enumerate(spatial_sliders):
                    slider.value = s[i]
                for i, slider in enumerate(onto_sliders):
                    slider.value = o[i]

                print(f"\nProgress: {current_idx[0] + 1} / {len(indices_to_review)}")
            else:
                print("All samples reviewed!")

    def slider_distribution(sliders):
        """Return the slider values normalized to sum to 1.

        If every slider is at zero there is nothing to normalize, so the
        uniform distribution is returned instead of dividing by zero (which
        would store NaN values in the annotation).
        """
        values = np.array([slider.value for slider in sliders])
        total = values.sum()
        if total <= 0:
            return np.full(len(values), 1 / len(values))
        return values / total

    def on_prev(b):
        """Step back one sample, if not already at the first."""
        if current_idx[0] > 0:
            current_idx[0] -= 1
            update_display()

    def on_next(b):
        """Step forward one sample, if not already at the last."""
        if current_idx[0] < len(indices_to_review) - 1:
            current_idx[0] += 1
            update_display()

    def on_save(b):
        """Save the slider distributions for the current sample and advance."""
        # Nothing to save when the review list is empty or already finished.
        if current_idx[0] >= len(indices_to_review):
            return

        idx = indices_to_review[current_idx[0]]
        t_dist = slider_distribution(temporal_sliders)
        s_dist = slider_distribution(spatial_sliders)
        o_dist = slider_distribution(onto_sliders)

        save_annotation(idx, t_dist, s_dist, o_dist, note_input.value)
        note_input.value = ''

        if current_idx[0] == len(indices_to_review) - 1:
            # Saving the last sample moves past the end so the completion
            # message is shown; 'Previous' still steps back from there.
            current_idx[0] += 1
            update_display()
        else:
            on_next(b)

    def on_skip(b):
        """Move on without saving."""
        on_next(b)

    prev_btn.on_click(on_prev)
    next_btn.on_click(on_next)
    save_btn.on_click(on_save)
    skip_btn.on_click(on_skip)

    # Layout
    temporal_box = widgets.VBox([widgets.Label('Temporal:')] + temporal_sliders)
    spatial_box = widgets.VBox([widgets.Label('Spatial:')] + spatial_sliders)
    onto_box = widgets.VBox([widgets.Label('Ontological:')] + onto_sliders)

    sliders_row = widgets.HBox([temporal_box, spatial_box, onto_box])
    buttons_row = widgets.HBox([prev_btn, skip_btn, save_btn, next_btn])

    ui = widgets.VBox([output, sliders_row, note_input, buttons_row])

    display(ui)
    update_display()
else:
    print("Install ipywidgets for interactive UI: pip install ipywidgets")
    print("Using simple display mode instead.")

## Export Annotations

In [None]:
def export_annotations(path=OUTPUT_PATH):
    """Export the collected annotations to a JSON file.

    The file holds a single object with the keys ``'annotations'`` (the
    ``annotations`` dict), ``'total_annotated'`` (its size) and
    ``'source_file'`` (``PARQUET_PATH``). Missing parent directories of
    ``path`` are created.

    Args:
        path: Destination file; defaults to ``OUTPUT_PATH``.
    """
    payload = {
        'annotations': annotations,
        'total_annotated': len(annotations),
        'source_file': PARQUET_PATH,
    }

    # Serialize before touching the file: if an annotation cannot be encoded,
    # the error is raised here and an earlier export is not truncated.
    serialized = json.dumps(payload, indent=2)

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(serialized, encoding='utf-8')

    print(f"Exported {len(annotations)} annotations to {path}")

# Run when ready to export
# export_annotations()

In [None]:
# Recap: how many samples were annotated, plus a preview of the first three
print(f"Annotations collected: {len(annotations)}")
if len(annotations) > 0:
    print("\nSample annotations:")
    preview = list(annotations.items())[:3]
    for sample_idx, entry in preview:
        temporal = entry['temporal']
        spatial = entry['spatial']
        ontological = entry['ontological']
        print(f"  {sample_idx}: T={temporal}, S={spatial}, O={ontological}")