# Turn Signal Detection - Results Visualization Notebook

This notebook visualizes results from test runs executed on the cluster via sbatch.

## Workflow:
1. **On Cluster**: Run `test_pipeline.py` or `compare_prompts.py` via sbatch
2. **Results Saved**: Pipeline saves JSON results with predictions
3. **In Notebook**: Load results and visualize predictions vs ground truth

## Features:
- Load pre-computed test results
- Visualize predictions frame-by-frame
- Compare multiple prompt results
- Analyze failure cases
- Generate comparison reports

## Note on Turn Signals:
Turn signals can blink on/off within a sequence. A sequence might have:
- Frames 0-10: Signal OFF
- Frames 11-30: Signal ON (blinking)
- Frames 31-40: Signal OFF again

For video mode, the model outputs a single label for the whole sequence.
For single-image mode, each frame gets its own prediction.

In [None]:
import sys
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.gridspec import GridSpec
import numpy as np
import pandas as pd
from IPython.display import display, HTML
import json
from collections import Counter, defaultdict
import cv2
from PIL import Image

# Setup plotting
%matplotlib inline
plt.rcParams['figure.figsize'] = (16, 8)
plt.rcParams['figure.dpi'] = 100

print("✓ Imports successful")

## 1. Configuration

In [None]:
# === CONFIGURATION ===

# Where test_pipeline.py wrote its results.
# For compare_prompts.py output, point this at "prompt_results" instead:
# RESULTS_DIR = "prompt_results"
RESULTS_DIR = "results/qwen25_vl_video/test_runs"

# Root that image paths are joined onto when frames are loaded
IMAGE_BASE_DIR = "/gpfs/space/projects/ml2024/"

# Echo the active settings
for caption, value in (("Results directory", RESULTS_DIR), ("Image base", IMAGE_BASE_DIR)):
    print(f"{caption}: {value}")

## 2. Browse Available Results

In [None]:
results_path = Path(RESULTS_DIR)

# Recursively collect result files by extension
csv_files, json_files = (
    list(results_path.rglob(pattern)) for pattern in ('*.csv', '*.json')
)

print(f"Found in {results_path}:")
for kind, found in (("CSV", csv_files), ("JSON", json_files)):
    print(f"  {kind} files: {len(found)}")

# List up to five JSON files, most recently modified first
newest_json = sorted(json_files, key=lambda p: p.stat().st_mtime, reverse=True)
if newest_json:
    print(f"\nRecent JSON files:")
    for recent in newest_json[:5]:
        print(f"  {recent.name}")

## 3. Load Predictions

Load predictions from CSV or JSON files. CSV files take precedence: JSON is only loaded when no CSV file was found.

In [None]:
def load_predictions_from_csv(csv_path):
    """Read a predictions CSV into a pandas DataFrame.

    The table is returned exactly as pandas parses it; no grouping by
    sequence or other post-processing is applied here.

    Args:
        csv_path: path of the CSV file to read

    Returns:
        DataFrame with one row per CSV record.
    """
    return pd.read_csv(csv_path)

def load_predictions_from_json(json_path):
    """Load predictions from a JSON file.

    Args:
        json_path: path of the JSON file to read

    Returns:
        The decoded JSON document (whatever type its root value has).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding, which may be ASCII (e.g. under a C/POSIX locale) and would
    # fail on non-ASCII text in the results.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Load one result file for inspection.
# CSV results take precedence; JSON is only consulted when no CSV was found.

# Option 1: Load from CSV
if csv_files:
    result_file = csv_files[0]  # Or choose specific file
    print(f"Loading CSV: {result_file.name}")
    predictions_df = load_predictions_from_csv(result_file)
    display(predictions_df.head(10))

# Option 2: Load from JSON
elif json_files:
    # Filter out summary files, look for sequence-specific files
    summary_markers = ('dataset_summary', 'pipeline_report')
    sequence_files = [
        f for f in json_files
        if not any(marker in f.name for marker in summary_markers)
    ]

    if sequence_files:
        result_file = sequence_files[0]
        print(f"Loading JSON: {result_file.name}")
        predictions_json = load_predictions_from_json(result_file)

        # Show structure; only a dict root has keys to list
        if isinstance(predictions_json, dict):
            print(f"\nJSON keys: {list(predictions_json.keys())}")
            if 'predictions' in predictions_json:
                preds = predictions_json['predictions']
                print(f"Number of predictions: {len(preds)}")
                # Guard the preview: an empty list has no first element
                if isinstance(preds, list) and preds:
                    print(f"\nFirst prediction:")
                    print(json.dumps(preds[0], indent=2))
        else:
            print(f"\nJSON root type: {type(predictions_json).__name__}")
    else:
        print("Only summary/report JSON files found; no sequence-specific file to load")

else:
    print("No CSV or JSON result files found")

## 4. Helper Functions for Visualization

In [None]:
def load_image(image_path):
    """Load an image from disk and return it as an RGB array.

    Args:
        image_path: location of the image file (converted with str())

    Returns:
        The image with channels converted from BGR to RGB, or None when
        the image could not be read or converted.
    """
    try:
        bgr = cv2.imread(str(image_path))
        if bgr is None:
            # Nothing was decoded from this path
            return None
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    except cv2.error:
        # Best-effort loader: OpenCV failures mean "no image". Unlike a bare
        # except, KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

def visualize_frame_prediction(image, prediction, ground_truth=None):
    """
    Draw one frame with its predicted label (and optional ground truth).

    Args:
        image: numpy array (H, W, 3)
        prediction: dict with 'label', 'confidence'; missing keys fall back
            to 'none' and 0.0
        ground_truth: optional ground truth label; when given, a second
            caption marks whether the prediction matches it

    Returns:
        The matplotlib Figure that was created.
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    ax.imshow(image)
    ax.axis('off')

    # Caption colour per signal state; unknown labels are drawn in white
    palette = dict(none='gray', left='yellow', right='orange', both='red')
    # Dark rounded box keeps captions readable on top of the image
    box_style = dict(boxstyle='round', facecolor='black', alpha=0.7)

    pred_label = prediction.get('label', 'none')
    pred_conf = prediction.get('confidence', 0.0)

    # Prediction caption
    ax.text(
        10, 30, f"Pred: {pred_label.upper()} ({pred_conf:.2f})",
        fontsize=14, color=palette.get(pred_label, 'white'),
        bbox=dict(box_style),
    )

    # Ground-truth caption, coloured by whether the prediction agrees
    if ground_truth:
        if pred_label == ground_truth:
            verdict, verdict_color = "✓", 'lime'
        else:
            verdict, verdict_color = "✗", 'red'
        ax.text(
            10, 60, f"GT: {ground_truth.upper()} {verdict}",
            fontsize=14, color=verdict_color,
            bbox=dict(box_style),
        )

    plt.tight_layout()
    return fig

def plot_sequence_timeline(predictions, ground_truth=None):
    """
    Plot timeline of predictions for a sequence.

    The top panel shows the predicted signal state per frame (plus the
    ground truth when given); the bottom panel shows the confidence.

    Args:
        predictions: list of prediction dicts; a missing 'label' counts as
            'none' and a missing 'confidence' as 0.0
        ground_truth: optional ground truth, either a list of per-frame
            labels or a single label string for the whole sequence

    Returns:
        The matplotlib Figure holding both panels.
    """
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 6),
                                    gridspec_kw={'height_ratios': [2, 1]})

    # Extract data
    frames = list(range(len(predictions)))
    labels = [p.get('label', 'none') for p in predictions]
    confidences = [p.get('confidence', 0.0) for p in predictions]

    # Map labels to numbers (unknown labels are plotted as 'none')
    label_map = {'none': 0, 'left': 1, 'right': 2, 'both': 3}
    label_nums = [label_map.get(l, 0) for l in labels]

    # Plot labels
    ax1.plot(frames, label_nums, 'o-', linewidth=2, markersize=6, label='Prediction')

    # Plot ground truth if available
    if ground_truth:
        if isinstance(ground_truth, str):
            # A single sequence-level label applies to every frame; iterating
            # the string itself would yield characters, not labels.
            gt_labels = [ground_truth] * len(frames)
        else:
            gt_labels = list(ground_truth)
        # Trim to the common length so x and y always have matching sizes
        gt_nums = [label_map.get(gt, 0) for gt in gt_labels][:len(frames)]
        ax1.plot(frames[:len(gt_nums)], gt_nums, 's-', linewidth=1, markersize=4,
                 alpha=0.7, label='Ground Truth', color='green')

    ax1.set_ylabel('Signal State', fontsize=12)
    ax1.set_yticks([0, 1, 2, 3])
    ax1.set_yticklabels(['None', 'Left', 'Right', 'Both'])
    ax1.set_title('Turn Signal Predictions Over Time', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3)
    ax1.legend()

    # Plot confidence
    ax2.fill_between(frames, confidences, alpha=0.5, color='blue')
    ax2.plot(frames, confidences, 'b-', linewidth=2)
    ax2.axhline(y=0.5, color='r', linestyle='--', alpha=0.5, label='Threshold (0.5)')
    ax2.set_xlabel('Frame Index', fontsize=12)
    ax2.set_ylabel('Confidence', fontsize=12)
    ax2.set_ylim([0, 1])
    ax2.grid(True, alpha=0.3)
    ax2.legend()

    plt.tight_layout()
    return fig

print("✓ Visualization functions loaded")

print("✓ Visualization functions loaded")

## 5. Load and Visualize Specific Sequence

In [None]:
# === CONFIGURE SEQUENCE TO VISUALIZE ===

# Sequence ID (find from your results)
SEQUENCE_ID = "2024-07-09-16-49-42_mapping_tartu_streets/camera_wide_right_170"
TRACK_ID = 170

# File names use the sequence ID with '/' flattened to '_'
seq_slug = SEQUENCE_ID.replace('/', '_')

# Current naming: <run dir>/sequences/<slug>__track_<id>.json
pred_base = f"qwen25_vl_20260204_113951/sequences/{seq_slug}__track_{TRACK_ID}.json"
PREDICTIONS_FILE = results_path / pred_base

# Older naming kept as a fallback: <slug>_track_<id>.json directly under the results dir
legacy = results_path / f"{seq_slug}_track_{TRACK_ID}.json"
if not PREDICTIONS_FILE.exists() and legacy.exists():
    PREDICTIONS_FILE = legacy

# Echo the resolved selection
for caption, value in (
    ("Sequence", SEQUENCE_ID),
    ("Track", TRACK_ID),
    ("Predictions file", PREDICTIONS_FILE),
    ("Exists", PREDICTIONS_FILE.exists()),
):
    print(f"{caption}: {value}")


In [None]:
# Load predictions for this sequence.
# Defaults are assigned first so that later cells can safely test
# `predictions` / `seq_data` even when the file is missing.
seq_data = {}
predictions = []

if PREDICTIONS_FILE.exists():
    with open(PREDICTIONS_FILE, 'r', encoding='utf-8') as f:
        seq_data = json.load(f)

    predictions = seq_data.get('predictions', [])
    print(f"Loaded {len(predictions)} predictions")

    # Show first prediction
    if predictions:
        print(f"\nFirst prediction:")
        print(json.dumps(predictions[0], indent=2))
else:
    print(f"⚠️  File not found: {PREDICTIONS_FILE}")
    print(f"\nAvailable files:")
    # Search recursively: sequence files live in nested <run>/sequences/
    # folders, so a flat glob of the results directory would miss them.
    for f in sorted(results_path.rglob('*.json'))[:10]:
        print(f"  {f.relative_to(results_path)}")

In [None]:
# Plot timeline and print label/confidence statistics for the loaded sequence
if predictions:
    # Extract ground truth if available (None when metadata or key is absent)
    metadata = seq_data.get('metadata') or {}
    ground_truth = metadata.get('ground_truth')

    fig = plot_sequence_timeline(predictions, ground_truth)
    plt.show()

    # Print statistics. Missing keys use the same defaults as
    # plot_sequence_timeline so the numbers match what is plotted.
    labels = [p.get('label', 'none') for p in predictions]
    label_counts = Counter(labels)

    print(f"\nLabel Distribution:")
    for label, count in label_counts.most_common():
        pct = count / len(predictions) * 100
        print(f"  {label:8s}: {count:3d} frames ({pct:5.1f}%)")

    confidences = [p.get('confidence', 0.0) for p in predictions]
    print(f"\nConfidence Statistics:")
    print(f"  Mean: {np.mean(confidences):.3f}")
    print(f"  Std:  {np.std(confidences):.3f}")
    print(f"  Min:  {np.min(confidences):.3f}")
    print(f"  Max:  {np.max(confidences):.3f}")

## 6. Visualize Sample Frames with Predictions

In [None]:
# Load CSV with image paths (from original tracking data)
TRACKING_CSV = "data/tracking_data.csv"

# Stays None when the tracking data is unavailable; later cells check for this
seq_df = None

if Path(TRACKING_CSV).exists():
    tracking_df = pd.read_csv(TRACKING_CSV)

    # Filter for this sequence
    seq_df = tracking_df[
        (tracking_df['sequence_id'] == SEQUENCE_ID) &
        (tracking_df['track_id'] == TRACK_ID)
    ].sort_values('frame_id')

    print(f"Found {len(seq_df)} frames in tracking data")

    # Show sample. Only columns that exist are previewed: 'true_label' is
    # treated as optional by the frame visualization, so it must not be
    # required here either.
    preview_cols = [c for c in ('frame_id', 'crop_path', 'true_label') if c in seq_df.columns]
    display(seq_df[preview_cols].head())
else:
    print(f"⚠️  Tracking CSV not found: {TRACKING_CSV}")

In [None]:
# Visualize frames at regular intervals (at most 12, laid out 4 per row)
if seq_df is not None and predictions:
    num_samples = min(12, len(predictions))
    sample_indices = np.linspace(0, len(predictions) - 1, num_samples, dtype=int)

    rows = (num_samples + 3) // 4
    cols = min(4, num_samples)

    # squeeze=False always returns a 2-D array of Axes, so flattening works
    # for a single subplot as well
    fig, axes = plt.subplots(rows, cols, figsize=(16, rows * 4), squeeze=False)
    axes = axes.flatten()

    # Overlay styling shared by all subplots
    color_map = {'none': 'gray', 'left': 'yellow', 'right': 'orange', 'both': 'red'}
    box_style = {'boxstyle': 'round', 'facecolor': 'black', 'alpha': 0.7}

    # Keep one row per frame_id so .loc always yields a single row (a
    # duplicated frame_id would make it return a DataFrame instead)
    seq_df_by_id = seq_df.drop_duplicates('frame_id').set_index('frame_id', drop=False)

    for idx, frame_idx in enumerate(sample_indices):
        pred = predictions[frame_idx]
        pred_frame_id = pred.get('frame_id', frame_idx)

        # Get image path from tracking data (match by frame_id if possible,
        # otherwise by position when the tracking data is long enough)
        if pred_frame_id in seq_df_by_id.index:
            frame_row = seq_df_by_id.loc[pred_frame_id]
        elif frame_idx < len(seq_df):
            frame_row = seq_df.iloc[frame_idx]
        else:
            frame_row = None

        # Load image; a missing row or empty path counts as "not found"
        img = None
        if frame_row is not None and pd.notna(frame_row.get('crop_path')):
            img = load_image(Path(IMAGE_BASE_DIR) / str(frame_row['crop_path']))

        if img is not None:
            axes[idx].imshow(img)

            # Add prediction overlay (same defaults as the timeline plot)
            label = pred.get('label', 'none')
            conf = pred.get('confidence', 0.0)
            color = color_map.get(label, 'white')

            axes[idx].text(10, 30, f"{label.upper()}\n{conf:.2f}",
                           fontsize=10, color=color, weight='bold',
                           bbox=dict(box_style))

            # Add ground truth when the tracking data provides one
            gt = frame_row.get('true_label')
            if pd.notna(gt):
                match = "✓" if label == gt else "✗"
                gt_color = 'lime' if label == gt else 'red'
                axes[idx].text(10, 60, f"GT: {gt} {match}",
                               fontsize=9, color=gt_color,
                               bbox=dict(box_style))

            axes[idx].set_title(f"Frame {pred_frame_id}", fontsize=10)
        else:
            axes[idx].text(0.5, 0.5, 'Image not found',
                           ha='center', va='center', transform=axes[idx].transAxes)

        axes[idx].axis('off')

    # Hide unused subplots
    for idx in range(num_samples, len(axes)):
        axes[idx].axis('off')

    plt.tight_layout()
    plt.show()


## 7. Compare Multiple Prompt Results

In [None]:
# Load multiple prompt comparison results
COMPARISON_DIR = "prompt_results"
comparison_path = Path(COMPARISON_DIR)

if not comparison_path.exists():
    print(f"Comparison directory not found: {COMPARISON_DIR}")
else:
    # Top-level JSON files, skipping any whose name contains 'comparison'
    result_files = [p for p in comparison_path.glob('*.json') if 'comparison' not in p.name]

    # One summary row per result file; absent fields fall back to defaults
    comparison_data = []
    for rf in result_files:
        with open(rf, 'r') as f:
            data = json.load(f)

        metrics = data.get('metrics', {})
        comparison_data.append(dict(
            Prompt=Path(data.get('prompt_file', '')).stem,
            Timestamp=data.get('timestamp', ''),
            Sequences=data.get('num_sequences', 0),
            Accuracy=data.get('accuracy', 0.0),
            Avg_Latency_ms=metrics.get('avg_latency_ms', 0.0),
            Parse_Success=metrics.get('parse_success_rate', 0.0),
        ))

    if comparison_data:
        # Best accuracy first
        comparison_df = pd.DataFrame(comparison_data).sort_values('Accuracy', ascending=False)

        print(f"Comparing {len(comparison_data)} prompt results:\n")
        display(comparison_df)

        # Side-by-side horizontal bars: accuracy (left) and latency (right)
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
        prompt_names = comparison_df['Prompt']

        ax1.barh(prompt_names, comparison_df['Accuracy'])
        ax1.set_xlabel('Accuracy')
        ax1.set_title('Prompt Accuracy Comparison')
        ax1.set_xlim([0, 1])

        ax2.barh(prompt_names, comparison_df['Avg_Latency_ms'])
        ax2.set_xlabel('Average Latency (ms)')
        ax2.set_title('Prompt Latency Comparison')

        plt.tight_layout()
        plt.show()

## 8. Analyze Failure Cases

In [None]:
# Find sequences where prediction != ground truth
# This requires loading results from compare_prompts.py format

if comparison_path.exists():
    if not result_files:
        # Nothing to analyze; indexing the latest file would raise IndexError
        print(f"No result files found in {comparison_path}")
    else:
        # Load latest result (most recently modified file)
        latest_result = sorted(result_files, key=lambda f: f.stat().st_mtime)[-1]

        with open(latest_result, 'r', encoding='utf-8') as f:
            result_data = json.load(f)

        if 'results' in result_data:
            results_list = result_data['results']

            # Find failures among video-mode results that carry a ground truth
            failures = []
            evaluated = 0

            for r in results_list:
                gt = r.get('ground_truth')
                if not gt or r.get('mode') != 'video':
                    continue

                evaluated += 1
                # A missing/null prediction yields label None, which never
                # equals the ground truth and is therefore listed as a failure
                prediction = r.get('prediction') or {}
                pred_label = prediction.get('label')
                if pred_label != gt:
                    failures.append({
                        'sequence_id': r.get('sequence_id'),
                        'ground_truth': gt,
                        'predicted': pred_label,
                        'confidence': prediction.get('confidence'),
                        # reasoning may be absent or null; keep a short excerpt
                        'reasoning': (prediction.get('reasoning') or '')[:100]
                    })

            if failures:
                print(f"\nFound {len(failures)} failure cases:\n")
                failures_df = pd.DataFrame(failures)
                display(failures_df)
            elif evaluated:
                print("\n✓ No failures found! Perfect accuracy.")
            else:
                # Do not claim perfect accuracy when nothing was compared
                print("\n⚠️  No video-mode results with ground truth to evaluate.")

## 9. Export Analysis

Save analysis results for reporting.

In [None]:
# Output folder for exported tables. Defined up front so that each export
# below works on its own; it is only created when something is written.
output_dir = Path('notebook_analysis')

# Export comparison table (only if the comparison cell produced one)
if 'comparison_df' in locals():
    output_dir.mkdir(exist_ok=True)

    comparison_df.to_csv(output_dir / 'prompt_comparison.csv', index=False)
    print(f"✓ Saved prompt comparison to {output_dir / 'prompt_comparison.csv'}")

# Export failure cases (only if the failure analysis produced any)
if 'failures_df' in locals():
    output_dir.mkdir(exist_ok=True)

    failures_df.to_csv(output_dir / 'failure_cases.csv', index=False)
    print(f"✓ Saved failure cases to {output_dir / 'failure_cases.csv'}")