# SAM3 Grid Search Quick Analysis
Efficient analysis of SAM3 boundary generation configurations with ground truth comparison.

In [1]:
import os
import sys
import json
import glob
import warnings
from pathlib import Path
from typing import Dict, List, Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import cv2

# Configuration: every path is relative to the notebook's working directory.
DATA_ROOT = Path("../data/cityscapes")
GRID_SEARCH_ROOT = Path("../grid_search_results")
RESULTS_DIR = GRID_SEARCH_ROOT / "analysis"
# Created eagerly so later cells can write CSVs, figures and the report
# without checking for the directory first.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Plot styling shared by all figures produced below.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")
# NOTE: this silences *every* warning for the rest of the session, including
# deprecation warnings raised by the plotting and pandas calls below.
warnings.filterwarnings('ignore')

# The check mark was stored as mojibake (UTF-8 bytes decoded as cp1252).
print("✓ Paths configured")
print(f"  Data: {DATA_ROOT}")
print(f"  Results: {RESULTS_DIR}")

✓ Paths configured
  Data: ../data/cityscapes
  Results: ../grid_search_results/analysis


## 1. Discover and Load Configuration Results

In [None]:
def discover_configs(grid_search_root=None):
    """Find all generated mask configurations under ``<root>/masks``.

    A configuration is any directory below ``masks/`` that

    * is named ``sam3_boundary_<name>``,
    * has a ``val`` sub-directory, and
    * contains at least one ``val/<subdir>/<file>.npy`` file.

    ``<name>`` is split on ``_``: the first token is reported as the method
    and the second as the prompt (``"unknown"`` when there is no second
    token).

    Args:
        grid_search_root: Directory that contains ``masks/``. Defaults to the
            module-level ``GRID_SEARCH_ROOT`` when omitted.

    Returns:
        Dict mapping ``<name>`` to a dict with keys ``"path"`` (the
        configuration directory as a ``Path``), ``"method"``, ``"prompt"``
        and ``"num_files"``. Empty when ``masks/`` does not exist. If two
        directories yield the same ``<name>``, the one visited last wins.
    """
    prefix = "sam3_boundary_"
    search_root = GRID_SEARCH_ROOT if grid_search_root is None else Path(grid_search_root)
    masks_dir = search_root / "masks"
    if not masks_dir.exists():
        return {}

    configs = {}

    for root, dirs, _files in os.walk(masks_dir):
        if "val" not in dirs:
            continue

        config_dir = Path(root)
        # Check the cheap name test before touching the (potentially large)
        # val/ tree.
        if not config_dir.name.startswith(prefix):
            continue

        # The masks are counted with a glob below, so there is no need for
        # os.walk to list the whole val/ tree a second time.
        dirs.remove("val")

        num_files = sum(1 for _ in (config_dir / "val").glob("*/*.npy"))
        if num_files == 0:
            continue

        # Strip only the leading prefix; str.replace would also delete any
        # later occurrence of the same text inside the name.
        name = config_dir.name[len(prefix):]
        parts = name.split("_")

        configs[name] = {
            "path": config_dir,
            "method": parts[0],
            "prompt": parts[1] if len(parts) > 1 else "unknown",
            "num_files": num_files,
        }

    return configs

# Scan the grid-search output once; later cells read this mapping.
configs = discover_configs()
print(f"✓ Found {len(configs)} configurations with generated masks")
if configs:
    print("\nFound configurations:")
    # Only a short preview is printed; say so when entries are left out.
    preview_limit = 5
    for info in list(configs.values())[:preview_limit]:
        print(f"  {info['method']:12} {info['prompt']:16} - {info['num_files']:3} masks")
    if len(configs) > preview_limit:
        print(f"  ... and {len(configs) - preview_limit} more")
else:
    print("⚠ No masks found yet")
    print("Run: python grid_search_sam3_generation.py --data_root ./data/cityscapes")
    print("Or:  python quick_launch_sam3.py balanced --data_root ./data/cityscapes")

## 2. Load Validation Images and Ground Truth

In [None]:
def get_validation_images(data_root=None):
    """Map each validation image that has a label file to that file.

    Images are looked up as
    ``<root>/leftImg8bit_trainvaltest/leftImg8bit/val/<city>/*_leftImg8bit.png``
    and paired with
    ``<root>/gtFine_trainvaltest/gtFine/val/<city>/<stem>_gtFine_labelIds.png``.
    Images without a matching label file are left out.

    Args:
        data_root: Dataset root directory. Defaults to the module-level
            ``DATA_ROOT`` when omitted.

    Returns:
        Dict mapping ``"<city>_<stem>"`` to ``{"gt": <label Path>}``, where
        ``<city>`` is the image's parent directory name and ``<stem>`` is the
        image file name without ``_leftImg8bit.png``. Entries are inserted in
        sorted path order so repeated runs iterate identically.
    """
    root = DATA_ROOT if data_root is None else Path(data_root)
    images_dir = root / "leftImg8bit_trainvaltest" / "leftImg8bit" / "val"
    gt_dir = root / "gtFine_trainvaltest" / "gtFine" / "val"
    image_suffix = "_leftImg8bit"

    val_images = {}
    # glob.glob returns files in arbitrary order; sort for reproducibility.
    for img_file in sorted(glob.glob(str(images_dir / "*" / f"*{image_suffix}.png"))):
        img_path = Path(img_file)
        city = img_path.parent.name
        # The glob pattern guarantees the stem ends with the suffix, so slice
        # it off instead of str.replace, which would also remove the same
        # text anywhere else in the name.
        filename = img_path.stem[: -len(image_suffix)]

        gt_path = gt_dir / city / f"{filename}_gtFine_labelIds.png"
        if gt_path.exists():
            val_images[f"{city}_{filename}"] = {"gt": gt_path}

    return val_images

def extract_boundary_from_gt(gt_path, thin_classes=(4, 5, 6, 7, 11, 12, 17, 18)):
    """Build a boolean boundary mask from a label image.

    Pixels whose label value is in ``thin_classes`` form a binary mask; the
    boundary is the Canny edge map of that mask (thresholds 100/200) dilated
    once with a 3x3 kernel.

    Args:
        gt_path: Path of the label image to read.
        thin_classes: Label values whose outlines make up the boundary. The
            default keeps the values that were previously hard-coded.

    Returns:
        Boolean array shaped like the label image; all ``False`` when no
        pixel belongs to ``thin_classes``.
    """
    # NOTE(review): the default IDs look like Cityscapes *trainIds* (fence,
    # pole, traffic light, traffic sign, person, rider, motorcycle, bicycle),
    # but the files passed in by this notebook are ``*_gtFine_labelIds.png``.
    # If those classes are intended, the labelIds would presumably be
    # (13, 17, 19, 20, 24, 25, 32, 33) - confirm against the dataset's label
    # table before trusting the metrics.
    # The context manager closes the file handle as soon as pixels are copied.
    with Image.open(gt_path) as label_image:
        labels = np.array(label_image, dtype=np.uint8)

    thin_mask = np.isin(labels, list(thin_classes))

    if not thin_mask.any():
        return np.zeros_like(thin_mask, dtype=bool)

    # Canny expects an 8-bit image, so the mask is scaled to 0/255.
    edges = cv2.Canny(thin_mask.astype(np.uint8) * 255, 100, 200)
    # Thicken the one-pixel edges slightly.
    boundary = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
    return boundary.astype(bool)

# Index the validation split once; the evaluation cell iterates over this.
val_images = get_validation_images()
print(f"✓ Found {len(val_images)} validation images")

## 3. Compute Metrics for All Configurations

In [None]:
def compute_metrics(pred, gt):
    """Score a predicted binary mask against a reference mask.

    Both inputs are cast to ``bool`` before comparison. Every ratio whose
    denominator is zero is reported as ``0.0``.

    Returns:
        Dict with the keys ``"IoU"``, ``"Dice"``, ``"Precision"``,
        ``"Recall"`` and ``"F1"``.
    """
    pred_mask = pred.astype(bool)
    gt_mask = gt.astype(bool)

    def safe_ratio(numerator, denominator):
        # Undefined ratios are mapped to 0.0 instead of raising or NaN.
        if denominator > 0:
            return numerator / denominator
        return 0.0

    # Confusion counts over all pixels.
    true_pos = np.logical_and(pred_mask, gt_mask).sum()
    false_pos = np.logical_and(pred_mask, np.logical_not(gt_mask)).sum()
    false_neg = np.logical_and(np.logical_not(pred_mask), gt_mask).sum()
    union_size = np.logical_or(pred_mask, gt_mask).sum()

    precision = safe_ratio(true_pos, true_pos + false_pos)
    recall = safe_ratio(true_pos, true_pos + false_neg)

    scores = {}
    scores["IoU"] = safe_ratio(true_pos, union_size)
    scores["Dice"] = safe_ratio(2 * true_pos, 2 * true_pos + false_pos + false_neg)
    scores["Precision"] = precision
    scores["Recall"] = recall
    scores["F1"] = safe_ratio(2 * (precision * recall), precision + recall)
    return scores

def evaluate_all_configs():
    """Score every discovered configuration against the ground truth.

    Reads the module-level ``configs`` and ``val_images``. For each
    validation image, the prediction is expected at
    ``<config path>/val/<city>/<filename>.npy``, where ``<city>`` is the part
    of the image id before the first ``_`` and ``<filename>`` is the rest.
    Pairs without a prediction file are skipped. Pairs whose prediction shape
    differs from the ground truth, or that raise while loading or scoring,
    are skipped as well, but are counted and reported instead of being
    dropped silently.

    Returns:
        ``pandas.DataFrame`` with one row per scored (configuration, image)
        pair: ``config``, ``method``, ``prompt``, ``image_id`` plus the keys
        returned by ``compute_metrics``. Rows are grouped by configuration in
        ``configs`` order. Empty when nothing could be scored.
    """
    if not configs:
        return pd.DataFrame()

    # Only configurations that actually have a val/ directory can contribute.
    val_dirs = {}
    for config_name, config_info in configs.items():
        val_dir = config_info["path"] / "val"
        if val_dir.exists():
            val_dirs[config_name] = val_dir

    rows_by_config = {config_name: [] for config_name in val_dirs}
    shape_mismatches = 0
    failures = []  # (path, exception) for every file that could not be used

    # Images form the outer loop so each ground-truth boundary is extracted
    # at most once and can be released before the next image is processed.
    for image_id, img_info in val_images.items():
        if "gt" not in img_info:
            continue

        city, _, filename = image_id.partition("_")
        gt_boundary = None

        for config_name, val_dir in val_dirs.items():
            mask_path = val_dir / city / f"{filename}.npy"
            if not mask_path.exists():
                continue

            if gt_boundary is None:
                # Deferred until some configuration has a mask for this image.
                try:
                    gt_boundary = extract_boundary_from_gt(img_info["gt"])
                except Exception as exc:  # best effort: keep evaluating others
                    failures.append((img_info["gt"], exc))
                    break  # this image cannot be scored for any configuration

            try:
                pred_boundary = np.load(mask_path)
                if pred_boundary.shape != gt_boundary.shape:
                    shape_mismatches += 1
                    continue
                metrics = compute_metrics(pred_boundary, gt_boundary)
            except Exception as exc:  # best effort: keep evaluating others
                failures.append((mask_path, exc))
                continue

            config_info = configs[config_name]
            rows_by_config[config_name].append({
                "config": config_name,
                "method": config_info["method"],
                "prompt": config_info["prompt"],
                "image_id": image_id,
                **metrics,
            })

    # Emit rows configuration by configuration, images in val_images order.
    results = [row for config_name in val_dirs for row in rows_by_config[config_name]]

    print(f"  Processed {len(results)} image-config pairs")
    if shape_mismatches:
        print(f"  Skipped {shape_mismatches} masks whose shape differs from the ground truth")
    if failures:
        first_path, first_exc = failures[0]
        print(f"  Skipped {len(failures)} files that raised errors (first: {first_path}: {first_exc!r})")
    return pd.DataFrame(results)

# results_df is always defined so the following cells can test for emptiness.
if configs:
    print("Evaluating all configurations...")
    results_df = evaluate_all_configs()
    print(f"✓ Computed metrics for {len(results_df)} comparisons")
else:
    results_df = pd.DataFrame()
    print("No configurations to evaluate")

## 4. Create Summary Rankings

In [None]:
if not results_df.empty:
    # Per-configuration summary, best mean IoU first. Named aggregation ties
    # each output column to its source statistic, so the column names cannot
    # drift out of step with the aggregation spec.
    config_summary = (
        results_df.groupby(["config", "method", "prompt"])
        .agg(
            IoU_mean=("IoU", "mean"),
            IoU_std=("IoU", "std"),
            num_images=("IoU", "count"),
            Dice=("Dice", "mean"),
            F1=("F1", "mean"),
            Precision=("Precision", "mean"),
            Recall=("Recall", "mean"),
        )
        .round(4)
        .reset_index()
        .sort_values("IoU_mean", ascending=False)
    )

    # Method and prompt summaries pool all images of all matching
    # configurations and share one aggregation spec.
    pooled_stats = {
        "IoU": ["mean", "std", "count"],
        "Dice": "mean",
        "F1": "mean",
    }
    method_summary = results_df.groupby("method").agg(pooled_stats).round(4)
    prompt_summary = results_df.groupby("prompt").agg(pooled_stats).round(4)

    print("="*80)
    print(f"TOP 10 CONFIGURATIONS BY IoU (from {len(config_summary)} total)")
    print("="*80)
    print(config_summary.head(10)[["config", "method", "prompt", "num_images", "IoU_mean", "Dice", "F1"]].to_string(index=False))

    print("\n" + "="*80)
    print("METHOD PERFORMANCE")
    print("="*80)
    print(method_summary)

    print("\n" + "="*80)
    print("PROMPT LEVEL PERFORMANCE")
    print("="*80)
    print(prompt_summary)

    # Persist both the ranking and the per-image rows for later inspection.
    config_summary.to_csv(RESULTS_DIR / "configuration_summary.csv", index=False)
    results_df.to_csv(RESULTS_DIR / "detailed_results.csv", index=False)
    print("\n✓ Results saved:")
    print(f"  {RESULTS_DIR / 'configuration_summary.csv'}")
    print(f"  {RESULTS_DIR / 'detailed_results.csv'}")
else:
    print("⚠ No evaluation results yet")
    print("\nTo generate results, run:")
    print("  python quick_launch_sam3.py balanced --data_root ./data/cityscapes")
    print("Then come back and run this notebook again.")

## 5. Visualizations

In [None]:
if not results_df.empty:
    # 1. Method performance: one box plot per metric, one box per method.
    methods = sorted(results_df["method"].unique())
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    fig.suptitle("Performance by Method", fontsize=14, fontweight='bold')

    for ax, metric in zip(axes, ["IoU", "Dice", "F1"]):
        data = [results_df.loc[results_df["method"] == m, metric].values for m in methods]
        # Tick labels are set separately: boxplot's ``labels=`` keyword is
        # deprecated in Matplotlib 3.9+ (renamed ``tick_labels``), while
        # set_xticklabels works on old and new releases alike.
        bp = ax.boxplot(data, patch_artist=True)
        ax.set_xticklabels(methods)

        for patch in bp['boxes']:
            patch.set_facecolor('lightblue')

        ax.set_ylabel(metric)
        ax.set_title(metric)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(RESULTS_DIR / "method_performance.png", dpi=100, bbox_inches='tight')
    plt.show()
    print("✓ Saved: method_performance.png")

    # 2. Heatmap: mean IoU for every (method, prompt) combination.
    fig, ax = plt.subplots(figsize=(10, 5))
    pivot = results_df.pivot_table(values="IoU", index="method", columns="prompt", aggfunc="mean")

    sns.heatmap(pivot, annot=True, fmt=".3f", cmap="RdYlGn", ax=ax, cbar_kws={"label": "IoU"})
    ax.set_title("IoU: Method vs Prompt Level", fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.savefig(RESULTS_DIR / "method_prompt_heatmap.png", dpi=100, bbox_inches='tight')
    plt.show()
    print("✓ Saved: method_prompt_heatmap.png")

    # 3. Top configs ranking (config_summary is already sorted by mean IoU).
    fig, ax = plt.subplots(figsize=(10, 6))
    top_configs = config_summary.head(15)

    y_pos = np.arange(len(top_configs))
    # The std of a configuration scored on a single image is NaN; draw no
    # error bar for it rather than passing NaN to Matplotlib.
    iou_spread = top_configs["IoU_std"].fillna(0).values
    ax.barh(y_pos, top_configs["IoU_mean"].values, color="steelblue", xerr=iou_spread)
    ax.set_yticks(y_pos)
    ax.set_yticklabels([c[:50] for c in top_configs["config"].values], fontsize=9)
    ax.set_xlabel("IoU Score", fontweight='bold')
    ax.set_title("Top 15 Configurations", fontsize=14, fontweight='bold')
    ax.invert_yaxis()  # best configuration at the top

    plt.tight_layout()
    plt.savefig(RESULTS_DIR / "top_configurations.png", dpi=100, bbox_inches='tight')
    plt.show()
    print("✓ Saved: top_configurations.png")
else:
    print("No data for visualizations")

## 6. Recommendations

In [None]:
if not results_df.empty:
    print("\n" + "="*80)
    print("RECOMMENDATIONS FOR BOUNDARY TRAINING")
    print("="*80)

    # Best overall: config_summary is sorted by mean IoU, best first.
    best = config_summary.iloc[0]
    print("\n🥇 OVERALL BEST")
    print(f"  Config: {best['config']}")
    print(f"  IoU: {best['IoU_mean']:.4f} ± {best['IoU_std']:.4f}")
    print(f"  Dice: {best['Dice']:.4f}")
    print(f"  F1: {best['F1']:.4f}")

    # Best per method: first (highest-IoU) row of each method.
    print("\n⚡ BEST PER METHOD")
    for method in sorted(results_df["method"].unique()):
        method_best = config_summary[config_summary["method"] == method].iloc[0]
        print(f"  {method:12}: {method_best['prompt']:16} (IoU: {method_best['IoU_mean']:.4f})")

    # Best per prompt: first (highest-IoU) row of each prompt level.
    print("\n💬 BEST PER PROMPT LEVEL")
    for prompt in sorted(results_df["prompt"].unique()):
        prompt_best = config_summary[config_summary["prompt"] == prompt].iloc[0]
        print(f"  {prompt:16}: {prompt_best['method']:12} (IoU: {prompt_best['IoU_mean']:.4f})")

    # Export report
    report = f"""
SAM3 GRID SEARCH RESULTS
Generated: {pd.Timestamp.now()}

BEST CONFIGURATION
{best['config']}
- IoU: {best['IoU_mean']:.4f} ± {best['IoU_std']:.4f}
- Dice: {best['Dice']:.4f}
- F1: {best['F1']:.4f}

METHOD COMPARISON
{method_summary.to_string()}

PROMPT LEVEL COMPARISON  
{prompt_summary.to_string()}

KEY INSIGHTS
- Total configurations tested: {len(config_summary)}
- Total image evaluations: {len(results_df)}
- Best performing method: {best['method']}
- Best performing prompt: {best['prompt']}
"""

    # The report contains a non-ASCII "±"; pin the encoding so the file does
    # not depend on the platform's default text encoding.
    with open(RESULTS_DIR / "RECOMMENDATIONS.txt", "w", encoding="utf-8") as f:
        f.write(report)

    print("\n✓ Full report saved to RECOMMENDATIONS.txt")
else:
    print("No results to show recommendations")