In [1]:
# Cell 1: Configuration and Setup
import os
import shutil
import torch
from PIL import Image
import cv2
import numpy as np
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Notebook-wide settings. Other cells read these keys at call time, and the
# search UI overwrites the two threshold entries before every search.
CONFIG = dict(
    # --- Model selection (Hugging Face OWL-ViT) ---
    # Valid ids: google/owlvit-base-patch32, google/owlvit-base-patch16,
    # google/owlvit-large-patch14
    MODEL_NAME='google/owlvit-base-patch32',
    # 'auto' is resolved to 'cuda' or 'cpu' by the dependency-check code below.
    DEVICE='auto',

    # --- Paths ---
    SUSPECTS_GALLERY_PATH='../../datasets/images/objects/raw',        # input images to search
    RESULTS_OUTPUT_PATH='../../datasets/images/objects/detections',   # where matches are copied

    # --- Detection parameters (OWL-ViT scores are typically low) ---
    CONFIDENCE_THRESHOLD=0.1,   # post-filter applied to the returned scores
    DETECTION_THRESHOLD=0.1,    # threshold handed to the processor's post-processing
    NMS_THRESHOLD=0.3,          # non-maximum suppression threshold

    # --- Processing / display ---
    BATCH_SIZE=4,               # default batch size forwarded to the search helpers
    MAX_RESULTS_DISPLAY=10,     # cap on results rendered at once
    FIGURE_SIZE=(12, 8),        # matplotlib figure size for result grids
)

# Catalogue of selectable OWL-ViT variants, keyed by short name. Every
# Hugging Face model id is the short key prefixed with "google/".
AVAILABLE_MODELS = {
    short_key: {
        'name': display_name,
        'model_id': f'google/{short_key}',
        'description': summary,
        'performance': speed_note,
    }
    for short_key, display_name, summary, speed_note in (
        (
            'owlvit-base-patch32',
            'OWL-ViT Base Patch32',
            'Fastest OWL-ViT model - good balance of speed and accuracy',
            'Base performance, fastest inference',
        ),
        (
            'owlvit-base-patch16',
            'OWL-ViT Base Patch16',
            'Higher resolution features - better accuracy, slower',
            'Better accuracy, moderate speed',
        ),
        (
            'owlvit-large-patch14',
            'OWL-ViT Large Patch14',
            'Best accuracy - largest model, slowest inference',
            'Best accuracy, slowest speed',
        ),
    )
}

# Startup banner echoing the effective settings.
print("\n".join([
    "✅ Configuration loaded successfully",
    f"📁 Suspects gallery: {CONFIG['SUSPECTS_GALLERY_PATH']}",
    f"📁 Results output: {CONFIG['RESULTS_OUTPUT_PATH']}",
    f"🦉 Selected model: {CONFIG['MODEL_NAME']}",
]))

# Cell 2: Install and Import Dependencies
# Run this cell first to install required packages
try:
    import transformers
    print("✅ Transformers already installed")
except ImportError:
    print("⚠️ Installing required packages...")
    !pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
    !pip install transformers
    !pip install ipywidgets
    !pip install Pillow
    !pip install matplotlib
    !pip install opencv-python
    print("📦 Installation complete")

# Import required libraries
try:
    import torch
    from transformers import OwlViTProcessor, OwlViTForObjectDetection
    import requests
    from PIL import Image
    print("✅ All dependencies imported successfully")

    # Check PyTorch device compatibility
    print(f"🔧 PyTorch version: {torch.__version__}")
    if torch.cuda.is_available():
        print(f"🚀 CUDA available: {torch.cuda.get_device_name(0)}")
        default_device = "cuda"
    else:
        print("🖥️ Using CPU mode (CUDA not available)")
        default_device = "cpu"

    # Update config with detected device
    if CONFIG['DEVICE'] == 'auto':
        CONFIG['DEVICE'] = default_device
        print(f"📍 Auto-detected device: {CONFIG['DEVICE']}")

except ImportError as e:
    print(f"❌ Import error: {e}")
    print("🔧 Troubleshooting steps:")
    print("1. Restart kernel and run this cell again")
    print("2. Check if all packages installed correctly")

# Cell 3: Initialize Model and Directories
def setup_directories():
    """Make sure the gallery (input) and results (output) folders exist.

    Both locations come from CONFIG. Folders that already exist are left
    untouched, and the confirmation line is printed either way.
    """
    gallery_dir = CONFIG['SUSPECTS_GALLERY_PATH']
    results_dir = CONFIG['RESULTS_OUTPUT_PATH']
    for folder in (gallery_dir, results_dir):
        os.makedirs(folder, exist_ok=True)
    print(f"📁 Created directories: {gallery_dir}, {results_dir}")

def load_owlvit_model(model_name=None):
    """Load an OWL-ViT processor and detection model and put the model in eval mode.

    Args:
        model_name: Hugging Face model id. When None, CONFIG['MODEL_NAME'] is used.

    Returns:
        tuple: ``(model, processor, device, model_name)`` on success, where
        ``device`` is CONFIG['DEVICE']. Any exception is reported together with
        troubleshooting hints and ``(None, None, None, None)`` is returned.
    """
    try:
        # Fall back to the configured model when the caller gave none.
        model_name = CONFIG['MODEL_NAME'] if model_name is None else model_name
        target_device = CONFIG['DEVICE']

        print(f"📥 Loading OWL-ViT model: {model_name}")
        print(f"🖥️ Target device: {target_device}")

        print("⏳ Loading processor...")
        loaded_processor = OwlViTProcessor.from_pretrained(model_name)

        print("⏳ Loading model weights...")
        detector = OwlViTForObjectDetection.from_pretrained(model_name)

        print(f"📍 Moving model to {target_device}...")
        detector = detector.to(target_device)
        detector.eval()

        for status_line in (
            "✅ Model loaded successfully!",
            f"   🦉 Model: {model_name}",
            f"   🖥️ Device: {target_device}",
        ):
            print(status_line)

        return detector, loaded_processor, target_device, model_name

    except Exception as e:
        # Deliberately broad: the notebook keeps running and callers check for None.
        print(f"❌ Error loading model: {e}")
        for hint in (
            "\n🔧 Troubleshooting steps:",
            "1. Check internet connection (models download from Hugging Face)",
            "2. Verify model name is correct",
            "3. Try switching to 'owlvit-base-patch32' for faster loading",
            "4. Restart kernel if memory issues occur",
        ):
            print(hint)
        return (None,) * 4

def switch_model(model_key):
    """Select another OWL-ViT variant by its short key and load it.

    Args:
        model_key: A key of AVAILABLE_MODELS (e.g. 'owlvit-base-patch16').

    Returns:
        tuple: Whatever load_owlvit_model() returns for the new model, or
        ``(None, None, None, None)`` when ``model_key`` is not a known key.
        CONFIG['MODEL_NAME'] is only modified for a known key.
    """
    # Guard clause: reject unknown keys before touching CONFIG.
    if model_key not in AVAILABLE_MODELS:
        print(f"❌ Unknown model: {model_key}")
        print(f"Available models: {list(AVAILABLE_MODELS.keys())}")
        return None, None, None, None

    chosen = AVAILABLE_MODELS[model_key]
    CONFIG['MODEL_NAME'] = chosen['model_id']
    print(f"🔄 Switched to: {chosen['name']}")
    return load_owlvit_model()

# Initialize: create the gallery/results folders, then load the configured model.
# When loading fails, load_owlvit_model() returns four Nones, so `model`,
# `processor`, `device` and `model_name` may all be None after this line.
setup_directories()
model, processor, device, model_name = load_owlvit_model()

# Cell 4: Core Search Functions
def process_image_batch(image_paths, model, processor, query, device, batch_size=4):
    """Run OWL-ViT over a list of images and collect those matching ``query``.

    Despite the name, images are processed strictly one at a time to keep
    memory use low; ``batch_size`` is accepted for interface compatibility
    and is not used.

    Args:
        image_paths: Paths of the images to scan. ``.name`` of each entry is
            used in progress and error messages.
        model: Detection model, called as ``model(**inputs)``.
        processor: Object that builds the model inputs and provides
            ``post_process_object_detection``.
        query: Free-text query; surrounding whitespace is stripped before it
            is passed to the processor.
        device: Device the processor inputs are moved to.
        batch_size: Unused.

    Returns:
        list[dict]: One entry per image that has at least one detection with
        score >= CONFIG['CONFIDENCE_THRESHOLD']. Keys: 'image_path', 'image'
        (the RGB PIL image), 'boxes', 'confidence_scores' (both tensors moved
        to CPU), 'labels' (query text per detection) and 'query' (the query
        exactly as passed in). Images that raise are reported and skipped.
    """
    import time  # only needed for the short throttle pause below

    batch_results = []

    # Process images one by one to avoid memory issues
    for i, img_path in enumerate(image_paths):
        try:
            # Progress indicator (every 5th image, overwritten in place via '\r')
            if i % 5 == 0:
                print(f"Processing {i+1}/{len(image_paths)}: {img_path.name[:30]}...", end='\r')

            # Load image. convert() returns an independent in-memory copy, so
            # the file handle can be closed right away instead of leaking one
            # open handle per gallery image until garbage collection.
            with Image.open(img_path) as source_image:
                image = source_image.convert("RGB")

            # OWL-ViT expects a list of text queries
            text_queries = [query.strip()]

            # Process inputs
            inputs = processor(text=text_queries, images=image, return_tensors="pt")
            inputs = inputs.to(device)

            # Run inference
            with torch.no_grad():
                outputs = model(**inputs)

            # Post-process results; PIL's size is (width, height), hence the reversal.
            target_sizes = torch.Tensor([image.size[::-1]])  # (height, width)
            results = processor.post_process_object_detection(
                outputs=outputs,
                target_sizes=target_sizes,
                threshold=CONFIG['DETECTION_THRESHOLD']
            )

            # Filter by confidence threshold
            if results and len(results) > 0:
                result = results[0]  # First (and only) image in batch

                if 'scores' in result and len(result['scores']) > 0:
                    high_conf_mask = result['scores'] >= CONFIG['CONFIDENCE_THRESHOLD']

                    if high_conf_mask.any():
                        # Move the kept tensors to CPU so accumulated results
                        # do not keep accelerator memory alive after the
                        # per-image cache clean-up below.
                        filtered_boxes = result['boxes'][high_conf_mask].detach().cpu()
                        filtered_scores = result['scores'][high_conf_mask].detach().cpu()
                        filtered_labels = result['labels'][high_conf_mask]

                        # Convert label indices back to the query text
                        filtered_labels_text = [text_queries[int(label)] for label in filtered_labels]

                        batch_results.append({
                            'image_path': img_path,
                            'image': image,
                            'boxes': filtered_boxes,
                            'confidence_scores': filtered_scores,
                            'labels': filtered_labels_text,
                            'query': query
                        })

            # Clear memory after each image
            del inputs, outputs, results, image
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        except Exception as e:
            # Best-effort scan: report the failure and move on to the next image.
            print(f"\n⚠️ Error processing {img_path.name}: {str(e)[:50]}...")
            continue

        # Small break every 10 images to prevent system overload
        if i % 10 == 0 and i > 0:
            time.sleep(0.1)

    return batch_results

def search_images_with_query(query, model, processor, device, model_name, gallery_path, batch_size=4):
    """Search every image in a folder for objects matching a text query.

    Args:
        query: Natural-language description of the object to find.
        model: Loaded detection model; None aborts the search.
        processor: Matching processor; None aborts the search.
        device: Device forwarded to process_image_batch().
        model_name: Model identifier, used only in the progress output.
        gallery_path: Folder whose direct children are scanned (no recursion).
        batch_size: Forwarded unchanged to process_image_batch().

    Returns:
        list: The matches returned by process_image_batch(), or an empty list
        when the folder is missing, holds no supported image files, the
        model/processor is not loaded, or processing raised.
    """
    results = []
    gallery_path = Path(gallery_path)

    if not gallery_path.exists():
        print(f"❌ Gallery path {gallery_path} does not exist")
        return results

    if model is None or processor is None:
        print("❌ Model or processor not loaded. Please check model initialization.")
        return results

    # Supported image extensions (compared case-insensitively)
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
    # Regular files only: a sub-directory named like "foo.jpg" must not reach
    # the image loader. Sorting makes the scan order, and therefore the
    # numbering of copied results, reproducible across platforms.
    image_files = sorted(
        entry for entry in gallery_path.iterdir()
        if entry.is_file() and entry.suffix.lower() in image_extensions
    )

    if not image_files:
        print(f"⚠️ No images found in {gallery_path}")
        return results

    total_files = len(image_files)

    print(f"🔍 Processing {total_files} images for: '{query}'")
    print(f"🖥️ Device: {device} | Processing one by one for stability")
    print(f"🦉 Model: {model_name}")

    # Process images one by one with progress tracking
    try:
        print("⏳ Starting image processing...")
        results = process_image_batch(image_files, model, processor, query, device, batch_size)

        # Show final progress
        matches_found = len(results)
        print(f"\n📊 Final: {total_files}/{total_files} processed | {matches_found} matches found")

    except Exception as e:
        # Best-effort: report the failure and return whatever `results` holds.
        print(f"❌ Processing error: {e}")
        print("💡 Try reducing confidence threshold or switching to owlvit-base-patch32")

    # Final summary
    print(f"✅ Complete: {len(results)} images with matches found")
    return results

def copy_results_to_folder(results, output_folder):
    """Copy every matched image into a new time-stamped sub-folder.

    Args:
        results: Iterable of result dicts providing 'image_path' (a
            ``pathlib.Path``) and 'confidence_scores' (supports ``len()`` and
            ``.max()``).
        output_folder: Base folder; created, including missing parents, if
            it does not exist yet.

    Returns:
        tuple: ``(search_folder, copied_files)`` where ``search_folder`` is
        ``<output_folder>/search_<YYYYmmdd_HHMMSS>`` and ``copied_files`` is
        the list of destination paths that were written. A result that cannot
        be copied is reported and skipped.
    """
    from datetime import datetime

    output_path = Path(output_folder)

    # Create subfolder with timestamp. parents=True lets callers pass a nested
    # base folder that does not exist yet (e.g. a per-query folder).
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    search_folder = output_path / f"search_{timestamp}"
    search_folder.mkdir(parents=True, exist_ok=True)

    copied_files = []

    for position, result in enumerate(results, start=1):
        # Reset per iteration so the error message never shows a stale path
        # and the handler never reads an unbound name.
        source_path = None
        try:
            source_path = result['image_path']
            # Descriptive filename: running number, original stem, best score
            scores = result['confidence_scores']
            max_conf = float(scores.max()) if len(scores) > 0 else 0.0
            filename = f"{position:03d}_{source_path.stem}_conf{max_conf:.2f}{source_path.suffix}"
            dest_path = search_folder / filename

            shutil.copy2(source_path, dest_path)
            copied_files.append(dest_path)

        except Exception as e:
            label = source_path if source_path is not None else f"result #{position}"
            print(f"❌ Error copying {label}: {e}")

    print(f"📋 Copied {len(copied_files)} files to {search_folder}")
    return search_folder, copied_files

# Cell 5: Interactive Query Interface
def create_search_interface():
    """Build the interactive search UI and return it as a single widget.

    The UI offers a model dropdown, a query box, two threshold sliders and
    four buttons (search, copy results, clear, switch model). The widgets
    start from the module-level ``model``, ``processor``, ``device`` and
    ``model_name``; switching models only replaces the interface's own
    references, not those globals.

    Returns:
        widgets.VBox: Controls stacked above the shared output area.
    """

    # Model selection dropdown
    model_options = [(f"{info['name']} - {info['description']}", key)
                    for key, info in AVAILABLE_MODELS.items()]

    # Pre-select the variant that is actually loaded, so the dropdown never
    # shows one model while searches run on another. Falls back to the base
    # patch32 entry when the loaded id is not in the catalogue (or nothing
    # was loaded).
    loaded_model_key = next(
        (key for key, info in AVAILABLE_MODELS.items() if info['model_id'] == model_name),
        'owlvit-base-patch32'
    )

    model_selector = widgets.Dropdown(
        options=model_options,
        value=loaded_model_key,
        description='Model:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='500px')
    )

    # Detection threshold slider (OWL-ViT specific)
    detection_threshold_slider = widgets.FloatSlider(
        value=CONFIG['DETECTION_THRESHOLD'],
        min=0.01,
        max=0.5,
        step=0.01,
        description='Detection Threshold:',
        style={'description_width': 'initial'}
    )

    # Input widgets
    query_input = widgets.Text(
        value='person with weapon',
        placeholder='Enter your search query (e.g., "person with weapon", "suspicious vehicle")',
        description='Query:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='500px')
    )

    confidence_slider = widgets.FloatSlider(
        value=CONFIG['CONFIDENCE_THRESHOLD'],
        min=0.01,
        max=0.5,
        step=0.01,
        description='Min Confidence:',
        style={'description_width': 'initial'}
    )

    search_button = widgets.Button(
        description='🔍 Search Gallery',
        button_style='primary',
        layout=widgets.Layout(width='150px')
    )

    # Stays disabled until a search has produced at least one match.
    copy_button = widgets.Button(
        description='📋 Copy Results',
        button_style='success',
        layout=widgets.Layout(width='150px'),
        disabled=True
    )

    clear_button = widgets.Button(
        description='🗑️ Clear Results',
        button_style='warning',
        layout=widgets.Layout(width='150px')
    )

    switch_model_button = widgets.Button(
        description='🔄 Switch Model',
        button_style='info',
        layout=widgets.Layout(width='150px')
    )

    # Output area shared by all handlers
    output_area = widgets.Output()

    # State shared between the handlers below (rebound via `nonlocal`)
    search_results = []
    current_model = model
    current_processor = processor
    current_device = device
    current_model_name = model_name

    def on_model_switch_clicked(b):
        """Load the variant chosen in the dropdown; keep the old one on failure."""
        nonlocal current_model, current_processor, current_device, current_model_name
        with output_area:
            selected_model = model_selector.value
            print(f"🔄 Switching to: {AVAILABLE_MODELS[selected_model]['name']}")
            new_model, new_processor, new_device, new_model_name = switch_model(selected_model)
            if new_model and new_processor:
                current_model = new_model
                current_processor = new_processor
                current_device = new_device
                current_model_name = new_model_name
                print("✅ Model switched successfully!")
            else:
                print("❌ Failed to switch model")

    def on_search_clicked(b):
        """Run one search with the current query and slider values and show the hits."""
        nonlocal search_results
        with output_area:
            clear_output(wait=True)

            if not current_model or not current_processor:
                print("❌ Model not loaded. Please switch to a valid model first.")
                return

            query = query_input.value.strip()
            if not query:
                print("⚠️ Please enter a search query")
                return

            # The search helpers read the thresholds from CONFIG, so the
            # slider values are written there before searching.
            CONFIG['CONFIDENCE_THRESHOLD'] = confidence_slider.value
            CONFIG['DETECTION_THRESHOLD'] = detection_threshold_slider.value

            print(f"🚀 Starting OWL-ViT search: '{query}'")
            print(f"📊 Confidence: {CONFIG['CONFIDENCE_THRESHOLD']:.2f} | Detection: {CONFIG['DETECTION_THRESHOLD']:.2f}")
            print(f"💡 Processing images individually for stability")
            print("-" * 50)

            # Perform search
            search_results = search_images_with_query(
                query, current_model, current_processor, current_device, current_model_name,
                CONFIG['SUSPECTS_GALLERY_PATH'], CONFIG['BATCH_SIZE']
            )

            if search_results:
                copy_button.disabled = False
                # Only the first MAX_RESULTS_DISPLAY hits are drawn; all of
                # them remain available to the copy button.
                display_results(search_results[:CONFIG['MAX_RESULTS_DISPLAY']])

                if len(search_results) > CONFIG['MAX_RESULTS_DISPLAY']:
                    print(f"\n📝 Showing first {CONFIG['MAX_RESULTS_DISPLAY']} results out of {len(search_results)} total matches")
            else:
                print("🔍 No matches found for your query")
                print("💡 Try lowering the confidence threshold or using different search terms")
                copy_button.disabled = True

    def on_copy_clicked(b):
        """Copy all matches of the latest search into the results folder."""
        with output_area:
            if search_results:
                print("\n📋 Copying results to output folder...")
                folder, files = copy_results_to_folder(search_results, CONFIG['RESULTS_OUTPUT_PATH'])
                print(f"✅ Results saved to: {folder}")
            else:
                print("⚠️ No results to copy")

    def on_clear_clicked(b):
        """Forget the latest matches and wipe the output area."""
        nonlocal search_results
        search_results = []
        copy_button.disabled = True
        with output_area:
            clear_output()
            print("🗑️ Results cleared")

    # Connect button events
    switch_model_button.on_click(on_model_switch_clicked)
    search_button.on_click(on_search_clicked)
    copy_button.on_click(on_copy_clicked)
    clear_button.on_click(on_clear_clicked)

    # Layout
    controls = widgets.VBox([
        widgets.HTML("<h3>🦉 Forensic Image Search Interface (OWL-ViT)</h3>"),
        model_selector,
        query_input,
        widgets.HBox([confidence_slider, detection_threshold_slider]),
        widgets.HBox([search_button, copy_button, clear_button, switch_model_button]),
        widgets.HTML("<hr>")
    ])

    return widgets.VBox([controls, output_area])

def display_results(results):
    """Plot the matched images in a two-column grid with their bounding boxes.

    Args:
        results: List of result dicts with 'image' (anything ``imshow``
            accepts), 'boxes' (iterable of ``[x1, y1, x2, y2]`` items that
            support ``.tolist()``), 'confidence_scores' (one score per box)
            and 'image_path' (``.name`` is used in the title). An empty list
            is a no-op.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    if not results:
        return

    cols = 2
    rows = (len(results) + cols - 1) // cols

    # squeeze=False always yields a 2-D array of Axes, so the single-result
    # and single-row cases need no special handling (with the default
    # squeezing, a lone result was handed the whole Axes array and crashed
    # on ax.imshow).
    fig, axes = plt.subplots(rows, cols, figsize=CONFIG['FIGURE_SIZE'], squeeze=False)
    axes = axes.ravel()

    for ax, result in zip(axes, results):
        # Display image
        ax.imshow(result['image'])

        boxes = result['boxes']
        confidences = result['confidence_scores']

        # Draw bounding boxes
        for box, conf in zip(boxes, confidences):
            # Corner format [x1, y1, x2, y2] -> matplotlib (anchor, width, height)
            x1, y1, x2, y2 = box.tolist()

            rect = patches.Rectangle(
                (x1, y1), x2 - x1, y2 - y1,
                linewidth=2, edgecolor='red', facecolor='none'
            )
            ax.add_patch(rect)

            # Confidence label just above the box's top-left corner
            ax.text(x1, y1 - 5, f'{conf:.2f}',
                   color='red', fontweight='bold', fontsize=10)

        ax.set_title(f"{result['image_path'].name}\nMatches: {len(boxes)}", fontsize=10)
        ax.axis('off')

    # Hide the unused trailing cell when the number of results is odd
    for unused_ax in axes[len(results):]:
        unused_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Display the interface
# The widget tree is built once here; it starts from the module-level
# model/processor/device/model_name as they are at this point.
interface = create_search_interface()
display(interface)

# Cell 6: Batch Processing Functions (Enhanced)
def batch_search_multiple_queries(queries_list, model, processor, device, model_name, gallery_path, output_base_path, batch_size=4):
    """Run several text queries over the gallery and save the matches per query.

    Args:
        queries_list: Sequence of query strings, processed in order.
        model, processor, device, model_name: Forwarded to
            search_images_with_query().
        gallery_path: Folder containing the images to scan.
        output_base_path: Base folder; matches of each query are copied below
            ``<output_base_path>/query_<sanitised query>/``.
        batch_size: Forwarded to search_images_with_query().

    Returns:
        dict: Maps each query to a dict with 'results', 'output_folder'
        (None when nothing matched), 'file_count' and 'match_count'. A query
        that occurs twice keeps only the data of its last run.
    """
    import re

    all_results = {}

    print(f"🚀 Starting batch analysis with {len(queries_list)} queries")
    print(f"📁 Gallery: {gallery_path}")
    print(f"🦉 Model: {model_name}")
    print("=" * 60)

    for i, query in enumerate(queries_list, 1):
        print(f"\n[{i}/{len(queries_list)}] Query: '{query}'")

        results = search_images_with_query(query, model, processor, device, model_name, gallery_path, batch_size)

        if results:
            # Query-specific output folder. Every character other than
            # letters, digits, '_', '.' and '-' becomes '_', so queries
            # containing e.g. '\\', ':', '?' or quotes still give a single
            # valid folder name. Spaces and '/' map to '_' as before.
            safe_name = re.sub(r'[^\w.-]', '_', query)
            query_folder = Path(output_base_path) / f"query_{safe_name}"
            folder, files = copy_results_to_folder(results, query_folder)
            all_results[query] = {
                'results': results,
                'output_folder': folder,
                'file_count': len(files),
                'match_count': len(results)
            }
            print(f"📁 Saved {len(files)} files to: {folder.name}")
        else:
            all_results[query] = {
                'results': [],
                'output_folder': None,
                'file_count': 0,
                'match_count': 0
            }
            print("⚪ No matches found")

    # Summary report
    print("\n" + "="*60)
    print("📊 BATCH SEARCH SUMMARY")
    print("="*60)

    for query, data in all_results.items():
        matches = data['match_count']
        files = data['file_count']

        status = "✅" if matches > 0 else "⚪"
        print(f"{status} '{query}': {matches} images, {files} files saved")

    total_matches = sum(data['match_count'] for data in all_results.values())
    total_files = sum(data['file_count'] for data in all_results.values())

    print(f"\n🎯 TOTAL: {total_matches} matched images, {total_files} files copied")
    return all_results

# Enhanced batch processing with common forensic queries
def run_forensic_batch_analysis(custom_queries=None, batch_size=4, model_to_use=None):
    """Run the built-in forensic query set, plus optional extras, over the gallery.

    Args:
        custom_queries: Optional extra queries: a single string or any
            iterable of strings. Entries already present (in the defaults or
            earlier in the extras) are skipped so no query is run twice.
        batch_size: Forwarded to batch_search_multiple_queries().
        model_to_use: Optional ``(model, processor, device, model_name)``
            tuple. When falsy, the module-level ``model``, ``processor``,
            ``device`` and ``model_name`` are used.

    Returns:
        The dict returned by batch_search_multiple_queries(), or None when no
        model/processor is available.
    """

    # Use provided model or current global model
    if model_to_use:
        current_model, current_processor, current_device, current_model_name = model_to_use
    else:
        current_model, current_processor, current_device, current_model_name = model, processor, device, model_name

    # Default forensic queries optimized for OWL-ViT
    default_queries = [
        "person with weapon",
        "person holding gun",
        "person with knife",
        "suspicious vehicle",
        "person wearing mask",
        "person running",
        "backpack",
        "group of people",
        "person with phone",
        "person in dark clothing",
        "weapon",
        "gun",
        "knife"
    ]

    # Normalise the extras: a bare string counts as one query (not a sequence
    # of characters), tuples/generators are accepted, and duplicates are
    # dropped because each query triggers a full scan of the gallery.
    extra_queries = []
    if custom_queries:
        if isinstance(custom_queries, str):
            custom_queries = [custom_queries]
        for candidate in custom_queries:
            if candidate not in default_queries and candidate not in extra_queries:
                extra_queries.append(candidate)

    if extra_queries:
        queries = default_queries + extra_queries
        print(f"📋 Using {len(default_queries)} default + {len(extra_queries)} custom queries")
    else:
        queries = default_queries
        print(f"📋 Using {len(default_queries)} default forensic queries")

    if current_model and current_processor:
        print("🔍 Starting comprehensive OWL-ViT forensic analysis...")
        batch_results = batch_search_multiple_queries(
            queries,
            current_model,
            current_processor,
            current_device,
            current_model_name,
            CONFIG['SUSPECTS_GALLERY_PATH'],
            CONFIG['RESULTS_OUTPUT_PATH'],
            batch_size
        )
        return batch_results
    else:
        print("❌ Model or processor not loaded. Cannot run batch analysis.")
        return None

# Quick test with reduced output
def quick_forensic_search(query="person with weapon", batch_size=4, model_to_use=None):
    """Run one query against the configured gallery, for quick checks.

    Args:
        query: Text query to search for.
        batch_size: Forwarded to search_images_with_query().
        model_to_use: Optional ``(model, processor, device, model_name)``
            tuple; when falsy, the module-level objects are used.

    Returns:
        The non-empty match list, ``[]`` when nothing matched, or None when
        no model/processor is available.
    """
    # Resolve which model bundle to use.
    bundle = model_to_use if model_to_use else (model, processor, device, model_name)
    active_model, active_processor, active_device, active_name = bundle

    if not (active_model and active_processor):
        print("❌ Model or processor not loaded")
        return None

    print(f"🔍 Quick OWL-ViT search: '{query}'")
    print(f"🦉 Using model: {active_name}")

    matches = search_images_with_query(
        query, active_model, active_processor, active_device, active_name,
        CONFIG['SUSPECTS_GALLERY_PATH'], batch_size
    )

    # Empty (or otherwise falsy) outcomes are normalised to a fresh empty list.
    if not matches:
        print("⚪ No matches found")
        return []

    print(f"📋 Found {len(matches)} matches - ready for detailed analysis")
    return matches

# Model comparison utilities
def list_available_models():
    """Print every entry of AVAILABLE_MODELS with its performance note and description."""
    header = "🦉 Available OWL-ViT Models:"
    print(header)
    print("-" * 50)
    for short_key, details in AVAILABLE_MODELS.items():
        # One print per model; the trailing "\n" yields the blank separator line.
        print(
            f"🔹 {details['name']} ({short_key})\n"
            f"   📊 {details['performance']}\n"
            f"   📝 {details['description']}\n"
        )

# Display available models (prints the AVAILABLE_MODELS catalogue)
list_available_models()

# Uncomment to run batch analysis: runs the default forensic query set over
# the gallery and copies matches below CONFIG['RESULTS_OUTPUT_PATH'].
# batch_results = run_forensic_batch_analysis(batch_size=4)

# Uncomment for quick test: a single query, nothing is copied.
# quick_results = quick_forensic_search("person holding gun", batch_size=4)

# Cell 7: Usage Instructions and Tips
# Static help text. The folder names quoted in it match the defaults in
# CONFIG; they previously named './suspects_gallery' and 'search_results/',
# which are not the folders this notebook reads from or writes to.
print("""
🎯 FORENSIC IMAGE SEARCH SYSTEM - OWL-ViT VERSION
===============================================

🆕 OWL-ViT FEATURES:
• 🦉 Open-World Localization with Vision Transformers
• 🎯 Zero-shot object detection without training data
• 📊 Multiple model sizes for different speed/accuracy needs
• 🔍 Excellent performance on diverse object categories

📋 SETUP CHECKLIST:
1. ✅ Place suspect images in the folder set by CONFIG['SUSPECTS_GALLERY_PATH'] (default: ../../datasets/images/objects/raw)
2. ✅ Run all cells in order (1-6)
3. ✅ OWL-ViT models download automatically on first use

🦉 AVAILABLE OWL-ViT MODELS:
• Patch32: Fastest, good for quick analysis
• Patch16: Better accuracy, moderate speed
• Large Patch14: Best accuracy, slowest (recommended for critical analysis)

🔧 DEVICE COMPATIBILITY:
• System automatically detects GPU/CPU availability
• Models work on both CPU and GPU
• CPU mode: Slower but works on all systems
• GPU mode: Faster with CUDA support

🔍 SEARCH TIPS FOR OWL-ViT:
• Use simple, clear terms: "weapon", "gun", "knife", "person"
• OWL-ViT works well with single objects and people
• Try both specific and general terms: "weapon" vs "gun"
• Lower confidence thresholds (0.05-0.15) often work better
• Common forensic queries:
  - "weapon" / "gun" / "knife"
  - "person" / "person with weapon"
  - "vehicle" / "car" / "suspicious vehicle"
  - "mask" / "person wearing mask"
  - "backpack" / "bag"

⚙️ INTERFACE FEATURES:
• Model selector for switching between OWL-ViT variants
• Dual thresholds: Detection threshold + Confidence threshold
• Real-time search with progress tracking
• Copy results to organized folders
• Clear visual results with bounding boxes

🎛️ THRESHOLD TUNING:
• Detection Threshold (0.01-0.5): Controls initial detection sensitivity
• Confidence Threshold (0.01-0.5): Filters final results
• Start with low values (0.05-0.1) and adjust based on results
• OWL-ViT typically uses lower thresholds than other models

📁 OUTPUT STRUCTURE:
<CONFIG['RESULTS_OUTPUT_PATH']>/   (default: ../../datasets/images/objects/detections)
├── search_20240611_143022/
│   ├── 001_suspect1_conf0.12.jpg
│   ├── 002_suspect5_conf0.18.jpg
│   └── ...

🚨 TROUBLESHOOTING:
• "Model loading failed" → Check internet connection (first download)
• "No matches found" → Try lowering detection/confidence thresholds
• "Low confidence scores" → Normal for OWL-ViT, try 0.05-0.15 range
• Slow performance → Use Patch32 model or reduce image count
• Memory issues → Restart kernel, use CPU mode

🔄 MODEL COMPARISON:
• Patch32: Fast processing, good for real-time analysis
• Patch16: Balanced performance, recommended for most forensic work
• Large Patch14: Maximum accuracy for critical evidence analysis

⭐ OWL-ViT ADVANTAGES:
• Excellent zero-shot performance on novel objects
• No need for training on specific datasets
• Strong performance on people and common objects
• Good generalization to forensic scenarios
• Multiple resolution options for different needs

💡 FORENSIC BEST PRACTICES:
• Start with broad terms like "person", "weapon", "vehicle"
• Use multiple queries for comprehensive analysis
• Review low-confidence detections manually
• Cross-reference results with different models
• Document threshold settings for evidence reports

🎯 PERFORMANCE EXPECTATIONS:
• OWL-ViT excels at detecting people and common objects
• May require lower confidence thresholds than other models
• Performs well on weapons, vehicles, and suspicious activities
• Best results with clear, well-lit images
• Good performance even on partially occluded objects

For batch processing of multiple queries, use the functions in Cell 6.
For comparing different models, use the model switching interface.
""")

✅ Configuration loaded successfully
📁 Suspects gallery: ../../datasets/images/objects/raw
📁 Results output: ../../datasets/images/objects/detections
🦉 Selected model: google/owlvit-base-patch32
✅ Transformers already installed
✅ All dependencies imported successfully
🔧 PyTorch version: 2.7.1+cpu
🖥️ Using CPU mode (CUDA not available)
📍 Auto-detected device: cpu
📁 Created directories: ../../datasets/images/objects/raw, ../../datasets/images/objects/detections
📥 Loading OWL-ViT model: google/owlvit-base-patch32
🖥️ Target device: cpu
⏳ Loading processor...
⏳ Loading model weights...
📍 Moving model to cpu...
✅ Model loaded successfully!
   🦉 Model: google/owlvit-base-patch32
   🖥️ Device: cpu


VBox(children=(VBox(children=(HTML(value='<h3>🦉 Forensic Image Search Interface (OWL-ViT)</h3>'), Dropdown(des…

🦉 Available OWL-ViT Models:
--------------------------------------------------
🔹 OWL-ViT Base Patch32 (owlvit-base-patch32)
   📊 Base performance, fastest inference
   📝 Fastest OWL-ViT model - good balance of speed and accuracy

🔹 OWL-ViT Base Patch16 (owlvit-base-patch16)
   📊 Better accuracy, moderate speed
   📝 Higher resolution features - better accuracy, slower

🔹 OWL-ViT Large Patch14 (owlvit-large-patch14)
   📊 Best accuracy, slowest speed
   📝 Best accuracy - largest model, slowest inference


🎯 FORENSIC IMAGE SEARCH SYSTEM - OWL-ViT VERSION

🆕 OWL-ViT FEATURES:
• 🦉 Open-World Localization with Vision Transformers
• 🎯 Zero-shot object detection without training data
• 📊 Multiple model sizes for different speed/accuracy needs
• 🔍 Excellent performance on diverse object categories

📋 SETUP CHECKLIST:
1. ✅ Place suspect images in the './suspects_gallery' folder
2. ✅ Run all cells in order (1-6)
3. ✅ OWL-ViT models download automatically on first use

🦉 AVAILABLE OWL-ViT MODELS:
•