In [1]:
# Cell 1: Configuration and Setup
import os
import shutil
import torch
from PIL import Image
import numpy as np
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Configuration Settings
CONFIG = {
    # Model Selection
    'GROUNDING_DINO_MODEL': 'IDEA-Research/grounding-dino-base',
    'CLIP_MODEL': 'ViT-B/32',
    'DEVICE': 'auto',

    # Paths
    'SUSPECTS_GALLERY_PATH': '../../datasets/images/objects/raw',  # Input folder with suspect images
    'RESULTS_OUTPUT_PATH': '../../datasets/images/objects/detections',      # Output folder for matched images
    # Detection parameters
    'GROUNDING_DINO_CONFIDENCE': 0.35,
    'GROUNDING_DINO_BOX_THRESHOLD': 0.3,
    'GROUNDING_DINO_TEXT_THRESHOLD': 0.25,

    'CLIP_SIMILARITY_THRESHOLD': 0.25,
    'CLIP_PATCH_SIZE': 224,
    'CLIP_STRIDE': 112,
    'CLIP_MAX_PATCHES': 16,

    # Processing settings
    'BATCH_SIZE': 4,
    'MAX_RESULTS_DISPLAY': 10,
    'FIGURE_SIZE': (15, 10),
    'COMPARISON_MODE': True,  # Show results from both models
}

# Available models
AVAILABLE_MODELS = {
    'grounding_dino': {
        'grounding-dino-tiny': 'IDEA-Research/grounding-dino-tiny',
        'grounding-dino-base': 'IDEA-Research/grounding-dino-base'
    },
    'clip': {
        'RN50': 'RN50',
        'RN101': 'RN101',
        'ViT-B/32': 'ViT-B/32',
        'ViT-B/16': 'ViT-B/16',
        'ViT-L/14': 'ViT-L/14'
    }
}

print("✅ Configuration loaded successfully")
print(f"📁 Suspects gallery: {CONFIG['SUSPECTS_GALLERY_PATH']}")
print(f"📁 Results output: {CONFIG['RESULTS_OUTPUT_PATH']}")
print(f"🎯 GroundingDINO: {CONFIG['GROUNDING_DINO_MODEL']}")
print(f"🔍 CLIP: {CONFIG['CLIP_MODEL']}")
print("🚀 Combined model approach for comprehensive analysis")

# Cell 2: Install and Import Dependencies
def install_dependencies():
    """Install all required dependencies"""
    import subprocess
    import sys

    packages = [
        "torch",
        "torchvision",
        "transformers",
        "git+https://github.com/openai/CLIP.git",
        "ipywidgets",
        "matplotlib",
        "pillow",
        "numpy"
    ]

    for package in packages:
        try:
            print(f"Installing {package}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        except subprocess.CalledProcessError as e:
            print(f"❌ Failed to install {package}: {e}")

    print("📦 Installation complete")

# Check and install dependencies
try:
    import transformers
    import clip
    print("✅ Dependencies already installed")
except ImportError:
    print("⚠️ Installing dependencies...")
    install_dependencies()
    print("🔄 Please restart kernel and run this cell again")

# Import required libraries
try:
    import torch
    import torch.nn.functional as F
    from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
    import clip
    from PIL import Image
    import numpy as np

    print("✅ All dependencies imported successfully")

    # Device setup
    if torch.cuda.is_available():
        print(f"🚀 CUDA available: {torch.cuda.get_device_name(0)}")
        default_device = "cuda"
    else:
        print("🖥️ Using CPU mode")
        default_device = "cpu"

    if CONFIG['DEVICE'] == 'auto':
        CONFIG['DEVICE'] = default_device
        print(f"📍 Device: {CONFIG['DEVICE']}")

except ImportError as e:
    print(f"❌ Import error: {e}")
    print("Please restart kernel after installation")

# Cell 3: Model Loading and Management
class CombinedModels:
    """Combined GroundingDINO and CLIP models"""

    def __init__(self):
        self.grounding_dino_model = None
        self.grounding_dino_processor = None
        self.clip_model = None
        self.clip_preprocess = None
        self.device = CONFIG['DEVICE']

    def load_grounding_dino(self, model_name=None):
        """Load GroundingDINO model"""
        try:
            if model_name is None:
                model_name = CONFIG['GROUNDING_DINO_MODEL']

            print(f"📥 Loading GroundingDINO: {model_name}")

            self.grounding_dino_processor = AutoProcessor.from_pretrained(model_name)
            self.grounding_dino_model = AutoModelForZeroShotObjectDetection.from_pretrained(model_name)
            self.grounding_dino_model = self.grounding_dino_model.to(self.device)
            self.grounding_dino_model.eval()

            print("✅ GroundingDINO loaded successfully")
            return True

        except Exception as e:
            print(f"❌ Error loading GroundingDINO: {e}")
            return False

    def load_clip(self, model_name=None):
        """Load CLIP model"""
        try:
            if model_name is None:
                model_name = CONFIG['CLIP_MODEL']

            print(f"📥 Loading CLIP: {model_name}")

            self.clip_model, self.clip_preprocess = clip.load(model_name, device=self.device)
            self.clip_model.eval()

            print("✅ CLIP loaded successfully")
            return True

        except Exception as e:
            print(f"❌ Error loading CLIP: {e}")
            return False

    def load_both_models(self):
        """Load both models"""
        print("🚀 Loading both models...")

        gdino_success = self.load_grounding_dino()
        clip_success = self.load_clip()

        if gdino_success and clip_success:
            print("✅ Both models loaded successfully!")
            return True
        else:
            print("❌ Failed to load one or both models")
            return False

    def search_with_grounding_dino(self, image_path, query):
        """Search using GroundingDINO"""
        try:
            image = Image.open(image_path).convert("RGB")

            # Format query
            text_query = query.lower()
            if not text_query.endswith('.'):
                text_query += '.'

            # Process inputs
            inputs = self.grounding_dino_processor(images=image, text=text_query, return_tensors="pt")
            inputs = inputs.to(self.device)

            # Run inference
            with torch.no_grad():
                outputs = self.grounding_dino_model(**inputs)

            # Post-process
            results = self.grounding_dino_processor.post_process_grounded_object_detection(
                outputs,
                inputs.input_ids,
                box_threshold=CONFIG['GROUNDING_DINO_BOX_THRESHOLD'],
                text_threshold=CONFIG['GROUNDING_DINO_TEXT_THRESHOLD'],
                target_sizes=[image.size[::-1]]
            )

            if results and len(results) > 0:
                result = results[0]
                if 'scores' in result and len(result['scores']) > 0:
                    # Filter by confidence
                    high_conf_mask = result['scores'] >= CONFIG['GROUNDING_DINO_CONFIDENCE']

                    if high_conf_mask.any():
                        return {
                            'image_path': image_path,
                            'image': image,
                            'boxes': result['boxes'][high_conf_mask],
                            'scores': result['scores'][high_conf_mask],
                            'labels': [query] * len(result['scores'][high_conf_mask]),
                            'query': query,
                            'model': 'GroundingDINO'
                        }

            return None

        except Exception as e:
            print(f"GroundingDINO error on {image_path.name}: {e}")
            return None

    def search_with_clip(self, image_path, query):
        """Search using CLIP"""
        try:
            image = Image.open(image_path).convert("RGB")

            # Extract patches
            patches, positions = self._extract_patches(image)

            if not patches:
                return None

            # Compute similarities
            similarities = self._compute_clip_similarity(query, patches)

            if len(similarities) == 0:
                return None

            # Filter by threshold
            high_sim_mask = similarities >= CONFIG['CLIP_SIMILARITY_THRESHOLD']

            if not high_sim_mask.any():
                return None

            filtered_similarities = similarities[high_sim_mask]
            filtered_positions = [positions[i] for i in range(len(positions)) if high_sim_mask[i]]

            return {
                'image_path': image_path,
                'image': image,
                'boxes': torch.tensor(filtered_positions, dtype=torch.float32),
                'scores': torch.tensor(filtered_similarities, dtype=torch.float32),
                'labels': [query] * len(filtered_similarities),
                'query': query,
                'model': 'CLIP'
            }

        except Exception as e:
            print(f"CLIP error on {image_path.name}: {e}")
            return None

    def _extract_patches(self, image):
        """Extract patches for CLIP analysis"""
        width, height = image.size
        patch_size = CONFIG['CLIP_PATCH_SIZE']
        stride = CONFIG['CLIP_STRIDE']
        max_patches = CONFIG['CLIP_MAX_PATCHES']

        patches = []
        positions = []

        x_positions = range(0, max(1, width - patch_size + 1), stride)
        y_positions = range(0, max(1, height - patch_size + 1), stride)

        for y in y_positions:
            for x in x_positions:
                x_end = min(x + patch_size, width)
                y_end = min(y + patch_size, height)

                patch = image.crop((x, y, x_end, y_end))

                if patch.size != (patch_size, patch_size):
                    patch = patch.resize((patch_size, patch_size), Image.LANCZOS)

                patches.append(patch)
                positions.append((x, y, x_end, y_end))

                if len(patches) >= max_patches:
                    break
            if len(patches) >= max_patches:
                break

        return patches, positions

    def _compute_clip_similarity(self, text_query, images):
        """Compute CLIP similarity"""
        try:
            image_inputs = torch.stack([self.clip_preprocess(img) for img in images]).to(self.device)
            text_inputs = clip.tokenize([text_query]).to(self.device)

            with torch.no_grad():
                image_features = self.clip_model.encode_image(image_inputs)
                text_features = self.clip_model.encode_text(text_inputs)

                image_features = F.normalize(image_features, dim=-1)
                text_features = F.normalize(text_features, dim=-1)

                similarities = torch.matmul(image_features, text_features.T).squeeze(-1)

            return similarities.cpu().numpy()

        except Exception as e:
            print(f"CLIP similarity error: {e}")
            return np.array([])

def setup_directories():
    """Create necessary directories"""
    os.makedirs(CONFIG['SUSPECTS_GALLERY_PATH'], exist_ok=True)
    os.makedirs(CONFIG['RESULTS_OUTPUT_PATH'], exist_ok=True)
    print("📁 Directories ready")

# Initialize
setup_directories()
combined_models = CombinedModels()
models_loaded = combined_models.load_both_models()

# Cell 4: Combined Search Functions
def search_with_both_models(query, gallery_path, comparison_mode=True):
    """Search using both GroundingDINO and CLIP"""
    import time
    from datetime import datetime

    start_time = time.time()
    start_datetime = datetime.now()

    print(f"🔍 Combined search for: '{query}'")
    print(f"⏰ Start: {start_datetime.strftime('%H:%M:%S')}")
    print("-" * 60)

    # Get image files
    gallery_path = Path(gallery_path)
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
    image_files = [f for f in gallery_path.iterdir() if f.suffix.lower() in image_extensions]

    if not image_files:
        print(f"⚠️ No images found in {gallery_path}")
        return {}, {}

    grounding_dino_results = []
    clip_results = []
    total_files = len(image_files)

    print(f"📸 Processing {total_files} images with both models...")

    for i, img_path in enumerate(image_files):
        if i % 10 == 0:
            print(f"Progress: {i+1}/{total_files}", end='\r')

        # Search with GroundingDINO
        if combined_models.grounding_dino_model:
            gdino_result = combined_models.search_with_grounding_dino(img_path, query)
            if gdino_result:
                grounding_dino_results.append(gdino_result)

        # Search with CLIP
        if combined_models.clip_model:
            clip_result = combined_models.search_with_clip(img_path, query)
            if clip_result:
                clip_results.append(clip_result)

        # Memory cleanup
        if torch.cuda.is_available() and i % 20 == 0:
            torch.cuda.empty_cache()

    # Calculate timing
    end_time = time.time()
    end_datetime = datetime.now()
    duration = end_time - start_time

    duration_str = f"{int(duration//60)}m {duration%60:.1f}s" if duration >= 60 else f"{duration:.1f}s"

    # Print summary
    print(f"\n" + "="*60)
    print(f"📊 COMBINED SEARCH SUMMARY")
    print(f"="*60)
    print(f"📸 Images processed: {total_files}")
    print(f"🎯 GroundingDINO matches: {len(grounding_dino_results)}")
    print(f"🔍 CLIP matches: {len(clip_results)}")
    print(f"⏰ Start: {start_datetime.strftime('%H:%M:%S')}")
    print(f"🏁 End: {end_datetime.strftime('%H:%M:%S')}")
    print(f"⏱️ Duration: {duration_str}")
    print(f"📈 Avg per image: {duration/total_files:.2f}s")
    print(f"="*60)

    return {
        'grounding_dino': grounding_dino_results,
        'clip': clip_results,
        'query': query,
        'total_images': total_files,
        'duration': duration
    }

def compare_model_results(results_dict):
    """Compare results from both models"""
    gdino_results = results_dict.get('grounding_dino', [])
    clip_results = results_dict.get('clip', [])

    print(f"\n📊 MODEL COMPARISON")
    print(f"-" * 40)
    print(f"🎯 GroundingDINO: {len(gdino_results)} matches")
    print(f"🔍 CLIP: {len(clip_results)} matches")

    # Find images detected by both models
    gdino_images = {result['image_path'].name for result in gdino_results}
    clip_images = {result['image_path'].name for result in clip_results}

    common_images = gdino_images & clip_images
    gdino_only = gdino_images - clip_images
    clip_only = clip_images - gdino_images

    print(f"🤝 Both models: {len(common_images)} images")
    print(f"🎯 GroundingDINO only: {len(gdino_only)} images")
    print(f"🔍 CLIP only: {len(clip_only)} images")

    if common_images:
        print(f"✅ High confidence matches (both models agree):")
        for img_name in sorted(list(common_images)[:5]):
            print(f"   • {img_name}")

    return {
        'common': common_images,
        'grounding_dino_only': gdino_only,
        'clip_only': clip_only
    }

def copy_combined_results(results_dict, output_folder):
    """Copy results from both models to organized folders"""
    if not results_dict:
        return None

    output_path = Path(output_folder)
    output_path.mkdir(exist_ok=True)

    # Create timestamped folder
    from datetime import datetime
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    search_folder = output_path / f"combined_search_{timestamp}"
    search_folder.mkdir(exist_ok=True)

    # Create subfolders for each model
    gdino_folder = search_folder / "grounding_dino"
    clip_folder = search_folder / "clip"
    both_folder = search_folder / "both_models"

    gdino_folder.mkdir(exist_ok=True)
    clip_folder.mkdir(exist_ok=True)
    both_folder.mkdir(exist_ok=True)

    gdino_results = results_dict.get('grounding_dino', [])
    clip_results = results_dict.get('clip', [])

    # Get image sets
    gdino_images = {result['image_path'].name: result for result in gdino_results}
    clip_images = {result['image_path'].name: result for result in clip_results}
    common_images = set(gdino_images.keys()) & set(clip_images.keys())

    copied_files = {'grounding_dino': [], 'clip': [], 'both': []}

    # Copy GroundingDINO results
    for i, result in enumerate(gdino_results):
        try:
            source_path = result['image_path']
            max_score = float(result['scores'].max()) if len(result['scores']) > 0 else 0.0
            filename = f"{i+1:03d}_{source_path.stem}_gdino{max_score:.2f}{source_path.suffix}"

            if source_path.name in common_images:
                dest_path = both_folder / filename
            else:
                dest_path = gdino_folder / filename

            shutil.copy2(source_path, dest_path)
            copied_files['grounding_dino'].append(dest_path)

        except Exception as e:
            print(f"Error copying GroundingDINO result: {e}")

    # Copy CLIP results
    for i, result in enumerate(clip_results):
        try:
            source_path = result['image_path']
            max_score = float(result['scores'].max()) if len(result['scores']) > 0 else 0.0
            filename = f"{i+1:03d}_{source_path.stem}_clip{max_score:.2f}{source_path.suffix}"

            if source_path.name not in common_images:  # Only copy if not already in both folder
                dest_path = clip_folder / filename
                shutil.copy2(source_path, dest_path)
                copied_files['clip'].append(dest_path)

        except Exception as e:
            print(f"Error copying CLIP result: {e}")

    total_copied = len(copied_files['grounding_dino']) + len(copied_files['clip'])

    print(f"📋 Results saved to: {search_folder}")
    print(f"   🎯 GroundingDINO: {len(copied_files['grounding_dino'])} files")
    print(f"   🔍 CLIP: {len(copied_files['clip'])} files")
    print(f"   🤝 Both models: {len(common_images)} files")

    return search_folder, copied_files

# Cell 5: Interactive Combined Interface
def create_combined_interface():
    """Create interface for combined model search"""

    # Widgets
    grounding_dino_selector = widgets.Dropdown(
        options=[(f"GroundingDINO {key}", val) for key, val in AVAILABLE_MODELS['grounding_dino'].items()],
        value=CONFIG['GROUNDING_DINO_MODEL'],
        description='GroundingDINO:',
        layout=widgets.Layout(width='350px')
    )

    clip_selector = widgets.Dropdown(
        options=[(f"CLIP {key}", val) for key, val in AVAILABLE_MODELS['clip'].items()],
        value=CONFIG['CLIP_MODEL'],
        description='CLIP:',
        layout=widgets.Layout(width='350px')
    )

    query_input = widgets.Text(
        value='person with weapon',
        placeholder='Enter search query...',
        description='Query:',
        layout=widgets.Layout(width='400px')
    )

    gdino_confidence_slider = widgets.FloatSlider(
        value=CONFIG['GROUNDING_DINO_CONFIDENCE'],
        min=0.1, max=0.9, step=0.05,
        description='GroundingDINO Confidence:',
        readout_format='.2f',
        layout=widgets.Layout(width='300px')
    )

    clip_threshold_slider = widgets.FloatSlider(
        value=CONFIG['CLIP_SIMILARITY_THRESHOLD'],
        min=0.1, max=0.8, step=0.05,
        description='CLIP Threshold:',
        readout_format='.2f',
        layout=widgets.Layout(width='300px')
    )

    comparison_mode_checkbox = widgets.Checkbox(
        value=CONFIG['COMPARISON_MODE'],
        description='Comparison Mode',
        indent=False
    )

    search_button = widgets.Button(
        description='🔍 Search Both',
        button_style='primary',
        layout=widgets.Layout(width='140px')
    )

    copy_button = widgets.Button(
        description='📋 Copy Results',
        button_style='success',
        layout=widgets.Layout(width='140px'),
        disabled=True
    )

    reload_button = widgets.Button(
        description='🔄 Reload Models',
        button_style='info',
        layout=widgets.Layout(width='140px')
    )

    output_area = widgets.Output()

    # State
    search_results = {}

    def on_reload_models(b):
        with output_area:
            print("🔄 Reloading models...")
            CONFIG['GROUNDING_DINO_MODEL'] = grounding_dino_selector.value
            CONFIG['CLIP_MODEL'] = clip_selector.value

            success = combined_models.load_both_models()
            if success:
                print("✅ Models reloaded successfully!")
            else:
                print("❌ Failed to reload models")

    def on_search(b):
        nonlocal search_results
        with output_area:
            clear_output(wait=True)

            if not combined_models.grounding_dino_model and not combined_models.clip_model:
                print("❌ No models loaded")
                return

            query = query_input.value.strip()
            if not query:
                print("⚠️ Enter a query")
                return

            # Update config
            CONFIG['GROUNDING_DINO_CONFIDENCE'] = gdino_confidence_slider.value
            CONFIG['CLIP_SIMILARITY_THRESHOLD'] = clip_threshold_slider.value
            CONFIG['COMPARISON_MODE'] = comparison_mode_checkbox.value

            # Search
            search_results = search_with_both_models(
                query, CONFIG['SUSPECTS_GALLERY_PATH'], CONFIG['COMPARISON_MODE']
            )

            if search_results:
                copy_button.disabled = False

                # Show comparison
                comparison = compare_model_results(search_results)

                # Display results
                display_combined_results(search_results)
            else:
                print("🔍 No matches found with either model")
                copy_button.disabled = True

    def on_copy(b):
        with output_area:
            if search_results:
                folder, files = copy_combined_results(search_results, CONFIG['RESULTS_OUTPUT_PATH'])
                print(f"✅ Combined results saved!")
            else:
                print("⚠️ No results to copy")

    # Connect events
    reload_button.on_click(on_reload_models)
    search_button.on_click(on_search)
    copy_button.on_click(on_copy)

    # Layout
    controls = widgets.VBox([
        widgets.HTML("<h3>🚀 Combined GroundingDINO + CLIP Forensic Search</h3>"),
        widgets.HBox([grounding_dino_selector, clip_selector]),
        query_input,
        widgets.HBox([gdino_confidence_slider, clip_threshold_slider]),
        widgets.HBox([comparison_mode_checkbox]),
        widgets.HBox([search_button, copy_button, reload_button]),
        widgets.HTML("<hr>")
    ])

    return widgets.VBox([controls, output_area])

def display_combined_results(results_dict):
    """Display results from both models side by side"""
    gdino_results = results_dict.get('grounding_dino', [])
    clip_results = results_dict.get('clip', [])

    max_display = CONFIG['MAX_RESULTS_DISPLAY']
    gdino_display = gdino_results[:max_display//2]
    clip_display = clip_results[:max_display//2]

    if not gdino_display and not clip_display:
        return

    # Create figure with subplots
    total_results = len(gdino_display) + len(clip_display)
    cols = 2
    rows = max(len(gdino_display), len(clip_display))

    if rows == 0:
        return

    fig, axes = plt.subplots(rows, cols, figsize=(15, 5*rows))

    if rows == 1:
        axes = axes.reshape(1, -1)

    # Plot GroundingDINO results
    for i in range(rows):
        ax = axes[i, 0]

        if i < len(gdino_display):
            result = gdino_display[i]
            ax.imshow(result['image'])

            # Draw bounding boxes
            for box, score in zip(result['boxes'], result['scores']):
                x1, y1, x2, y2 = box.tolist()
                rect = patches.Rectangle(
                    (x1, y1), x2 - x1, y2 - y1,
                    linewidth=2, edgecolor='blue', facecolor='none'
                )
                ax.add_patch(rect)
                ax.text(x1, y1 - 5, f'{score:.2f}',
                       color='blue', fontweight='bold', fontsize=9)

            ax.set_title(f"GroundingDINO: {result['image_path'].name}\nMatches: {len(result['boxes'])}",
                        fontsize=10, color='blue')
        else:
            ax.text(0.5, 0.5, 'No more\nGroundingDINO\nresults',
                   ha='center', va='center', transform=ax.transAxes, fontsize=12)

        ax.axis('off')

    # Plot CLIP results
    for i in range(rows):
        ax = axes[i, 1]

        if i < len(clip_display):
            result = clip_display[i]
            ax.imshow(result['image'])

            # Draw bounding boxes
            for box, score in zip(result['boxes'], result['scores']):
                x1, y1, x2, y2 = box.tolist()
                rect = patches.Rectangle(
                    (x1, y1), x2 - x1, y2 - y1,
                    linewidth=2, edgecolor='red', facecolor='none'
                )
                ax.add_patch(rect)
                ax.text(x1, y1 - 5, f'{score:.2f}',
                       color='red', fontweight='bold', fontsize=9)

            ax.set_title(f"CLIP: {result['image_path'].name}\nMatches: {len(result['boxes'])}",
                        fontsize=10, color='red')
        else:
            ax.text(0.5, 0.5, 'No more\nCLIP\nresults',
                   ha='center', va='center', transform=ax.transAxes, fontsize=12)

        ax.axis('off')

    plt.suptitle(f"Combined Results: GroundingDINO (Blue) vs CLIP (Red)", fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

# Display interface
interface = create_combined_interface()
display(interface)

# Cell 6: Batch Analysis with Both Models
def run_combined_batch_analysis(custom_queries=None):
    """Run batch analysis with both models"""
    import time
    from datetime import datetime

    # Default forensic queries
    default_queries = [
        "person with weapon",
        "weapon",
        "gun",
        "knife",
        "suspicious person",
        "person running",
        "vehicle",
        "mask",
        "backpack",
        "dark clothing"
    ]

    queries = default_queries + (custom_queries or [])

    print(f"🚀 Starting combined batch analysis with {len(queries)} queries")
    batch_start = time.time()
    batch_start_datetime = datetime.now()

    all_results = {}

    for i, query in enumerate(queries, 1):
        print(f"\n[{i}/{len(queries)}] Processing: '{query}'")

        query_start = time.time()
        results = search_with_both_models(query, CONFIG['SUSPECTS_GALLERY_PATH'])
        query_time = time.time() - query_start

        # Analyze results
        gdino_count = len(results.get('grounding_dino', []))
        clip_count = len(results.get('clip', []))

        all_results[query] = {
            'results': results,
            'grounding_dino_count': gdino_count,
            'clip_count': clip_count,
            'time': query_time
        }

        # Save results if any found
        if gdino_count > 0 or clip_count > 0:
            folder, files = copy_combined_results(results, CONFIG['RESULTS_OUTPUT_PATH'])
            print(f"📁 Saved to: {folder.name}")

    # Calculate totals
    batch_time = time.time() - batch_start
    batch_end_datetime = datetime.now()

    total_gdino_matches = sum(data['grounding_dino_count'] for data in all_results.values())
    total_clip_matches = sum(data['clip_count'] for data in all_results.values())

    # Format duration
    if batch_time >= 3600:
        duration_str = f"{int(batch_time//3600)}h {int((batch_time%3600)//60)}m {batch_time%60:.1f}s"
    elif batch_time >= 60:
        duration_str = f"{int(batch_time//60)}m {batch_time%60:.1f}s"
    else:
        duration_str = f"{batch_time:.1f}s"

    # Print comprehensive summary
    print(f"\n{'='*70}")
    print(f"📊 COMBINED BATCH ANALYSIS SUMMARY")
    print(f"{'='*70}")

    print(f"⏰ Start time: {batch_start_datetime.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"🏁 End time: {batch_end_datetime.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"⏱️ Total duration: {duration_str}")
    print(f"📊 Queries processed: {len(queries)}")
    print(f"📈 Average per query: {batch_time/len(queries):.1f}s")

    print(f"\n🎯 MODEL PERFORMANCE:")
    print(f"   GroundingDINO total matches: {total_gdino_matches}")
    print(f"   CLIP total matches: {total_clip_matches}")

    print(f"\n📋 DETAILED RESULTS:")
    print(f"{'Query':<25} {'GroundingDINO':<12} {'CLIP':<8} {'Time':<8}")
    print(f"-" * 60)

    for query, data in all_results.items():
        gdino_status = "✅" if data['grounding_dino_count'] > 0 else "⚪"
        clip_status = "✅" if data['clip_count'] > 0 else "⚪"

        print(f"{query[:24]:<25} {gdino_status} {data['grounding_dino_count']:<10} {clip_status} {data['clip_count']:<6} {data['time']:.1f}s")

    print(f"{'='*70}")

    return all_results

def analyze_model_agreement(batch_results):
    """Analyze agreement between models across all queries"""
    print(f"\n📊 MODEL AGREEMENT ANALYSIS")
    print(f"{'='*50}")

    agreement_stats = {
        'both_found': 0,
        'grounding_dino_only': 0,
        'clip_only': 0,
        'neither_found': 0
    }

    for query, data in batch_results.items():
        gdino_count = data['grounding_dino_count']
        clip_count = data['clip_count']

        if gdino_count > 0 and clip_count > 0:
            agreement_stats['both_found'] += 1
        elif gdino_count > 0:
            agreement_stats['grounding_dino_only'] += 1
        elif clip_count > 0:
            agreement_stats['clip_only'] += 1
        else:
            agreement_stats['neither_found'] += 1

    total_queries = len(batch_results)

    print(f"🤝 Both models found matches: {agreement_stats['both_found']}/{total_queries} ({agreement_stats['both_found']/total_queries*100:.1f}%)")
    print(f"🎯 GroundingDINO only: {agreement_stats['grounding_dino_only']}/{total_queries} ({agreement_stats['grounding_dino_only']/total_queries*100:.1f}%)")
    print(f"🔍 CLIP only: {agreement_stats['clip_only']}/{total_queries} ({agreement_stats['clip_only']/total_queries*100:.1f}%)")
    print(f"❌ Neither found matches: {agreement_stats['neither_found']}/{total_queries} ({agreement_stats['neither_found']/total_queries*100:.1f}%)")

    # Model effectiveness
    gdino_effective = agreement_stats['both_found'] + agreement_stats['grounding_dino_only']
    clip_effective = agreement_stats['both_found'] + agreement_stats['clip_only']

    print(f"\n📈 MODEL EFFECTIVENESS:")
    print(f"🎯 GroundingDINO effective queries: {gdino_effective}/{total_queries} ({gdino_effective/total_queries*100:.1f}%)")
    print(f"🔍 CLIP effective queries: {clip_effective}/{total_queries} ({clip_effective/total_queries*100:.1f}%)")

    return agreement_stats

def generate_forensic_report(batch_results, save_to_file=True):
    """Generate a comprehensive forensic analysis report"""
    from datetime import datetime

    report_content = []
    report_content.append("="*80)
    report_content.append("FORENSIC IMAGE ANALYSIS REPORT")
    report_content.append("Combined GroundingDINO + CLIP Analysis")
    report_content.append("="*80)
    report_content.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    report_content.append("")

    # Executive Summary
    total_queries = len(batch_results)
    total_gdino = sum(data['grounding_dino_count'] for data in batch_results.values())
    total_clip = sum(data['clip_count'] for data in batch_results.values())
    total_time = sum(data['time'] for data in batch_results.values())

    report_content.append("EXECUTIVE SUMMARY")
    report_content.append("-" * 20)
    report_content.append(f"Total queries processed: {total_queries}")
    report_content.append(f"Total processing time: {total_time/60:.1f} minutes")
    report_content.append(f"GroundingDINO total detections: {total_gdino}")
    report_content.append(f"CLIP total detections: {total_clip}")
    report_content.append(f"Combined unique findings: {total_gdino + total_clip}")
    report_content.append("")

    # Detailed Results
    report_content.append("DETAILED FINDINGS")
    report_content.append("-" * 20)

    for query, data in batch_results.items():
        report_content.append(f"Query: '{query}'")
        report_content.append(f"  GroundingDINO: {data['grounding_dino_count']} detections")
        report_content.append(f"  CLIP: {data['clip_count']} detections")
        report_content.append(f"  Processing time: {data['time']:.1f}s")
        report_content.append("")

    # Model Analysis
    agreement_stats = analyze_model_agreement(batch_results)
    report_content.append("MODEL AGREEMENT ANALYSIS")
    report_content.append("-" * 25)
    report_content.append(f"Queries where both models found matches: {agreement_stats['both_found']}")
    report_content.append(f"Queries where only GroundingDINO found matches: {agreement_stats['grounding_dino_only']}")
    report_content.append(f"Queries where only CLIP found matches: {agreement_stats['clip_only']}")
    report_content.append(f"Queries where neither found matches: {agreement_stats['neither_found']}")
    report_content.append("")

    # Recommendations
    report_content.append("RECOMMENDATIONS")
    report_content.append("-" * 15)
    if agreement_stats['both_found'] > 0:
        report_content.append("• High-confidence detections where both models agree should be prioritized for review")
    if agreement_stats['grounding_dino_only'] > agreement_stats['clip_only']:
        report_content.append("• GroundingDINO showed superior performance for this dataset")
    elif agreement_stats['clip_only'] > agreement_stats['grounding_dino_only']:
        report_content.append("• CLIP showed superior performance for this dataset")
    else:
        report_content.append("• Both models showed comparable performance - use both for comprehensive analysis")

    report_content.append("• Review all detections manually for final verification")
    report_content.append("• Consider adjusting confidence thresholds based on results")
    report_content.append("")
    report_content.append("="*80)

    report_text = "\n".join(report_content)

    # Save to file if requested
    if save_to_file:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        report_path = Path(CONFIG['RESULTS_OUTPUT_PATH']) / f"forensic_report_{timestamp}.txt"

        try:
            with open(report_path, 'w') as f:
                f.write(report_text)
            print(f"📄 Report saved to: {report_path}")
        except Exception as e:
            print(f"❌ Error saving report: {e}")

    print("\n" + report_text)
    return report_text

# Cell 7: Usage Guide and Examples
print("""
🎯 COMBINED GROUNDINGDINO + CLIP FORENSIC SEARCH
==============================================

🚀 FEATURES:
• Dual-model approach for comprehensive detection
• Side-by-side result comparison
• Agreement analysis between models
• Organized output with model-specific folders
• Comprehensive forensic reporting

📋 SETUP:
1. Place images in './suspects_gallery' folder
2. Run cells 1-5 in order
3. Use interface above for interactive search
4. Check both model results and agreements

🔍 MODEL COMPARISON:
• GroundingDINO: Better for complex language understanding
• CLIP: Better for general object recognition
• Combined: Maximum coverage and confidence validation

🎛️ PARAMETERS:
• GroundingDINO Confidence: 0.1-0.9 (default 0.35)
• CLIP Similarity: 0.1-0.8 (default 0.25)
• Comparison Mode: Shows both models side-by-side

📊 OUTPUT STRUCTURE:
search_results/
├── combined_search_20240611_143022/
│   ├── grounding_dino/          # GroundingDINO unique results
│   ├── clip/                    # CLIP unique results
│   ├── both_models/             # Images detected by both
│   └── forensic_report_*.txt    # Analysis report

🔍 RECOMMENDED QUERIES:
• "person with weapon" / "weapon" / "gun" / "knife"
• "suspicious person" / "person running"
• "vehicle" / "suspicious vehicle"
• "mask" / "person wearing mask"
• "backpack" / "bag"

💡 BEST PRACTICES:
• Use both models for critical analysis
• Higher confidence when both models agree
• Review model-specific detections manually
• Adjust thresholds based on dataset characteristics
• Generate reports for documentation

🚨 INTERPRETATION:
• Blue boxes: GroundingDINO detections
• Red boxes: CLIP detections
• Both models agreeing = higher confidence
• Model-specific detections may reveal different aspects

# Example usage:
# batch_results = run_combined_batch_analysis()
# report = generate_forensic_report(batch_results)
""")

# Additional utility functions
def quick_model_test(query="person with weapon"):
    """Quick test of both models"""
    print(f"🧪 Quick test with query: '{query}'")

    if not models_loaded:
        print("❌ Models not loaded properly")
        return

    results = search_with_both_models(query, CONFIG['SUSPECTS_GALLERY_PATH'])

    if results:
        comparison = compare_model_results(results)
        print("✅ Quick test completed - check results above")
        return results
    else:
        print("⚪ No results found in quick test")
        return None

def model_performance_summary():
    """Show current model status and performance info"""
    print("📊 MODEL STATUS SUMMARY")
    print("-" * 30)
    print(f"🎯 GroundingDINO: {'✅ Loaded' if combined_models.grounding_dino_model else '❌ Not loaded'}")
    print(f"   Model: {CONFIG['GROUNDING_DINO_MODEL']}")
    print(f"   Confidence threshold: {CONFIG['GROUNDING_DINO_CONFIDENCE']}")

    print(f"🔍 CLIP: {'✅ Loaded' if combined_models.clip_model else '❌ Not loaded'}")
    print(f"   Model: {CONFIG['CLIP_MODEL']}")
    print(f"   Similarity threshold: {CONFIG['CLIP_SIMILARITY_THRESHOLD']}")

    print(f"🖥️ Device: {CONFIG['DEVICE']}")
    print(f"📁 Gallery: {CONFIG['SUSPECTS_GALLERY_PATH']}")
    print(f"📋 Output: {CONFIG['RESULTS_OUTPUT_PATH']}")

# Show current status
model_performance_summary()

# Uncomment to run batch analysis:
# batch_results = run_combined_batch_analysis()
# analysis_report = generate_forensic_report(batch_results, save_to_file=True)

✅ Configuration loaded successfully
📁 Suspects gallery: ../../datasets/images/objects/raw
📁 Results output: ../../datasets/images/objects/detections
🎯 GroundingDINO: IDEA-Research/grounding-dino-base
🔍 CLIP: ViT-B/32
🚀 Combined model approach for comprehensive analysis
✅ Dependencies already installed
✅ All dependencies imported successfully
🖥️ Using CPU mode
📍 Device: cpu
📁 Directories ready
🚀 Loading both models...
📥 Loading GroundingDINO: IDEA-Research/grounding-dino-base
✅ GroundingDINO loaded successfully
📥 Loading CLIP: ViT-B/32
✅ CLIP loaded successfully
✅ Both models loaded successfully!


VBox(children=(VBox(children=(HTML(value='<h3>🚀 Combined GroundingDINO + CLIP Forensic Search</h3>'), HBox(chi…


🎯 COMBINED GROUNDINGDINO + CLIP FORENSIC SEARCH

🚀 FEATURES:
• Dual-model approach for comprehensive detection
• Side-by-side result comparison
• Agreement analysis between models
• Organized output with model-specific folders
• Comprehensive forensic reporting

📋 SETUP:
1. Place images in './suspects_gallery' folder
2. Run cells 1-5 in order
3. Use interface above for interactive search
4. Check both model results and agreements

🔍 MODEL COMPARISON:
• GroundingDINO: Better for complex language understanding
• CLIP: Better for general object recognition
• Combined: Maximum coverage and confidence validation

🎛️ PARAMETERS:
• GroundingDINO Confidence: 0.1-0.9 (default 0.35)
• CLIP Similarity: 0.1-0.8 (default 0.25)
• Comparison Mode: Shows both models side-by-side

📊 OUTPUT STRUCTURE:
search_results/
├── combined_search_20240611_143022/
│   ├── grounding_dino/          # GroundingDINO unique results
│   ├── clip/                    # CLIP unique results
│   ├── both_models/            