In [53]:
from sympy import false

# Installation and Imports
"""
DETR + NLP Suspect Gallery Search System
==========================================
This system combines DETR object detection with multiple NLP models for advanced suspect search.
"""

# Uncomment the following lines when running for the first time (they install every third-party package imported below)
# %pip install transformers torch torchvision Pillow sentence-transformers openai-clip spacy matplotlib numpy
# !python -m spacy download en_core_web_sm

import os
import json
import shutil
from pathlib import Path
import tkinter as tk
from tkinter import simpledialog, messagebox
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection, pipeline
from sentence_transformers import SentenceTransformer
import clip
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import spacy
from typing import List, Dict, Any, Tuple
import warnings
# Suppress every warning category for the remainder of the session.
# NOTE(review): this blanket filter also hides deprecation and runtime warnings
# raised by the libraries imported above — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

print("✅ All imports successful!")

✅ All imports successful!


In [54]:
# Configuration Settings
"""
Configuration Settings
=====================
Modify these settings to customize the system behavior
"""

class Config:
    """User-editable settings for the search system; the rest of the notebook reads them through ``config``."""

    # Paths
    SUSPECTS_GALLERY_PATH = "../datasets/images/objects/raw"  # Input folder; searched recursively for image files
    RESULTS_PATH = "../datasets/images/objects/detr_nlp_results"  # Output root; matches are copied into a per-query sub-folder
    TEMP_PATH = "temp_processing"  # Created by setup_directories(); no other visible code writes to it

    # Model Configuration
    DETR_MODEL = "facebook/detr-resnet-50"  # Checkpoint name passed to from_pretrained() for processor and model

    # NLP models: switch each one on or off with its 'enabled' flag.
    # Only 'sentence_transformer' and 'clip' produce similarity scores; 'spacy_nlp'
    # only analyses the query text. With both scoring models disabled, every image
    # gets a combined score of 0.0.
    NLP_MODELS = {
        'sentence_transformer': {
            'enabled': False,
            'model_name': 'all-MiniLM-L6-v2',
            'description': 'Sentence-BERT for semantic similarity'
        },
        'clip': {
            'enabled': False,
            'model_name': 'ViT-B/32',
            'description': 'CLIP for vision-language understanding'
        },
        'spacy_nlp': {
            'enabled': True,
            'model_name': 'en_core_web_sm',
            'description': 'spaCy for NER and linguistic analysis'
        }
    }

    # Detection Settings
    CONFIDENCE_THRESHOLD = 0.7  # Passed as `threshold` to DETR post-processing; weaker detections are dropped
    MAX_RESULTS = 10  # Result list is truncated to this many entries after sorting
    SIMILARITY_THRESHOLD = 0.8  # Images whose combined score is below this are discarded

    # Visual Settings
    FIGURE_SIZE = (15, 10)  # figsize handed to plt.subplots() for the result grid
    BBOX_COLOR = 'red'  # Edge colour of bounding boxes and colour of their labels
    BBOX_LINEWIDTH = 2  # Line width of bounding-box rectangles

# Single shared settings object; the functions and classes defined below read it as `config`.
config = Config()
print("✅ Configuration loaded successfully!")

✅ Configuration loaded successfully!


In [55]:
 # Utility Functions
"""
Utility Functions
================
Helper functions for file operations and data processing
"""

def setup_directories():
    """Ensure the gallery, results and temp folders named in ``config`` exist.

    Missing parent folders are created too, and folders that already exist are
    left as they are. The list of paths is printed once all of them are in place.
    """
    required = [
        config.SUSPECTS_GALLERY_PATH,
        config.RESULTS_PATH,
        config.TEMP_PATH,
    ]
    for folder in map(Path, required):
        folder.mkdir(parents=True, exist_ok=True)
    print(f"✅ Directories setup complete: {required}")

def get_user_query():
    """Prompt for a free-text search query on standard input.

    Returns:
        str: The text the user typed with surrounding whitespace removed. An
        empty string is returned when nothing (or only whitespace) was entered
        or when standard input is closed, so callers can treat "no query" with a
        simple truthiness check.
    """
    # The prompt is a plain input() call, so no Tk window is needed. Creating a
    # hidden Tk root here only made the function fail on machines without a display.
    try:
        query = input("Enter your search query (e.g., 'person wearing red jacket', 'man with glasses', 'woman in blue dress'): ")
    except EOFError:
        # No interactive stdin available: behave as if the user entered nothing.
        return ""

    return query.strip()

def load_image_paths(directory: str) -> List[str]:
    """Collect the image files found anywhere below ``directory``.

    Args:
        directory: Folder to search recursively.

    Returns:
        Sorted list of file paths (as strings) whose extension, compared
        case-insensitively, is one of .jpg, .jpeg, .png, .bmp or .tiff.
        An empty list is returned when ``directory`` is not an existing folder.
    """
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}

    root = Path(directory)
    if not root.is_dir():
        return []

    # is_file() skips folders that merely carry an image-like suffix; sorting
    # makes the processing order independent of filesystem enumeration order.
    return sorted(
        str(file_path)
        for file_path in root.rglob('*')
        if file_path.suffix.lower() in image_extensions and file_path.is_file()
    )

def copy_results_to_folder(image_paths: List[str], query: str, scores: List[float]):
    """Copy matching images into a query-specific folder below ``config.RESULTS_PATH``.

    Args:
        image_paths: Source image files to copy.
        query: Search query; its alphanumeric, space, '-' and '_' characters
            form the destination folder name.
        scores: One score per image, embedded in the copied file's name.
            Pairs are formed with ``zip``, so extra entries in the longer list
            are ignored.

    Returns:
        List of destination paths (as strings), in the same order as the inputs.
    """
    # Create query-specific folder
    safe_query = "".join(c for c in query if c.isalnum() or c in (' ', '-', '_')).rstrip()
    if not safe_query:
        # A query made only of stripped characters would otherwise dump the
        # files straight into the results root.
        safe_query = "unnamed_query"
    query_folder = Path(config.RESULTS_PATH) / safe_query
    query_folder.mkdir(parents=True, exist_ok=True)

    copied_files = []
    used_names = set()
    for position, (image_path, score) in enumerate(zip(image_paths, scores), start=1):
        source = Path(image_path)
        new_name = f"{source.stem}_score_{score:.3f}{source.suffix}"
        if new_name in used_names:
            # Same stem and score from a different sub-folder: add the position
            # so the earlier copy is not silently overwritten.
            new_name = f"{source.stem}_score_{score:.3f}_{position}{source.suffix}"
        used_names.add(new_name)

        destination = query_folder / new_name
        shutil.copy2(image_path, destination)
        copied_files.append(str(destination))

    print(f"✅ Copied {len(copied_files)} files to {query_folder}")
    return copied_files

# Create the folders named in config as soon as this cell runs.
setup_directories()


✅ Directories setup complete: ['../datasets/images/objects/raw', '../datasets/images/objects/detr_nlp_results', 'temp_processing']


In [56]:
# DETR Object Detection Module
"""
DETR Object Detection Module
===========================
Handles object detection using DETR model
"""

class DETRDetector:
    """Loads the DETR checkpoint named by ``config.DETR_MODEL`` and runs it on single images."""

    def __init__(self):
        """Load processor and model, then place the model on CUDA when available, else on CPU."""
        checkpoint = config.DETR_MODEL
        self.processor = DetrImageProcessor.from_pretrained(checkpoint)
        self.model = DetrForObjectDetection.from_pretrained(checkpoint)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        print(f"✅ DETR model loaded on {self.device}")

    def detect_objects(self, image_path: str) -> Dict[str, Any]:
        """Run object detection on one image file.

        Args:
            image_path: Path of the image to analyse.

        Returns:
            Dict with ``'image_path'``, ``'objects'`` (a list of dicts holding
            ``'label'``, ``'confidence'`` and ``'bbox'``) and ``'image_size'``
            (the PIL size of the image). If anything fails, the error is
            printed and the dict carries an empty object list and
            ``'image_size': None``.
        """
        try:
            rgb_image = Image.open(image_path).convert('RGB')
            model_inputs = self.processor(images=rgb_image, return_tensors="pt").to(self.device)

            # Inference only: gradients are not needed
            with torch.no_grad():
                raw_outputs = self.model(**model_inputs)

            # Post-processing receives the reversed PIL size so boxes come back
            # scaled to the original image; detections below the configured
            # confidence threshold are dropped there.
            reversed_size = rgb_image.size[::-1]
            target_sizes = torch.tensor([reversed_size]).to(self.device)
            detections = self.processor.post_process_object_detection(
                raw_outputs, target_sizes=target_sizes, threshold=config.CONFIDENCE_THRESHOLD
            )[0]

            found = [
                {
                    'label': self.model.config.id2label[label.item()],
                    'confidence': score.item(),
                    'bbox': box.tolist()
                }
                for score, label, box in zip(
                    detections["scores"], detections["labels"], detections["boxes"]
                )
            ]

            return {'image_path': image_path, 'objects': found, 'image_size': rgb_image.size}

        except Exception as e:
            print(f"❌ Error processing {image_path}: {e}")
            return {'image_path': image_path, 'objects': [], 'image_size': None}

# Initialize DETR detector (DETRDetector.__init__ loads the processor and model when this cell runs)
detr_detector = DETRDetector()

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


✅ DETR model loaded on cpu


In [57]:
# NLP Models Module
"""
NLP Models Module
================
Handles multiple NLP models for text analysis and similarity
"""

class NLPModels:
    """Loads the NLP models enabled in ``config.NLP_MODELS`` and exposes analysis/similarity helpers.

    Loaded models are stored in ``self.models`` under the keys
    ``'sentence_transformer'``, ``'clip_model'`` / ``'clip_preprocess'`` and
    ``'spacy'``. A model that is disabled or failed to load is simply absent;
    each helper then returns a neutral result (``{}`` or all-zero scores).
    """

    def __init__(self):
        """Create the model registry and load every enabled model."""
        self.models = {}
        self.load_enabled_models()

    def load_enabled_models(self):
        """Load only enabled NLP models.

        Each model is loaded independently; a failure is printed and leaves that
        model out of ``self.models`` without affecting the others.
        """

        # Sentence Transformer
        if config.NLP_MODELS['sentence_transformer']['enabled']:
            try:
                self.models['sentence_transformer'] = SentenceTransformer(
                    config.NLP_MODELS['sentence_transformer']['model_name']
                )
                print("✅ Sentence Transformer loaded")
            except Exception as e:
                print(f"❌ Error loading Sentence Transformer: {e}")

        # CLIP
        if config.NLP_MODELS['clip']['enabled']:
            try:
                device = "cuda" if torch.cuda.is_available() else "cpu"
                self.models['clip_model'], self.models['clip_preprocess'] = clip.load(
                    config.NLP_MODELS['clip']['model_name'], device=device
                )
                print("✅ CLIP model loaded")
            except Exception as e:
                print(f"❌ Error loading CLIP: {e}")

        # spaCy NLP
        if config.NLP_MODELS['spacy_nlp']['enabled']:
            try:
                self.models['spacy'] = spacy.load(config.NLP_MODELS['spacy_nlp']['model_name'])
                print("✅ spaCy model loaded")
            except Exception as e:
                print(f"❌ Error loading spaCy: {e}")

    def analyze_query_with_spacy(self, query: str) -> Dict[str, Any]:
        """Analyze query using spaCy for entities and linguistic features.

        Returns:
            Dict with ``'entities'`` (text, label pairs), ``'tokens'``,
            ``'pos_tags'`` (text, POS pairs) and ``'noun_phrases'``; an empty
            dict when the spaCy model is not loaded.
        """
        if 'spacy' not in self.models:
            return {}

        doc = self.models['spacy'](query)

        return {
            'entities': [(ent.text, ent.label_) for ent in doc.ents],
            'tokens': [token.text for token in doc],
            'pos_tags': [(token.text, token.pos_) for token in doc],
            'noun_phrases': [chunk.text for chunk in doc.noun_chunks]
        }

    def compute_sentence_similarity(self, query: str, descriptions: List[str]) -> List[float]:
        """Compute cosine similarity between the query and each description.

        Args:
            query: Search text.
            descriptions: Texts to compare against the query.

        Returns:
            One float per description. All zeros when the Sentence Transformer
            is not loaded or encoding fails; an individual 0.0 when either
            embedding has zero norm (the cosine is undefined there).
        """
        if 'sentence_transformer' not in self.models or not descriptions:
            return [0.0] * len(descriptions)

        try:
            encoder = self.models['sentence_transformer']
            query_vec = np.asarray(encoder.encode([query]))[0]
            desc_vecs = np.asarray(encoder.encode(descriptions))

            # The query norm is the same for every description: compute it once.
            query_norm = np.linalg.norm(query_vec)

            similarities = []
            for desc_vec in desc_vecs:
                denominator = query_norm * np.linalg.norm(desc_vec)
                if denominator > 0:
                    similarities.append(float(np.dot(query_vec, desc_vec) / denominator))
                else:
                    # Zero-length embedding: avoid 0/0 producing NaN scores.
                    similarities.append(0.0)

            return similarities
        except Exception as e:
            print(f"❌ Error computing sentence similarity: {e}")
            return [0.0] * len(descriptions)

    def compute_clip_similarity(self, query: str, image_paths: List[str]) -> List[float]:
        """Compute CLIP text-image cosine similarity between the query and each image.

        Args:
            query: Search text.
            image_paths: Image files to score.

        Returns:
            One float per image path. All zeros when CLIP is not loaded or the
            query cannot be encoded; an individual 0.0 for any image that fails
            to load or encode (the error is printed).
        """
        if 'clip_model' not in self.models or 'clip_preprocess' not in self.models:
            return [0.0] * len(image_paths)
        if not image_paths:
            return []

        try:
            clip_model = self.models['clip_model']
            preprocess = self.models['clip_preprocess']
            device = next(clip_model.parameters()).device

            # The text embedding does not depend on the image: encode it once
            # instead of once per image.
            with torch.no_grad():
                text_features = clip_model.encode_text(clip.tokenize([query]).to(device))

            similarities = []
            for img_path in image_paths:
                try:
                    # Context manager releases the file handle right after preprocessing.
                    with Image.open(img_path) as img:
                        image = preprocess(img.convert('RGB')).unsqueeze(0).to(device)

                    with torch.no_grad():
                        image_features = clip_model.encode_image(image)
                        similarity = torch.cosine_similarity(text_features, image_features)

                    similarities.append(float(similarity.cpu().numpy()[0]))

                except Exception as e:
                    print(f"❌ Error processing image {img_path}: {e}")
                    similarities.append(0.0)

            return similarities

        except Exception as e:
            print(f"❌ Error computing CLIP similarity: {e}")
            return [0.0] * len(image_paths)

# Initialize NLP models (NLPModels.__init__ loads every model enabled in config.NLP_MODELS)
nlp_models = NLPModels()

✅ spaCy model loaded


In [58]:
# Search Engine Module
"""
Search Engine Module
===================
Main search engine that combines DETR and NLP models
"""

class SuspectGallerySearchEngine:
    """Ranks gallery images against a text query using DETR detections plus NLP similarity models.

    Attributes:
        detr_detector: Object providing ``detect_objects(image_path)``.
        nlp_models: Object providing the query-analysis and similarity helpers.
        image_cache: Maps an image path to its detection result so repeated
            searches in one session do not re-run detection on the same file.
    """

    # Relative weight of each similarity model in the combined score.
    MODEL_WEIGHTS = {'sentence_transformer': 0.4, 'clip': 0.6}

    def __init__(self):
        """Bind the module-level detector and NLP models and start with an empty cache."""
        self.detr_detector = detr_detector
        self.nlp_models = nlp_models
        self.image_cache = {}  # Cache for detected objects

    def create_image_description(self, detection_result: Dict[str, Any]) -> str:
        """Create text description from DETR detection results.

        Labels are counted and rendered as ``"a <label>"`` or
        ``"<count> <label>s"``, joined by commas in first-seen order.
        """
        objects = detection_result['objects']
        if not objects:
            return "image with no detected objects"

        # Group objects by label (dict keeps first-seen order)
        object_counts = {}
        for obj in objects:
            label = obj['label']
            object_counts[label] = object_counts.get(label, 0) + 1

        descriptions = [
            f"a {label}" if count == 1 else f"{count} {label}s"
            for label, count in object_counts.items()
        ]

        return "image containing " + ", ".join(descriptions)

    def _detect_cached(self, image_path: str) -> Dict[str, Any]:
        """Return the detection result for ``image_path``, running DETR only on a cache miss."""
        cached = self.image_cache.get(image_path)
        if cached is not None:
            return cached

        result = self.detr_detector.detect_objects(image_path)
        # Failed detections (image_size is None) are not cached so a later
        # search can retry the file.
        if result.get('image_size') is not None:
            self.image_cache[image_path] = result
        return result

    def _combine_scores(self, similarities: Dict[str, List[float]], index: int) -> float:
        """Weighted average of the per-model scores for the image at ``index``.

        The weighted sum is divided by the sum of the weights in use, so the
        result stays on the same scale as the individual similarities no matter
        how many models are enabled. Returns 0.0 when no model produced scores.
        """
        weighted_sum = 0.0
        total_weight = 0.0
        for model_name, values in similarities.items():
            weight = self.MODEL_WEIGHTS.get(model_name, 1.0)
            weighted_sum += weight * values[index]
            total_weight += weight
        return weighted_sum / total_weight if total_weight else 0.0

    def search_gallery(self, query: str) -> List[Dict[str, Any]]:
        """Main search function.

        Args:
            query: Free-text description to search for.

        Returns:
            Up to ``config.MAX_RESULTS`` result dicts, best first, each holding
            ``'rank'``, ``'image_path'``, ``'combined_score'``, ``'description'``,
            ``'detected_objects'`` and ``'individual_scores'``. Only images whose
            combined score reaches ``config.SIMILARITY_THRESHOLD`` are included;
            an empty list is returned when the gallery holds no images.
        """
        print(f"🔍 Starting search for: '{query}'")

        # Load all images
        image_paths = load_image_paths(config.SUSPECTS_GALLERY_PATH)
        if not image_paths:
            print("❌ No images found in suspects gallery!")
            return []

        print(f"📂 Found {len(image_paths)} images to process")

        # Analyze query with spaCy (informational only; it does not affect scoring)
        query_analysis = self.nlp_models.analyze_query_with_spacy(query)
        print(f"🧠 Query analysis: {query_analysis}")

        use_sentence = config.NLP_MODELS['sentence_transformer']['enabled']
        use_clip = config.NLP_MODELS['clip']['enabled']
        if not (use_sentence or use_clip):
            print("⚠️ No similarity model is enabled (sentence_transformer / clip); "
                  "every image will score 0.0.")

        # Process images with DETR
        print("🔍 Detecting objects in images...")
        detection_results = []
        for i, img_path in enumerate(image_paths):
            if i % 10 == 0:
                print(f"   Processing image {i+1}/{len(image_paths)}")
            detection_results.append(self._detect_cached(img_path))

        # Create descriptions for each image
        descriptions = [self.create_image_description(result) for result in detection_results]

        # Compute similarities using the enabled models
        similarities = {}
        if use_sentence:
            similarities['sentence_transformer'] = self.nlp_models.compute_sentence_similarity(
                query, descriptions
            )
        if use_clip:
            similarities['clip'] = self.nlp_models.compute_clip_similarity(query, image_paths)

        # Create results
        results = []
        for i, (img_path, detection_result, description) in enumerate(
            zip(image_paths, detection_results, descriptions)
        ):
            results.append({
                'rank': i + 1,
                'image_path': img_path,
                'combined_score': self._combine_scores(similarities, i),
                'description': description,
                'detected_objects': detection_result['objects'],
                'individual_scores': {
                    key: values[i] for key, values in similarities.items()
                }
            })

        # Filter by threshold, sort best-first, and limit the number of results
        results = [r for r in results if r['combined_score'] >= config.SIMILARITY_THRESHOLD]
        results.sort(key=lambda x: x['combined_score'], reverse=True)
        results = results[:config.MAX_RESULTS]

        # Ranks reflect the final ordering
        for i, result in enumerate(results):
            result['rank'] = i + 1

        print(f"✅ Search completed! Found {len(results)} matching results.")
        return results

# Initialize search engine (binds the detr_detector and nlp_models instances created in earlier cells)
search_engine = SuspectGallerySearchEngine()

In [59]:
# Visualization Module
"""
Visualization Module
===================
Functions for displaying search results and detected objects
"""

def visualize_search_results(results: List[Dict[str, Any]], query: str):
    """Visualize search results with detected objects.

    Draws the results in a grid of at most three columns. Each panel shows the
    image, a rectangle and label for every detected object, and a title with
    the rank, combined score and file name. A panel whose image cannot be drawn
    shows the error text instead.

    Args:
        results: Result dicts as returned by the search engine (needs
            ``'image_path'``, ``'detected_objects'``, ``'rank'`` and
            ``'combined_score'``).
        query: Search text, used in the figure title.
    """
    if not results:
        print("No results to display")
        return

    # Calculate grid size
    n_results = len(results)
    cols = min(3, n_results)
    rows = (n_results + cols - 1) // cols

    # squeeze=False makes subplots() always return a 2-D array of Axes, so a
    # single flatten() handles 1x1, 1xN and MxN grids alike.
    fig, axes = plt.subplots(rows, cols, figsize=config.FIGURE_SIZE, squeeze=False)
    axes = axes.flatten()
    fig.suptitle(f'Search Results for: "{query}"', fontsize=16, fontweight='bold')

    for ax, result in zip(axes, results):
        try:
            # Load and display image; the context manager releases the file handle
            with Image.open(result['image_path']) as img:
                ax.imshow(img.convert('RGB'))

            # Draw bounding boxes for detected objects
            for obj in result['detected_objects']:
                x1, y1, x2, y2 = obj['bbox']

                rect = plt.Rectangle(
                    (x1, y1), x2 - x1, y2 - y1,
                    linewidth=config.BBOX_LINEWIDTH,
                    edgecolor=config.BBOX_COLOR,
                    facecolor='none'
                )
                ax.add_patch(rect)

                # Label sits just above the box's top-left corner
                ax.text(
                    x1, y1 - 5,
                    f"{obj['label']} ({obj['confidence']:.2f})",
                    fontsize=8,
                    color=config.BBOX_COLOR,
                    bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.7)
                )

            # Set title with ranking and score
            title = f"Rank {result['rank']}: Score {result['combined_score']:.3f}\n"
            title += f"File: {Path(result['image_path']).name}"
            ax.set_title(title, fontsize=10)
            ax.axis('off')

        except Exception as e:
            ax.text(0.5, 0.5, f"Error loading image:\n{e}",
                    ha='center', va='center', transform=ax.transAxes)
            ax.set_title(f"Rank {result['rank']}: Error")
            ax.axis('off')

    # Hide empty subplots
    for ax in axes[n_results:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

def display_results_summary(results: List[Dict[str, Any]], query: str):
    """Print a text report of the search results.

    The report opens with a banner giving the query and the number of matches,
    followed by one section per result: file name, combined score, description,
    the score of each individual model and up to three detected objects.
    """
    rule = '=' * 60
    banner = [
        f"\n{rule}",
        "SEARCH RESULTS SUMMARY",
        rule,
        f"Query: '{query}'",
        f"Total matches found: {len(results)}",
        rule,
    ]
    print("\n".join(banner))

    section_end = f"  {'-'*50}"
    for entry in results:
        print(f"\nRank {entry['rank']}:")
        print(f"  📄 File: {Path(entry['image_path']).name}")
        print(f"  🎯 Combined Score: {entry['combined_score']:.4f}")
        print(f"  📝 Description: {entry['description']}")

        # Per-model scores
        print("  📊 Individual Scores:")
        for model_name in entry['individual_scores']:
            model_score = entry['individual_scores'][model_name]
            print(f"     - {model_name}: {model_score:.4f}")

        # Detected objects: heading only when there are any, at most three listed
        detected = entry['detected_objects']
        if detected:
            print("  🔍 Detected Objects:")
            for detection in detected[:3]:
                print(f"     - {detection['label']} (confidence: {detection['confidence']:.3f})")

        print(section_end)


In [60]:
# Main Execution Cell
"""
Main Execution Cell
==================
Run the complete suspect gallery search system
"""

def main():
    """Main function to run the suspect gallery search.

    Steps: verify the gallery folder exists and contains images, ask the user
    for a query, run the search, print and plot the matches, and copy the
    matching images into the results folder. Each early exit prints the reason.
    Any exception is caught, reported with a traceback, and not re-raised.
    """
    try:
        print("🚀 Starting Suspect Gallery Search System")
        print("="*60)

        # Check if suspects gallery exists and has images
        if not Path(config.SUSPECTS_GALLERY_PATH).exists():
            print(f"❌ Suspects gallery not found at: {config.SUSPECTS_GALLERY_PATH}")
            print("Please create the folder and add suspect images.")
            return

        image_paths = load_image_paths(config.SUSPECTS_GALLERY_PATH)
        if not image_paths:
            print(f"❌ No images found in: {config.SUSPECTS_GALLERY_PATH}")
            print("Please add images to the suspects gallery folder.")
            return

        print(f"✅ Found {len(image_paths)} images in suspects gallery")

        # Get search query from user
        query = get_user_query()
        if not query:
            print("❌ No query provided. Search cancelled.")
            return

        print(f"🔍 Search query: '{query}'")

        # Perform search
        results = search_engine.search_gallery(query)

        if not results:
            print("❌ No matching results found.")
            print("Try adjusting the similarity threshold or using different search terms.")
            return

        # Display results
        display_results_summary(results, query)
        visualize_search_results(results, query)

        # Copy results to a folder
        result_image_paths = [r['image_path'] for r in results]
        result_scores = [r['combined_score'] for r in results]
        copied_files = copy_results_to_folder(result_image_paths, query, result_scores)

        # Report the folder the files were actually written to: the copy step
        # sanitises the query, so RESULTS_PATH / query may not exist.
        if copied_files:
            results_folder = Path(copied_files[0]).parent
        else:
            results_folder = Path(config.RESULTS_PATH)

        print("\n✅ Search completed successfully!")
        print(f"📁 Results saved to: {results_folder}")

        # Show enabled models
        enabled_models = [name for name, settings in config.NLP_MODELS.items() if settings['enabled']]
        print(f"🤖 Models used: {', '.join(enabled_models)}")

    except Exception as e:
        print(f"❌ Error during search: {e}")
        import traceback
        traceback.print_exc()

# Run the search when this code executes as a script or notebook cell
# (where __name__ is "__main__"); nothing runs when it is imported as a module.
if __name__ == "__main__":
    main()

🚀 Starting Suspect Gallery Search System
✅ Found 121 images in suspects gallery
🔍 Search query: 'man with a gun'
🔍 Starting search for: 'man with a gun'
📂 Found 121 images to process
🧠 Query analysis: {'entities': [], 'tokens': ['man', 'with', 'a', 'gun'], 'pos_tags': [('man', 'NOUN'), ('with', 'ADP'), ('a', 'DET'), ('gun', 'NOUN')], 'noun_phrases': ['man', 'a gun']}
🔍 Detecting objects in images...
   Processing image 1/121
   Processing image 11/121
   Processing image 21/121
   Processing image 31/121
   Processing image 41/121
   Processing image 51/121
   Processing image 61/121
   Processing image 71/121
   Processing image 81/121
   Processing image 91/121
   Processing image 101/121
   Processing image 111/121
   Processing image 121/121
✅ Search completed! Found 0 matching results.
❌ No matching results found.
Try adjusting the similarity threshold or using different search terms.


In [61]:
# Testing and Demo Cell (Optional)
"""
Testing and Demo Cell
====================
Test the system with sample data (optional)
"""

def create_sample_data():
    """Create an empty ``sample_suspects`` folder in the working directory for testing.

    No images are generated; the printed hints tell the user to add their own
    and to point ``config.SUSPECTS_GALLERY_PATH`` at the folder.
    """
    print("Creating sample data structure...")

    # Folder is relative to the current working directory; an existing one is reused
    target = Path("sample_suspects")
    target.mkdir(exist_ok=True)

    follow_up = (
        f"✅ Sample directory created: {target}",
        "Add some test images to this directory and update config.SUSPECTS_GALLERY_PATH",
        "Then run the main() function again.",
    )
    for line in follow_up:
        print(line)

def show_system_info():
    """Print the active configuration, the status of each NLP model and CUDA availability."""
    divider = "=" * 50

    # --- general settings ---
    print("🔧 SYSTEM CONFIGURATION")
    print(divider)
    setting_rows = (
        ("Suspects Gallery Path", config.SUSPECTS_GALLERY_PATH),
        ("Results Path", config.RESULTS_PATH),
        ("DETR Model", config.DETR_MODEL),
        ("Confidence Threshold", config.CONFIDENCE_THRESHOLD),
        ("Similarity Threshold", config.SIMILARITY_THRESHOLD),
        ("Max Results", config.MAX_RESULTS),
    )
    for caption, value in setting_rows:
        print(f"{caption}: {value}")

    # --- one entry per configured NLP model ---
    print("\n🤖 NLP MODELS STATUS")
    print(divider)
    for model_name, settings in config.NLP_MODELS.items():
        if settings['enabled']:
            status = "✅ ENABLED"
        else:
            status = "❌ DISABLED"
        print(f"{model_name}: {status}")
        print(f"   Model: {settings['model_name']}")
        print(f"   Description: {settings['description']}")

    # --- hardware ---
    print("\n💻 DEVICE INFO")
    print(divider)
    cuda_available = torch.cuda.is_available()
    print(f"PyTorch CUDA Available: {cuda_available}")
    if cuda_available:
        print(f"GPU: {torch.cuda.get_device_name(0)}")

# Show system information (prints the current config values, NLP model status and CUDA availability)
show_system_info()

# Uncomment the following line to create sample data structure
# create_sample_data()

🔧 SYSTEM CONFIGURATION
Suspects Gallery Path: ../datasets/images/objects/raw
Results Path: ../datasets/images/objects/detr_nlp_results
DETR Model: facebook/detr-resnet-50
Confidence Threshold: 0.7
Similarity Threshold: 0.8
Max Results: 10

🤖 NLP MODELS STATUS
sentence_transformer: ❌ DISABLED
   Model: all-MiniLM-L6-v2
   Description: Sentence-BERT for semantic similarity
clip: ❌ DISABLED
   Model: ViT-B/32
   Description: CLIP for vision-language understanding
spacy_nlp: ✅ ENABLED
   Model: en_core_web_sm
   Description: spaCy for NER and linguistic analysis

💻 DEVICE INFO
PyTorch CUDA Available: False
