In [6]:
# =============================================================================
# JUPYTER NOTEBOOK DETR + NLP GALLERY SEARCH
# Run each cell sequentially
# =============================================================================

# CELL 1: Install Requirements (run once)
!pip install torch transformers sentence-transformers pillow

# CELL 2: Imports and Configuration
import os
import shutil
import json
from pathlib import Path
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Configuration - Edit these as needed
CONFIG = {
    # Paths
    'suspect_gallery': '../datasets/images/objects/raw',
    'results_folder': '../datasets/images/objects/detr_nlp_results',

    # Model Selection
    'approach': 'detr_sentence_similarity',  # 'detr_sentence_similarity', 'detr_clip', 'owlvit_direct'
    'detection_model': 'facebook/detr-resnet-50',
    'nlp_model': 'sentence-transformers/all-MiniLM-L6-v2',
    'clip_model': 'openai/clip-vit-base-patch32',

    # Settings
    'detection_confidence': 0.6,
    'similarity_threshold': 0.3,
    'max_results': 20,
    'show_progress': True,
    'image_formats': ['.jpg', '.jpeg', '.png', '.bmp', '.tiff'],
}

print("✅ Configuration loaded")
print(f"   Approach: {CONFIG['approach']}")
print(f"   Gallery: {CONFIG['suspect_gallery']}")
print(f"   Results: {CONFIG['results_folder']}")

# CELL 3: Setup Folders
def setup_folders():
    """Create necessary folders"""
    folders = [CONFIG['suspect_gallery'], CONFIG['results_folder'], './temp_detections']

    for folder in folders:
        Path(folder).mkdir(exist_ok=True)
        print(f"📁 Created: {folder}")

    print(f"\n💡 Next steps:")
    print(f"1. Add suspect images to: {CONFIG['suspect_gallery']}")
    print(f"2. Run the model loading cell")
    print(f"3. Start searching!")

# Run setup
setup_folders()

# CELL 4: Model Loading Class
class JupyterGallerySearcher:
    """Jupyter-friendly gallery searcher"""

    def __init__(self, config=None):
        self.config = config or CONFIG
        self.models_loaded = False

    def load_models(self):
        """Load models based on selected approach"""
        print(f"🤖 Loading models for: {self.config['approach']}")

        try:
            if self.config['approach'] == 'detr_sentence_similarity':
                self._load_detr_sentence()
            elif self.config['approach'] == 'detr_clip':
                self._load_detr_clip()
            elif self.config['approach'] == 'owlvit_direct':
                self._load_owlvit()

            self.models_loaded = True
            print("✅ Models loaded successfully!")

        except Exception as e:
            print(f"❌ Model loading failed: {e}")
            print("💡 Make sure you ran the install cell first")
            raise

    def _load_detr_sentence(self):
        """Load DETR + Sentence Transformer"""
        from transformers import AutoProcessor, AutoModelForObjectDetection
        from sentence_transformers import SentenceTransformer

        print("   Loading DETR...")
        self.detr_processor = AutoProcessor.from_pretrained(self.config['detection_model'])
        self.detr_model = AutoModelForObjectDetection.from_pretrained(self.config['detection_model'])

        print("   Loading Sentence Transformer...")
        self.nlp_model = SentenceTransformer(self.config['nlp_model'])

        print("   ✅ DETR + Sentence Transformer ready")

    def _load_detr_clip(self):
        """Load DETR + CLIP"""
        from transformers import AutoProcessor, AutoModelForObjectDetection, CLIPProcessor, CLIPModel

        print("   Loading DETR...")
        self.detr_processor = AutoProcessor.from_pretrained(self.config['detection_model'])
        self.detr_model = AutoModelForObjectDetection.from_pretrained(self.config['detection_model'])

        print("   Loading CLIP...")
        self.clip_processor = CLIPProcessor.from_pretrained(self.config['clip_model'])
        self.clip_model = CLIPModel.from_pretrained(self.config['clip_model'])

        print("   ✅ DETR + CLIP ready")

    def _load_owlvit(self):
        """Load OWL-ViT"""
        from transformers import OwlViTProcessor, OwlViTForObjectDetection

        print("   Loading OWL-ViT...")
        self.owlvit_processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
        self.owlvit_model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")

        print("   ✅ OWL-ViT ready")

    def get_gallery_images(self):
        """Get all images from gallery"""
        gallery_path = Path(self.config['suspect_gallery'])

        if not gallery_path.exists():
            print(f"❌ Gallery folder not found: {gallery_path}")
            return []

        images = []
        for ext in self.config['image_formats']:
            images.extend(gallery_path.glob(f"*{ext}"))
            images.extend(gallery_path.glob(f"*{ext.upper()}"))

        return sorted(images)

    def search_gallery(self, query, session_name=None):
        """Search gallery with natural language query"""
        if not self.models_loaded:
            print("❌ Models not loaded! Run the model loading cell first.")
            return []

        if session_name is None:
            session_name = f"search_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        print(f"🔍 Searching gallery for: '{query}'")

        # Get images
        gallery_images = self.get_gallery_images()

        if not gallery_images:
            print(f"❌ No images found in {self.config['suspect_gallery']}")
            print(f"💡 Add images to the gallery folder first")
            return []

        print(f"   📁 Processing {len(gallery_images)} images...")

        # Search based on approach
        if self.config['approach'] == 'detr_sentence_similarity':
            matches = self._search_detr_sentence(gallery_images, query)
        elif self.config['approach'] == 'detr_clip':
            matches = self._search_detr_clip(gallery_images, query)
        elif self.config['approach'] == 'owlvit_direct':
            matches = self._search_owlvit(gallery_images, query)

        # Copy results
        if matches:
            self._copy_results(matches, query, session_name)
            print(f"✅ Found {len(matches)} matches!")
            print(f"📸 Results copied to: {self.config['results_folder']}/{session_name}")

            # Show top matches
            print(f"\n🏆 Top matches:")
            for i, match in enumerate(matches[:3], 1):
                score = match['similarity_score']
                path = Path(match['image_path']).name
                print(f"   {i}. {path} (score: {score:.3f})")
        else:
            print(f"❌ No matches found for: '{query}'")
            print(f"💡 Try lowering similarity_threshold or different query")

        return matches

    def _search_detr_sentence(self, gallery_images, query):
        """DETR + Sentence similarity search"""
        import torch
        from PIL import Image

        matches = []
        query_embedding = self.nlp_model.encode([query])

        for i, image_path in enumerate(gallery_images):
            if i % 5 == 0:  # Progress every 5 images
                print(f"      📸 {i+1}/{len(gallery_images)}")

            try:
                # Load and process image
                image = Image.open(image_path).convert('RGB')
                inputs = self.detr_processor(images=image, return_tensors="pt")

                # DETR detection
                with torch.no_grad():
                    outputs = self.detr_model(**inputs)

                # Post-process
                target_sizes = torch.tensor([image.size[::-1]])
                results = self.detr_processor.post_process_object_detection(
                    outputs, target_sizes=target_sizes,
                    threshold=self.config['detection_confidence']
                )[0]

                # Get detected objects
                detected_objects = []
                for label_id in results["labels"]:
                    label_name = self.detr_model.config.id2label[label_id.item()]
                    detected_objects.append(label_name)

                if detected_objects:
                    # Create scene description
                    scene_description = f"Image contains: {', '.join(detected_objects)}"

                    # Calculate similarity
                    scene_embedding = self.nlp_model.encode([scene_description])
                    similarity = self.nlp_model.similarity(query_embedding, scene_embedding)[0][0].item()

                    if similarity >= self.config['similarity_threshold']:
                        matches.append({
                            'image_path': str(image_path),
                            'similarity_score': similarity,
                            'detected_objects': detected_objects,
                            'scene_description': scene_description
                        })

            except Exception as e:
                print(f"      ⚠️ Error with {image_path.name}: {e}")

        # Sort by similarity
        matches.sort(key=lambda x: x['similarity_score'], reverse=True)
        return matches[:self.config['max_results']]

    def _search_detr_clip(self, gallery_images, query):
        """DETR + CLIP search"""
        import torch
        from PIL import Image

        matches = []

        for i, image_path in enumerate(gallery_images):
            if i % 5 == 0:
                print(f"      📸 {i+1}/{len(gallery_images)}")

            try:
                image = Image.open(image_path).convert('RGB')

                # DETR detection
                inputs = self.detr_processor(images=image, return_tensors="pt")
                with torch.no_grad():
                    outputs = self.detr_model(**inputs)

                target_sizes = torch.tensor([image.size[::-1]])
                results = self.detr_processor.post_process_object_detection(
                    outputs, target_sizes=target_sizes,
                    threshold=self.config['detection_confidence']
                )[0]

                if len(results["boxes"]) > 0:
                    # CLIP similarity
                    clip_inputs = self.clip_processor(text=[query], images=image, return_tensors="pt", padding=True)
                    with torch.no_grad():
                        clip_outputs = self.clip_model(**clip_inputs)
                        logits_per_image = clip_outputs.logits_per_image
                        similarity = torch.nn.functional.softmax(logits_per_image, dim=1)[0][0].item()

                    if similarity >= self.config['similarity_threshold']:
                        detected_objects = []
                        for label_id in results["labels"]:
                            label_name = self.detr_model.config.id2label[label_id.item()]
                            detected_objects.append(label_name)

                        matches.append({
                            'image_path': str(image_path),
                            'similarity_score': similarity,
                            'detected_objects': detected_objects
                        })

            except Exception as e:
                print(f"      ⚠️ Error with {image_path.name}: {e}")

        matches.sort(key=lambda x: x['similarity_score'], reverse=True)
        return matches[:self.config['max_results']]

    def _search_owlvit(self, gallery_images, query):
        """OWL-ViT direct search"""
        import torch
        from PIL import Image

        matches = []

        for i, image_path in enumerate(gallery_images):
            if i % 5 == 0:
                print(f"      📸 {i+1}/{len(gallery_images)}")

            try:
                image = Image.open(image_path).convert('RGB')
                inputs = self.owlvit_processor(text=[query], images=image, return_tensors="pt")

                with torch.no_grad():
                    outputs = self.owlvit_model(**inputs)

                target_sizes = torch.Tensor([image.size[::-1]])
                results = self.owlvit_processor.post_process_object_detection(
                    outputs=outputs,
                    target_sizes=target_sizes,
                    threshold=self.config['similarity_threshold']
                )

                if results and len(results) > 0 and len(results[0]["boxes"]) > 0:
                    result = results[0]
                    scores = result["scores"]
                    avg_confidence = scores.mean().item()

                    matches.append({
                        'image_path': str(image_path),
                        'similarity_score': avg_confidence,
                        'detections': len(result["boxes"])
                    })

            except Exception as e:
                print(f"      ⚠️ Error with {image_path.name}: {e}")

        matches.sort(key=lambda x: x['similarity_score'], reverse=True)
        return matches[:self.config['max_results']]

    def _copy_results(self, matches, query, session_name):
        """Copy results to results folder"""
        session_folder = Path(self.config['results_folder']) / session_name
        session_folder.mkdir(exist_ok=True)

        # Save metadata
        search_metadata = {
            'query': query,
            'session_name': session_name,
            'timestamp': datetime.now().isoformat(),
            'approach': self.config['approach'],
            'total_matches': len(matches),
            'matches': matches
        }

        with open(session_folder / 'search_metadata.json', 'w') as f:
            json.dump(search_metadata, f, indent=2)

        # Copy images
        for i, match in enumerate(matches):
            try:
                source_path = Path(match['image_path'])
                similarity_score = match['similarity_score']
                new_name = f"{i+1:03d}_score_{similarity_score:.3f}_{source_path.name}"
                dest_path = session_folder / new_name
                shutil.copy2(source_path, dest_path)
            except Exception as e:
                print(f"      ❌ Copy error: {e}")

# Initialize the searcher
searcher = JupyterGallerySearcher(CONFIG)
print("✅ Gallery searcher ready")

# CELL 5: Load Models (run this after adding images to gallery)
print("🤖 Loading models...")
searcher.load_models()

# CELL 6: Quick Gallery Check
gallery_images = searcher.get_gallery_images()
print(f"📁 Gallery status:")
print(f"   Location: {CONFIG['suspect_gallery']}")
print(f"   Images found: {len(gallery_images)}")

if gallery_images:
    print(f"   Sample images:")
    for i, img in enumerate(gallery_images[:5], 1):
        print(f"     {i}. {img.name}")
    if len(gallery_images) > 5:
        print(f"     ... and {len(gallery_images) - 5} more")
else:
    print(f"   ❌ No images found!")
    print(f"   💡 Add images to: {CONFIG['suspect_gallery']}")

# CELL 7: Search Examples - Run these to test
def run_search_examples():
    """Run example searches"""

    if not searcher.models_loaded:
        print("❌ Load models first!")
        return

    example_queries = [
        # "person",
        # "person with phone",
        # "person holding object",
        # "car",
        # "person near car"
    ]

    print("🔍 Running example searches...")

    for query in example_queries:
        print(f"\n" + "="*50)
        matches = searcher.search_gallery(query, f"example_{query.replace(' ', '_')}")

        if matches:
            print(f"✅ '{query}': {len(matches)} matches")
        else:
            print(f"❌ '{query}': No matches")

    print(f"\n🎯 Example searches complete!")
    print(f"📂 Check results in: {CONFIG['results_folder']}")

# Uncomment to run examples:
# run_search_examples()

# CELL 8: Custom Search Function
def search_custom(query, approach=None, threshold=None):
    """
    Custom search function for easy testing

    Args:
        query: Search query string
        approach: 'detr_sentence_similarity', 'detr_clip', 'owlvit_direct'
        threshold: Similarity threshold (0.1-0.9)
    """

    # Update config if parameters provided
    if approach:
        CONFIG['approach'] = approach
        print(f"🔄 Switched to approach: {approach}")
        searcher.config['approach'] = approach
        # Note: You'll need to reload models if switching approaches

    if threshold:
        CONFIG['similarity_threshold'] = threshold
        searcher.config['similarity_threshold'] = threshold
        print(f"🎯 Set threshold to: {threshold}")

    # Run search
    matches = searcher.search_gallery(query)

    # Display results
    if matches:
        print(f"\n📊 Search Results for '{query}':")
        print(f"   Total matches: {len(matches)}")
        print(f"   Approach: {CONFIG['approach']}")
        print(f"   Threshold: {CONFIG['similarity_threshold']}")

        print(f"\n🏆 Top 5 matches:")
        for i, match in enumerate(matches[:5], 1):
            score = match['similarity_score']
            path = Path(match['image_path']).name
            print(f"   {i}. {path} - Score: {score:.3f}")

    return matches

# CELL 9: Interactive Search Cell - Edit and run as needed

# Example searches - edit these and run:

# Basic search
# matches1 = search_custom("person")

# More specific search
matches2 = search_custom("person holding gum")

# With custom threshold (more sensitive)
# matches3 = search_custom("person with phone", threshold=0.2)

# Different approach
# Note: Need to reload models when switching approaches
# matches4 = search_custom("weapon", approach='owlvit_direct')

print("🎯 Search complete! Check the results folders.")

# CELL 10: Experiment with Different Settings
def experiment_settings():
    """Experiment with different thresholds and approaches"""

    test_query = "person"  # Change this to your test query
    thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]

    print(f"🧪 Experimenting with query: '{test_query}'")
    print(f"📊 Testing different thresholds...")

    results_summary = {}

    for threshold in thresholds:
        matches = search_custom(test_query, threshold=threshold)
        results_summary[threshold] = len(matches)
        print(f"   Threshold {threshold}: {len(matches)} matches")

    print(f"\n📈 Results Summary:")
    for threshold, count in results_summary.items():
        print(f"   {threshold}: {count} matches")

    return results_summary

# Uncomment to run experiment:
# experiment_results = experiment_settings()

# CELL 11: Analyze Results
def analyze_results():
    """Analyze search results"""
    results_path = Path(CONFIG['results_folder'])

    print(f"📊 Results Analysis")
    print(f"📂 Results folder: {results_path}")

    if not results_path.exists():
        print("❌ No results folder found")
        return

    sessions = list(results_path.iterdir())

    if not sessions:
        print("❌ No search sessions found")
        return

    print(f"📋 Found {len(sessions)} search sessions:")

    for session in sessions:
        if session.is_dir():
            metadata_file = session / 'search_metadata.json'

            if metadata_file.exists():
                try:
                    with open(metadata_file, 'r') as f:
                        metadata = json.load(f)

                    print(f"\n📁 {session.name}")
                    print(f"   Query: '{metadata['query']}'")
                    print(f"   Matches: {metadata['total_matches']}")
                    print(f"   Approach: {metadata['approach']}")

                    if metadata['matches']:
                        best_score = metadata['matches'][0]['similarity_score']
                        print(f"   Best score: {best_score:.3f}")

                except Exception as e:
                    print(f"   ❌ Error reading metadata: {e}")

# Run analysis
analyze_results()

print("\n🎉 Jupyter DETR+NLP System Ready!")
print("💡 Edit the search queries in the cells above and run them to test different approaches")

✅ Configuration loaded
   Approach: detr_sentence_similarity
   Gallery: ../datasets/images/objects/raw
   Results: ../datasets/images/objects/detr_nlp_results
📁 Created: ../datasets/images/objects/raw
📁 Created: ../datasets/images/objects/detr_nlp_results
📁 Created: ./temp_detections

💡 Next steps:
1. Add suspect images to: ../datasets/images/objects/raw
2. Run the model loading cell
3. Start searching!
✅ Gallery searcher ready
🤖 Loading models...
🤖 Loading models for: detr_sentence_similarity
   Loading DETR...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


   Loading Sentence Transformer...
   ✅ DETR + Sentence Transformer ready
✅ Models loaded successfully!
📁 Gallery status:
   Location: ../datasets/images/objects/raw
   Images found: 242
   Sample images:
     1. 445_jpg.rf.2e04379013684f454abbc00564910fcc.jpg
     2. 445_jpg.rf.2e04379013684f454abbc00564910fcc.jpg
     3. ABbframe00154_jpg.rf.d56723a3de6d775966551d0d0e8add67.jpg
     4. ABbframe00154_jpg.rf.d56723a3de6d775966551d0d0e8add67.jpg
     5. ABbframe00160_jpg.rf.23c17f2cfeacd7873e38995e4832e0c1.jpg
     ... and 237 more
🔍 Searching gallery for: 'person holding gum'
   📁 Processing 242 images...
      📸 1/242
      📸 6/242
      📸 11/242
      📸 16/242
      📸 21/242
      📸 26/242
      📸 31/242
      📸 36/242
      📸 41/242
      📸 46/242
      📸 51/242
      📸 56/242
      📸 61/242
      📸 66/242
      📸 71/242
      📸 76/242
      📸 81/242
      📸 86/242
      📸 91/242
      📸 96/242
      📸 101/242
      📸 106/242
      📸 111/242
      📸 116/242
      📸 121/242
      📸 1