# Explainable Object Detection Analysis

This notebook provides interactive exploration of the explainable object detection framework.

## Contents
1. Setup and Pipeline Initialization
2. Object Detection with YOLOv8
3. Complete Pipeline Run & Analysis (CLIP semantic analysis, Grad-CAM saliency maps, LLaVA explanations, evaluation)
4. Save Results (including YOLO vs. CLIP misalignment / failure case analysis)


In [None]:
# Setup
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import torch

# Enable inline plots
%matplotlib inline
plt.rcParams['figure.figsize'] = [12, 8]

# Check GPU
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")


## 1. Initialize Pipeline


In [None]:
from config import get_default_config, update_config_for_cpu
from pipeline import ExplainableDetectionPipeline, create_pipeline

# Create pipeline (set use_llava=True if you have enough GPU memory).
# The GPU is requested only when CUDA is actually available, so the notebook
# still runs top-to-bottom on a CPU-only machine instead of unconditionally
# asking for a device that may not exist.
# NOTE(review): assumes create_pipeline(use_gpu=False) selects a CPU
# configuration -- confirm against the pipeline module.
pipeline = create_pipeline(use_gpu=torch.cuda.is_available(), use_llava=False)

print("Pipeline initialized!")


## 2. Object Detection with YOLOv8


In [None]:
# Build a synthetic 480x640 (rows x cols) 3-channel test image on a uniform gray background
test_img = np.full((480, 640, 3), 180, dtype=np.uint8)

# Draw filled shapes (thickness -1). Color names below follow RGB channel order,
# which is how plt.imshow renders the array and what the optional load path
# further down converts to.
cv2.rectangle(test_img, (50, 50), (180, 180), (255, 0, 0), -1)    # Red square
cv2.circle(test_img, (350, 150), 80, (0, 255, 0), -1)             # Green circle
cv2.rectangle(test_img, (500, 100), (620, 250), (0, 0, 255), -1)  # Blue rectangle

# Or load your own image (cv2.imread returns BGR, hence the conversion to RGB):
# test_img = cv2.imread('your_image.jpg')
# test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)

# Preview the input image without axis ticks
plt.imshow(test_img)
plt.title('Test Image')
plt.axis('off')
plt.show()


In [None]:
# Run the pipeline's detector on the test image
detector = pipeline.detector
detection_result = detector.detect(test_img)
n_found = detection_result.num_detections

# List each detection with a 1-based index, its class name, confidence and bbox
print(f"Found {n_found} detections")
for rank, hit in enumerate(detection_result.detections, start=1):
    print(f"  {rank}. {hit.class_name}: {hit.confidence:.1%} at {hit.bbox}")

# Display the visualization image returned by the detector
vis_img = detector.visualize_detections(detection_result)
plt.imshow(vis_img)
plt.title(f'YOLO Detections ({n_found} objects)')
plt.axis('off')
plt.show()


## 3. Complete Pipeline Run & Analysis


In [None]:
# Run the complete pipeline on the test image with explanations, saliency
# and evaluation all switched on, capped at 10 detections
run_options = {
    'max_detections': 10,
    'generate_explanations': True,
    'compute_saliency': True,
    'run_evaluation': True,
}
result = pipeline.process_image(test_img, **run_options)

# Headline numbers for the run
print(f"Processing time: {result.processing_time:.2f}s")
print(f"Detections: {result.num_detections}")

# Detailed results: one report section per detection.
# The semantic analysis, explanation and evaluation parts are each printed
# only when the corresponding attribute on the detection is truthy.
PREVIEW_CHARS = 100  # maximum number of explanation characters shown per detection
separator = '=' * 50

for i, det in enumerate(result.detections):
    print(f"\n{separator}")
    print(f"DETECTION {i+1}: {det.detection.class_name}")
    print(separator)
    print(f"Confidence: {det.detection.confidence:.1%}")

    if det.semantic_analysis and det.semantic_analysis.semantic_match:
        match = det.semantic_analysis.semantic_match
        print(f"CLIP Alignment: {match.alignment_score:.3f}")
        print(f"Top CLIP matches: {match.top_k_classes[:3]}")

    if det.explanation:
        text = det.explanation.explanation_text
        # Append the ellipsis only when the text was actually cut, so short
        # explanations are not shown as if they had been truncated.
        preview = text if len(text) <= PREVIEW_CHARS else f"{text[:PREVIEW_CHARS]}..."
        print(f"Explanation: {preview}")

    if det.evaluation:
        print(f"Quality Score: {det.evaluation.get_overall_score():.3f}")


In [None]:
# Visualize comprehensive results.
# Make sure the output directory exists before handing visualize_result a path
# inside it: on a fresh checkout nothing earlier in the notebook creates
# 'outputs/', so this cell could otherwise fail under Restart & Run All.
# exist_ok=True makes the call a no-op when the directory is already there.
os.makedirs('outputs', exist_ok=True)
vis = pipeline.visualize_result(result, 'outputs/pipeline_result.png')

# Show the returned visualization on a larger canvas than the notebook default
plt.figure(figsize=(16, 12))
plt.imshow(vis)
plt.axis('off')
plt.title('Complete Pipeline Results')
plt.show()


## 4. Save Results


In [None]:
# Persist the analysis: ensure the output directory exists, then write the JSON file
os.makedirs('outputs', exist_ok=True)
result.save('outputs/analysis_result.json')
print("Saved results to outputs/analysis_result.json")

# Compare YOLO's class against CLIP's top class and report any disagreements
mismatches = pipeline.analyze_misalignments(result)
if not mismatches:
    print("\nNo misalignments - YOLO and CLIP agree!")
else:
    print(f"\nFound {len(mismatches)} potential misclassifications")
    for entry in mismatches:
        yolo_label, clip_label = entry['yolo_class'], entry['clip_top_class']
        print(f"  YOLO: {yolo_label} vs CLIP: {clip_label}")

print("\nAnalysis complete!")
