In [None]:
# Phase 1 Integration Test
import sys
sys.path.append('..')

from src.yolo_detector import YOLODetector
from src.clip_classifier import CLIPClassifier
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

# Phase 1 integration flow: run YOLO on one test image, then ask CLIP to label
# the first few detected regions against a fixed list of text prompts.

# Stdlib helpers used only to fetch the test image; imported here so this cell
# stays self-contained.
from io import BytesIO
from urllib.request import urlopen

# Initialize models
print("Initializing models...")
yolo_detector = YOLODetector('yolov8n.pt')
clip_classifier = CLIPClassifier()

# Test image
test_image = "https://ultralytics.com/images/bus.jpg"

# Step 1: YOLO Detection
print("Step 1: Running YOLO detection...")
yolo_results = yolo_detector.detect(test_image)
boxes, scores, class_ids = yolo_detector.extract_detections(yolo_results)

print(f"YOLO detected {len(boxes)} objects")

# Step 2: CLIP Classification on crops
print("\nStep 2: Testing CLIP on image crops...")
# Load the same image into PIL so detections can be cropped out of it.
# The previous code nested two Image.open calls and used `requests` without
# importing it; the image is now opened exactly once from the downloaded bytes.
if test_image.startswith(("http://", "https://")):
    # A timeout keeps the cell from hanging forever on a stalled connection;
    # the context manager closes the HTTP response once the bytes are read.
    with urlopen(test_image, timeout=30) as response:
        image = Image.open(BytesIO(response.read()))
else:
    # Fallback for when test_image is pointed at a local file instead.
    image = Image.open(test_image)
# Force a 3-channel image so every crop has the same mode
# (NOTE(review): assumes CLIPClassifier expects RGB input - confirm).
image = image.convert("RGB")

# Test CLIP on first few detections
text_prompts = [
    "a vehicle",
    "a person",
    "a traffic light",
    "a building",
    "a tree",
]

# Only the first 3 detections are checked to keep the smoke test quick.
for i, (box, score) in enumerate(zip(boxes[:3], scores[:3])):
    # Each box is unpacked as (x1, y1, x2, y2) and cast to ints for cropping
    # (assumes boxes are numpy arrays in pixel coordinates - TODO confirm).
    x1, y1, x2, y2 = box.astype(int)
    crop = image.crop((x1, y1, x2, y2))

    clip_result = clip_classifier.classify(crop, text_prompts)

    print(f"\nDetection {i+1}:")
    print(f"  BBox: [{x1}, {y1}, {x2}, {y2}]")
    print(f"  YOLO confidence: {score:.3f}")
    print(f"  CLIP best match: {clip_result['best_class']} (score: {clip_result['best_score']:.3f})")

print("\nPhase 1 test completed successfully!")