# US-001: Visual Trash Classification
## Epic 1: Artificial Intelligence & Vision

**Objective:** Kuko identifies and classifies trash using a 5MP camera and Gemini AI.

**Acceptance Criteria:**
- ✅ Capture photo with 5MP camera
- ✅ Gemini AI classifies into: toy, trash, clothing, other
- ✅ Confidence >70% for each object
- ✅ Returns approximate location (bbox)
- ✅ Classification time <3 seconds
- ✅ Works with indoor lighting (>200 lux)

## 1. Install Dependencies
Run this cell first to install required packages

In [None]:
!pip install google-generativeai opencv-python Pillow -q

## 2. Import Libraries

In [None]:
import cv2
import google.generativeai as genai
import time
from PIL import Image
import json
import os
from IPython.display import display, Image as IPImage

print("✓ Libraries imported successfully")

## 3. Configure Gemini API

**IMPORTANT:** Set your Gemini API key in the `GEMINI_API_KEY` environment variable before proceeding, and never commit a real key together with the notebook.

In [None]:
# Set Gemini API Key as environment variable
import os

# Literal shipped with the notebook template; used to detect an unedited cell.
API_KEY_PLACEHOLDER = 'YOUR_API_KEY_HERE'

# TODO: Either export GEMINI_API_KEY before starting the notebook (preferred,
# keeps the key out of the file) or replace the value below with your actual
# Gemini API key.
INLINE_API_KEY = 'YOUR_API_KEY_HERE'

# Only fall back to the inline value when the variable is missing or empty,
# so a key already exported in the shell is not clobbered by the placeholder.
if not os.environ.get('GEMINI_API_KEY'):
    os.environ['GEMINI_API_KEY'] = INLINE_API_KEY

if os.environ['GEMINI_API_KEY'] == API_KEY_PLACEHOLDER:
    print("⚠️  GEMINI_API_KEY still holds the placeholder value - set a real key before continuing")
else:
    print("✓ GEMINI_API_KEY environment variable set")

In [None]:
# Configure Gemini API using environment variable
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

# Reject the template placeholder as well as a missing/empty value: the
# placeholder is a truthy string, so a plain truthiness check would let it
# through and the failure would only surface on the first API call.
if not GEMINI_API_KEY or GEMINI_API_KEY == 'YOUR_API_KEY_HERE':
    raise ValueError("GEMINI_API_KEY environment variable not set. Please set it before running.")

genai.configure(api_key=GEMINI_API_KEY)

print("✓ Gemini API configured successfully")

# Cell output is stored inside the notebook file, so never echo a usable
# prefix of the secret; show at most the last four characters as a hint.
key_hint = GEMINI_API_KEY[-4:] if len(GEMINI_API_KEY) > 8 else ""
print(f"✓ API Key: ****{key_hint}")

## 4. Initialize 5MP Camera

In [None]:
# Camera configuration for 5MP (2592x1944)
CAMERA_WIDTH = 2592
CAMERA_HEIGHT = 1944

# Initialize camera (device index 0)
camera = cv2.VideoCapture(0)

# Fail fast when the device could not be opened; otherwise this cell would
# still print "Camera initialized" and the problem would only show up later.
if not camera.isOpened():
    camera.release()
    raise RuntimeError("❌ Could not open camera at index 0")

# Request the 5MP resolution; the checks below compare it with what the
# capture object actually reports.
camera.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)

# Verify actual resolution
actual_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
actual_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))

print(f"✓ Camera initialized: {actual_width}x{actual_height}")
print(f"✓ Target resolution: 5MP ({CAMERA_WIDTH}x{CAMERA_HEIGHT})")

if (actual_width, actual_height) == (CAMERA_WIDTH, CAMERA_HEIGHT):
    print("✓ 5MP camera verified!")
else:
    print("⚠️  Camera resolution differs from 5MP target")

## 5. Capture Photo

In [None]:
# Capture photo
ret, frame = camera.read()

# Stop here on failure: the following cells need `image_path`, which is only
# defined after a successful capture.
if not ret or frame is None:
    raise RuntimeError("❌ Failed to capture image")

# Save image; cv2.imwrite signals failure through its boolean return value,
# so check it explicitly.
image_path = "test_capture.jpg"
if not cv2.imwrite(image_path, frame):
    raise RuntimeError(f"❌ Failed to save image to {image_path}")

print(f"✓ Photo captured successfully: {image_path}")
# frame.shape is (height, width, ...), hence index 1 before index 0.
print(f"✓ Image size: {frame.shape[1]}x{frame.shape[0]}")

# Display captured image
display(IPImage(filename=image_path, width=400))

## 6. Classify Objects with Gemini AI

In [None]:
# Initialize Gemini model
model = genai.GenerativeModel('gemini-2.5-flash-lite')

# Load captured image
img = Image.open(image_path)

# Acceptance threshold for one classification round-trip, in seconds
TARGET_SECONDS = 3

# Classification prompt
prompt = """
Analyze this image and identify objects that are out of place (toys, trash, clothing, etc.).

For EACH object found, provide:
1. category: Must be one of [toy, trash, clothing, other]
2. description: Brief description of the object
3. confidence: Confidence percentage (0-100)
4. bbox: Approximate bounding box as [x_min, y_min, x_max, y_max] in pixels

ONLY include objects with confidence > 70%.

Return response in JSON format:
{
    "objects": [
        {
            "category": "toy|trash|clothing|other",
            "description": "object description",
            "confidence": 85,
            "bbox": [x1, y1, x2, y2]
        }
    ]
}

If no objects found with confidence >70%, return empty objects array.
"""

# Send to Gemini and measure time
print("🔍 Sending image to Gemini AI for classification...\n")

# perf_counter is a monotonic, high-resolution clock; time.time() follows the
# wall clock and can jump (e.g. on a time sync), which would corrupt the
# measured latency.
start_time = time.perf_counter()

response = model.generate_content([prompt, img])

processing_time = time.perf_counter() - start_time

print(f"✓ Classification completed in {processing_time:.2f} seconds")
print(f"✓ Target: <{TARGET_SECONDS} seconds | Actual: {processing_time:.2f}s")

if processing_time < TARGET_SECONDS:
    print("✓ Performance requirement MET")
else:
    print("⚠️  Performance requirement NOT MET")

## 7. Parse and Display Results

In [None]:
# Parse Gemini response
def _extract_json_text(text):
    """Return the JSON payload in *text*, unwrapping a Markdown code fence if present."""
    text = text.strip()
    # Try the language-tagged fence first so the "json" tag is not left in
    # front of the payload.
    for fence in ("```json", "```"):
        if fence in text:
            return text.split(fence)[1].split("```")[0].strip()
    return text


def _normalize_result(parsed):
    """Coerce decoded JSON into ``{"objects": [dict, ...]}``.

    Raises:
        ValueError: if the decoded value cannot be interpreted that way.
    """
    # Accept a bare top-level array of objects as well as the requested wrapper.
    if isinstance(parsed, list):
        parsed = {"objects": parsed}
    if not isinstance(parsed, dict):
        raise ValueError("top-level JSON value is not an object")

    objects = parsed.get("objects") or []  # treats a null value like an empty list
    if not isinstance(objects, list):
        raise ValueError('"objects" is not a list')

    # Later code calls .get() on every entry, so drop anything that is not a dict.
    parsed["objects"] = [obj for obj in objects if isinstance(obj, dict)]
    return parsed


# Read the text once up front so the error path below never touches
# response.text again.
# NOTE(review): the google-generativeai SDK is understood to raise ValueError
# from `.text` when the response has no text part (e.g. a blocked prompt) -
# confirm against the installed SDK version.
try:
    raw_text = response.text
except ValueError as err:
    raw_text = ""
    print(f"⚠️  Gemini response contained no text: {err}")

try:
    # json.JSONDecodeError is a subclass of ValueError, so a single handler
    # covers both malformed JSON and an unexpected structure.
    result = _normalize_result(json.loads(_extract_json_text(raw_text)))
except ValueError:
    print("⚠️  Could not parse JSON response")
    print(f"\nRaw Gemini Response:\n{raw_text}")
    result = {"objects": [], "raw_response": raw_text}
else:
    print("=" * 70)
    print("CLASSIFICATION RESULTS")
    print("=" * 70)
    print(f"\nObjects detected: {len(result['objects'])}")

    if result['objects']:
        for i, obj in enumerate(result['objects'], 1):
            print(f"\n📦 Object {i}:")
            print(f"   Category:    {obj.get('category', 'unknown')}")
            print(f"   Description: {obj.get('description', 'N/A')}")
            print(f"   Confidence:  {obj.get('confidence', 0)}%")
            print(f"   Location:    {obj.get('bbox', 'N/A')}")
    else:
        print("\nNo objects detected with confidence >70%")

## 8. Acceptance Criteria Validation

In [None]:
print("=" * 70)
print("US-001 ACCEPTANCE CRITERIA VALIDATION")
print("=" * 70)

VALID_CATEGORIES = {'toy', 'trash', 'clothing', 'other'}
MIN_CONFIDENCE = 70


def _is_number(value):
    """Return True for int/float values, excluding bool."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _is_valid_bbox(bbox):
    """Return True when *bbox* is a sequence of exactly four numbers."""
    return (
        isinstance(bbox, (list, tuple))
        and len(bbox) == 4
        and all(_is_number(v) for v in bbox)
    )


detected = result.get('objects', [])

# A frame only counts as a 5MP capture when it has the configured resolution;
# checking just for a non-empty frame would also pass on a low-resolution
# fallback device. frame.shape starts with (height, width).
captured_5mp = (
    bool(ret)
    and frame is not None
    and tuple(frame.shape[:2]) == (CAMERA_HEIGHT, CAMERA_WIDTH)
)

# The parsing cell stores 'raw_response' only when the reply could not be
# parsed, so its presence means classification did not yield usable output.
classified = response is not None and 'raw_response' not in result

# all() over an empty list is True, so "no objects detected" passes the
# per-object checks without a separate guard.
criteria = [
    ("5MP camera capture", captured_5mp),
    ("Gemini AI classification", classified),
    ("Categories (toy/trash/clothing/other)",
     all(obj.get('category') in VALID_CATEGORIES for obj in detected)),
    ("Confidence >70%",
     # The type check avoids a TypeError when the model returns e.g. "85%".
     all(_is_number(obj.get('confidence')) and obj['confidence'] > MIN_CONFIDENCE
         for obj in detected)),
    ("Bounding box location",
     all(_is_valid_bbox(obj.get('bbox')) for obj in detected)),
    ("Processing time <3s", processing_time < 3),
]

for criterion, passed in criteria:
    status = "✓" if passed else "✗"
    print(f"  {status} {criterion}")

all_passed = all(passed for _, passed in criteria)

print("\n" + "=" * 70)
if all_passed:
    print("🎉 ALL ACCEPTANCE CRITERIA PASSED!")
else:
    print("⚠️  Some criteria need attention")
print("=" * 70)

## 9. Cleanup

In [None]:
# Release camera
# `camera` is the cv2.VideoCapture object created in the camera
# initialization cell; releasing it is the last step of the notebook.
camera.release()
print("✓ Camera released")