In [None]:
# Object Size Estimation System for AWS SageMaker

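This notebook provides an object size estimation pipeline built on OpenCV color segmentation. It loads SAM (Segment Anything Model) and an EasyOCR reader, detects yellow objects in images, and converts their pixel dimensions to real-world measurements using a default pixels-per-millimetre calibration. Ruler-based calibration is not implemented yet; see the notes at the end of the notebook.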

## Features
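- SAM model loading (the model is initialized and passed to the pipeline; the measurement function in this notebook does not yet call it)
- HSV color-based yellow object detection
- Sequential ruler marking detection with OCR (planned; the OCR reader is initialized, but ruler detection is not yet implemented)
- Calibration and measurement using a default px/mm ratio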
- Enhanced visualization with annotations


In [None]:
## 1. Install Dependencies


In [None]:
# Install required packages
!pip install opencv-python numpy matplotlib
!pip install torch torchvision
!pip install git+https://github.com/facebookresearch/segment-anything.git
!pip install easyocr
!pip install Pillow


In [None]:
## 2. Import Libraries


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import re
import torch
from typing import List, Dict, Tuple, Optional
from PIL import Image
import easyocr

# SAM imports
from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator

print("All libraries imported successfully!")


In [None]:
## 3. Download SAM Model


In [None]:
# Download SAM model checkpoint
import urllib.request

# Create models directory
os.makedirs('models', exist_ok=True)

# Download SAM ViT-B checkpoint (358MB)
checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
checkpoint_path = "models/sam_vit_b_01ec64.pth"

if not os.path.exists(checkpoint_path):
    print("Downloading SAM model checkpoint...")
    # Download under a temporary name and rename only on success. Writing
    # directly to checkpoint_path would leave a truncated file behind if the
    # transfer were interrupted, and the existence check above would then
    # accept that broken file on every later run.
    partial_path = checkpoint_path + ".part"
    try:
        urllib.request.urlretrieve(checkpoint_url, partial_path)
        os.replace(partial_path, checkpoint_path)
    finally:
        # After a successful rename the partial file no longer exists; this
        # only cleans up after a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete!")
else:
    print("SAM model checkpoint already exists.")


In [None]:
## 4. Initialize SAM Model


In [None]:
def load_sam_model(checkpoint_path: str, model_type: str = "vit_b", device: str = "cpu"):
    """Load a SAM checkpoint and wrap it in a ``SamPredictor``.

    Args:
        checkpoint_path: Path to the SAM checkpoint file on disk.
        model_type: Key into ``sam_model_registry`` selecting the model
            builder. It has to match the architecture of the checkpoint;
            the default ``"vit_b"`` matches the checkpoint downloaded by
            this notebook.
        device: Device the model is moved to. Defaults to ``"cpu"`` for
            compatibility with machines that have no GPU.

    Returns:
        A ``(sam, predictor)`` tuple: the loaded model and a ``SamPredictor``
        built on top of it.

    Raises:
        FileNotFoundError: If ``checkpoint_path`` does not exist.
        ValueError: If ``model_type`` is not a key of ``sam_model_registry``.
    """
    print(f"Loading SAM model from {checkpoint_path}")

    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError(f"SAM checkpoint not found at {checkpoint_path}")

    # Fail with a readable message instead of a bare KeyError on a typo.
    if model_type not in sam_model_registry:
        raise ValueError(
            f"Unknown SAM model type {model_type!r}; "
            f"expected one of {sorted(sam_model_registry)}"
        )

    # Build the model and load the checkpoint weights
    sam = sam_model_registry[model_type](checkpoint=checkpoint_path)
    sam.to(device=device)

    predictor = SamPredictor(sam)
    print(f"SAM loaded successfully on {device}")

    return sam, predictor

# Load the SAM model and predictor once at notebook scope; the processing
# cells further down pass both objects to measure_objects_in_image().
sam_model, sam_predictor = load_sam_model(checkpoint_path)


In [None]:
## 5. Initialize OCR Reader


In [None]:
# Initialize the EasyOCR reader for English text. It is created once here and
# handed to measure_objects_in_image() by the processing cells further down.
print("Initializing OCR reader...")
ocr_reader = easyocr.Reader(['en'])
print("OCR reader initialized!")


In [None]:
## 6. Core Functions

### Image Processing Functions


In [None]:
def resize_image(image: np.ndarray, max_size: int = 800) -> np.ndarray:
    """Downscale an image so that its longer side is at most ``max_size`` pixels.

    The aspect ratio is preserved. Images that already fit are returned
    unchanged (the same array object, not a copy); images are never enlarged.

    Args:
        image: Image array whose first two dimensions are height and width.
        max_size: Maximum allowed length, in pixels, of the longer side.

    Returns:
        The input array itself if no resizing is needed, otherwise a new,
        smaller array.

    Raises:
        ValueError: If ``max_size`` is smaller than 1.
    """
    if max_size < 1:
        raise ValueError(f"max_size must be at least 1, got {max_size}")

    h, w = image.shape[:2]
    longest_side = max(h, w)
    if longest_side <= max_size:
        return image

    scale = max_size / longest_side
    # For very elongated images the shorter side can truncate to 0, which
    # cv2.resize rejects; keep every dimension at least one pixel.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    # This branch only ever shrinks the image; INTER_AREA is the interpolation
    # OpenCV recommends for decimation because it avoids aliasing artifacts.
    return cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

def load_and_preprocess_image(image_path: str):
    """Read an image from disk and build a downscaled working copy.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A tuple ``(original_image, resized_image, scale_x, scale_y)`` where
        the scale factors are the resized width/height divided by the
        original width/height.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    original_image = cv2.imread(image_path)
    if original_image is None:
        raise ValueError(f"Could not load image from {image_path}")

    orig_h, orig_w = original_image.shape[:2]
    print(f"Original image: {orig_w}x{orig_h}")

    # Working copy produced by resize_image with its default size limit
    resized_image = resize_image(original_image)
    new_h, new_w = resized_image.shape[:2]
    print(f"Resized image: {new_w}x{new_h}")

    # Ratios of resized to original dimensions
    return original_image, resized_image, new_w / orig_w, new_h / orig_h


In [None]:
### Yellow Object Detection


In [None]:
def detect_yellow_objects_hsv(
    image: np.ndarray,
    lower_hsv: Tuple[int, int, int] = (15, 50, 50),
    upper_hsv: Tuple[int, int, int] = (35, 255, 255),
    min_area: float = 1000,
    max_aspect_ratio: float = 5,
    min_quality: float = 0.5,
    max_overlap: float = 0.3,
) -> List[Dict]:
    """Detect colored objects by thresholding in HSV color space.

    The image is converted from BGR to HSV, thresholded to the given range,
    cleaned with morphological opening and closing, and split into external
    contours. Contours are filtered by area, bounding-box aspect ratio and a
    quality score, then de-duplicated by bounding-box overlap. With the
    default arguments the function targets yellow objects.

    Args:
        image: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).
        lower_hsv: Inclusive lower (H, S, V) bound of the color range.
        upper_hsv: Inclusive upper (H, S, V) bound of the color range.
        min_area: Contours with a smaller contour area (in pixels) are dropped.
        max_aspect_ratio: Contours whose bounding box has a longer/shorter
            side ratio above this value are dropped.
        min_quality: Candidates are kept only if their quality score is
            strictly greater than this value.
        max_overlap: A candidate is discarded as a duplicate if its bounding
            box overlap with an already accepted object exceeds this value.

    Returns:
        A list of dicts, best quality first, each with the keys ``contour``,
        ``bbox`` (x, y, w, h), ``area``, ``aspect_ratio``, ``solidity``,
        ``extent``, ``quality_score`` and ``method`` (always ``'hsv'``).
    """
    print("\n=== YELLOW OBJECT DETECTION ===")

    # Convert to HSV for better color detection
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Binary mask of the pixels inside the requested color range
    mask = cv2.inRange(hsv, np.array(lower_hsv), np.array(upper_hsv))

    # Opening removes small specks, closing fills small holes
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=2)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"  Found {len(contours)} contours")

    candidates = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if area < min_area:
            continue

        bbox = cv2.boundingRect(contour)
        _, _, w, h = bbox

        aspect_ratio = max(w, h) / min(w, h)
        if aspect_ratio > max_aspect_ratio:
            continue

        # Shape descriptors: solidity compares the contour with its convex
        # hull, extent compares it with its axis-aligned bounding box.
        hull_area = cv2.contourArea(cv2.convexHull(contour))
        solidity = area / hull_area if hull_area > 0 else 0
        extent = area / (w * h)

        # Weighted score; the size term saturates at an area of 5000 px
        quality_score = (solidity * 0.4 + extent * 0.4 + min(1.0, area / 5000) * 0.2)

        if quality_score > min_quality:
            candidates.append({
                'contour': contour,
                'bbox': bbox,
                'area': area,
                'aspect_ratio': aspect_ratio,
                'solidity': solidity,
                'extent': extent,
                'quality_score': quality_score,
                'method': 'hsv'
            })

    print(f"  After filtering: {len(candidates)} candidates")

    # Best candidates first, so that on overlap the better one is kept
    candidates.sort(key=lambda x: x['quality_score'], reverse=True)

    # Remove overlapping detections
    final_objects = []
    for candidate in candidates:
        is_duplicate = any(
            calculate_bbox_overlap(candidate['bbox'], existing['bbox']) > max_overlap
            for existing in final_objects
        )
        if not is_duplicate:
            final_objects.append(candidate)

    print(f"  Final objects: {len(final_objects)}")
    for i, obj in enumerate(final_objects):
        print(f"    Object {i+1}: area={obj['area']}, quality={obj['quality_score']:.2f}")

    return final_objects

def calculate_bbox_overlap(bbox1: Tuple[int, int, int, int], bbox2: Tuple[int, int, int, int]) -> float:
    """Return the intersection-over-union of two (x, y, w, h) boxes.

    Boxes that do not overlap, or that only touch along an edge or corner,
    yield 0.
    """
    ax, ay, aw, ah = bbox1
    bx, by, bw, bh = bbox2

    # Width and height of the overlapping region; non-positive means no overlap
    overlap_w = min(ax + aw, bx + bw) - max(ax, bx)
    overlap_h = min(ay + ah, by + bh) - max(ay, by)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0

    intersection = overlap_w * overlap_h
    union = aw * ah + bw * bh - intersection
    if union > 0:
        return intersection / union
    return 0


In [None]:
### Main Measurement Function


In [None]:
def measure_objects_in_image(image_path: str, sam_model, sam_predictor, ocr_reader, display=True,
                             px_per_mm: Optional[float] = None):
    """Run the complete measurement pipeline on one image.

    The image is loaded and downscaled, yellow objects are detected on the
    downscaled copy, each object's oriented bounding box is converted to
    millimetres and inches, and an annotated visualization is produced.

    Args:
        image_path: Path of the image to process.
        sam_model: Loaded SAM model. Not used by the current implementation.
        sam_predictor: SAM predictor. Not used by the current implementation.
        ocr_reader: OCR reader. Not used by the current implementation.
        display: If true, show the annotated image with matplotlib.
        px_per_mm: Calibration in pixels per millimetre. Objects are measured
            on the downscaled working image, so the value has to describe
            that image, not the original file. ``None`` (the default) selects
            the placeholder value of 30.0.

    Returns:
        ``None`` if no yellow object was detected. Otherwise a dict with a
        ``success`` flag: on success it also holds ``yellow_objects``,
        ``measurements``, ``visualization``, ``original_image``,
        ``resized_image`` and ``calibration``; on failure (any exception,
        including an invalid ``px_per_mm``) it holds ``error`` with the
        exception message.
    """
    print(f"SAM MEASUREMENT SYSTEM: {image_path}")
    print("="*50)

    try:
        # Resolve the calibration first so that a bad value is reported
        # before any image work is done.
        if px_per_mm is None:
            # Placeholder default; no ruler detection is implemented here
            px_per_mm = 30.0
            calibration_method = 'default'
        else:
            if not px_per_mm > 0:
                raise ValueError(f"px_per_mm must be a positive number, got {px_per_mm!r}")
            calibration_method = 'manual'

        # Load and preprocess image (the scale factors are not needed here)
        original_image, resized_image, _scale_x, _scale_y = load_and_preprocess_image(image_path)

        # Detect yellow objects on the downscaled image
        yellow_objects = detect_yellow_objects_hsv(resized_image)
        print(f"Detection found {len(yellow_objects)} yellow objects")

        if not yellow_objects:
            print("No yellow objects found")
            return None

        # Measure objects
        measurements = []
        for object_id, obj in enumerate(yellow_objects, start=1):
            # Oriented bounding box; report the longer side as the width
            _center, (side_a, side_b), _angle = cv2.minAreaRect(obj['contour'])
            width_px, height_px = max(side_a, side_b), min(side_a, side_b)

            # Convert to real world units
            width_mm = width_px / px_per_mm
            height_mm = height_px / px_per_mm

            measurements.append({
                'object_id': object_id,
                'width_mm': width_mm,
                'height_mm': height_mm,
                'width_inch': width_mm / 25.4,
                'height_inch': height_mm / 25.4,
                'width_px': width_px,
                'height_px': height_px,
                'area_px': obj['area']
            })

        print("\nSUCCESS!")

        for measurement in measurements:
            print(f"   Object {measurement['object_id']}: {measurement['width_mm']:.1f} x {measurement['height_mm']:.1f} mm")
            print(f"      ({measurement['width_inch']:.2f} x {measurement['height_inch']:.2f} inches)")

        # Create simple visualization on a copy of the working image
        vis_image = resized_image.copy()

        for obj, measurement in zip(yellow_objects, measurements):
            x, y, w, h = obj['bbox']

            # Draw bounding box
            cv2.rectangle(vis_image, (x, y), (x+w, y+h), (0, 0, 255), 2)

            # Add measurement text just above the box; clamp the baseline so
            # labels of objects near the top edge stay inside the image.
            text = f"Obj {measurement['object_id']}: {measurement['width_mm']:.1f}x{measurement['height_mm']:.1f}mm"
            cv2.putText(vis_image, text, (x, max(y - 10, 15)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        # Add title
        cv2.putText(vis_image, 'Object Measurement Results', (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

        if display:
            plt.figure(figsize=(12, 8))
            # OpenCV images are BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(vis_image, cv2.COLOR_BGR2RGB))
            plt.title(f"Measurement Results - {os.path.basename(image_path)}")
            plt.axis('off')
            plt.show()

        return {
            'success': True,
            'yellow_objects': yellow_objects,
            'measurements': measurements,
            'visualization': vis_image,
            'original_image': original_image,
            'resized_image': resized_image,
            'calibration': {'px_per_mm': px_per_mm, 'method': calibration_method}
        }

    except Exception as e:
        # Deliberate best-effort: report the failure to the caller instead of
        # aborting a batch run over many images.
        print(f"Error: {e}")
        return {'success': False, 'error': str(e)}


In [None]:
## 7. Usage Instructions

### Step 1: Upload Your Images
Upload your test images to the SageMaker environment. The next cell creates the `test_images` and `output` folders; place your images in `test_images`.


In [None]:
# Make sure the input folder for test images and the folder for results exist
for folder in ('test_images', 'output'):
    os.makedirs(folder, exist_ok=True)

print("Directories created. Please upload your test images to the 'test_images' folder.")
print("You can use the file browser on the left to upload images.")


In [None]:
### Step 2: Process Your Images


In [None]:
# Process all images in the test_images folder
test_images = []
if os.path.isdir('test_images'):
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing order (and the numbering in the summary) reproducible.
    test_images = [
        os.path.join('test_images', filename)
        for filename in sorted(os.listdir('test_images'))
        if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))
    ]

print(f"Found {len(test_images)} test images:")
for img in test_images:
    print(f"  - {img}")

if not test_images:
    print("No test images found. Please upload images to the 'test_images' folder.")
    print("Supported formats: .jpg, .jpeg, .png, .bmp")

# Process each image; only successful results are collected
results = []
for image_path in test_images:
    print(f"\n{'='*60}")
    print(f"PROCESSING: {image_path}")
    print(f"{'='*60}")

    # Run measurement system
    result = measure_objects_in_image(image_path, sam_model, sam_predictor, ocr_reader, display=True)

    # The pipeline returns None when nothing was detected and a dict with
    # success=False when an error occurred.
    if result and result['success']:
        results.append(result)

        # Save visualization
        base_name = os.path.splitext(os.path.basename(image_path))[0]
        output_path = f'output/{base_name}_measurement_result.jpg'
        # cv2.imwrite signals failure through its return value, not an exception
        if cv2.imwrite(output_path, result['visualization']):
            print(f"\nSaved visualization: {output_path}")
        else:
            print(f"\nCould not write visualization: {output_path}")
    else:
        print(f"\nFailed to process {image_path}")

print(f"\n\nProcessing complete! Successfully processed {len(results)} images.")


In [None]:
### Step 3: Process Individual Images

Use this cell to process a single specific image:


In [None]:
# Process one specific image.
# Point single_image_path at the image you want to analyze.
single_image_path = 'test_images/your_image_path.jpg'

if not os.path.exists(single_image_path):
    print(f"Image not found: {single_image_path}")
    print("Please update the path to point to your image file.")
else:
    result = measure_objects_in_image(single_image_path, sam_model, sam_predictor, ocr_reader, display=True)

    # A missing result or a result flagged as unsuccessful both count as failure
    if not (result and result['success']):
        print("Processing failed!")
    else:
        print("\nProcessing successful!")

        # Write the annotated image next to the other outputs
        base_name = os.path.splitext(os.path.basename(single_image_path))[0]
        output_path = f'output/{base_name}_detailed_result.jpg'
        cv2.imwrite(output_path, result['visualization'])
        print(f"\nSaved detailed result: {output_path}")


In [None]:
## 8. Summary and Results

### View Output Files


In [None]:
# List all output files
output_files = []
if os.path.isdir('output'):
    # Sorted so the listing is the same on every run (os.listdir order is arbitrary)
    output_files = [
        os.path.join('output', filename)
        for filename in sorted(os.listdir('output'))
        if filename.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]

print(f"Generated {len(output_files)} output files:")
for file in output_files:
    file_size = os.path.getsize(file) / 1024  # Size in KB
    print(f"  - {file} ({file_size:.1f} KB)")

print("\nYou can download these files using the file browser on the left.")

# Create summary of results.
# `results` only exists if the batch processing cell has been run, so look it
# up in the notebook's global namespace instead of referencing it directly.
if globals().get('results'):
    print("\n" + "="*60)
    print("MEASUREMENT SUMMARY")
    print("="*60)

    for image_number, image_result in enumerate(results, start=1):
        print(f"\nImage {image_number}:")
        print(f"  Objects detected: {len(image_result['yellow_objects'])}")
        print(f"  Calibration: {image_result['calibration']['px_per_mm']:.2f} px/mm")

        print("  Measurements:")
        for measurement in image_result['measurements']:
            print(f"    Object {measurement['object_id']}: {measurement['width_mm']:.1f} x {measurement['height_mm']:.1f} mm")
            print(f"      ({measurement['width_inch']:.2f} x {measurement['height_inch']:.2f} inches)")
else:
    print("\nNo results to summarize. Please run the processing cells first.")


In [None]:
## 9. Notes and Instructions

This notebook provides a complete standalone object size estimation system that:

### Key Features:
- **Functional programming approach** - No classes, just functions
- **AWS SageMaker compatible** - Runs entirely in the notebook environment
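- **SAM model loading** - Loads Facebook's Segment Anything Model and passes it to the measurement function (the function in this notebook does not yet use it for segmentation)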
- **Yellow object detection** - HSV color space filtering
- **Simple calibration** - Default px/mm ratio (can be enhanced with ruler detection)
- **Visual results** - Clear annotations and measurements

### Usage Instructions:
1. **Run all cells in sequence** - Start from the top and execute each cell
2. **Upload your images** - Place test images in the 'test_images' folder
3. **Process images** - Run the processing cells to analyze your images
4. **Download results** - Get processed images from the 'output' folder

### Customization Options:
- **Adjust calibration**: Modify the `px_per_mm` value in the measurement function
- **Change color detection**: Modify HSV ranges for different colored objects
- **Add ruler detection**: Implement OCR-based ruler detection for better calibration
- **Enhance visualization**: Add more detailed annotations and measurements

### Supported Image Formats:
- JPEG (.jpg, .jpeg)
- PNG (.png)
- BMP (.bmp)

### System Requirements:
- Python 3.8+ (required by the Segment Anything package)
- OpenCV
- NumPy
- Matplotlib
- PyTorch
- SAM (Segment Anything Model)
- EasyOCR

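The system is designed to handle a range of image qualities and lighting conditions, but detection relies on a fixed HSV color range and the reported sizes depend on the `px_per_mm` calibration value, so results vary with lighting and image scale. For best results, ensure good lighting and clear visibility of yellow objects in your images.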
