# 🍇 Grape Size Estimation Using Ruler and SAM

This notebook detects a ruler in the image using SAM, estimates the pixel-to-cm scale,
and uses a provided grape mask to measure grape sizes in cm².

Supports both **horizontal and vertical** rulers.


How to use the following cell (config) in AWS?

	1.	Go to Amazon SageMaker Console > Notebook instances > Lifecycle Configurations
	2.	Click Create Configuration
	3.	Paste the script below (the bash cell that follows) into the Start notebook section
	4.	Name it something like grape-sam-setup
	5.	When creating your notebook instance, attach this lifecycle configuration

In [None]:
#!/bin/bash
# Lifecycle configuration script for the grape phenotyping notebook:
# installs Tesseract, the Python dependencies, Segment Anything (editable
# install from a git clone) and downloads the SAM ViT-H checkpoint.

# Abort on the first failing command so a half-configured environment is
# not reported as ready.
set -e

echo "------ Starting Lifecycle Config for Grape Phenotyping ------"

# Activate default conda environment
source /home/ec2-user/anaconda3/bin/activate

# Install system-level dependencies
sudo yum -y update
sudo yum -y install tesseract

# Install Python packages (use pip from base environment)
pip install --upgrade pip

pip install \
    opencv-python-headless \
    numpy \
    matplotlib \
    pytesseract \
    tqdm \
    torch \
    torchvision \
    notebook

# Clone and install Segment Anything
cd /home/ec2-user/SageMaker
if [ ! -d "segment-anything" ]; then
    git clone https://github.com/facebookresearch/segment-anything.git
fi
cd segment-anything
pip install -e .

# Download SAM model checkpoint if not exists
cd /home/ec2-user/SageMaker
CHECKPOINT="sam_vit_h_4b8939.pth"
if [ ! -f "$CHECKPOINT" ]; then
    # Download under a temporary name and rename only after wget succeeds.
    # Otherwise an interrupted transfer would leave a truncated file that the
    # existence check above treats as a complete checkpoint on the next start.
    wget -O "${CHECKPOINT}.part" "https://dl.fbaipublicfiles.com/segment_anything/${CHECKPOINT}"
    mv "${CHECKPOINT}.part" "$CHECKPOINT"
fi

echo "------ Grape Phenotyping Environment Ready ------"

In [None]:
# ✅ Setup
import cv2
import numpy as np
import pytesseract
from matplotlib import pyplot as plt
from segment_anything import sam_model_registry, SamPredictor

def load_sam(checkpoint="sam_vit_h_4b8939.pth", model_type="vit_h"):
    """Build a SAM model and wrap it in a ``SamPredictor``.

    Args:
        checkpoint: Path to the SAM weights file. Defaults to the ViT-H
            checkpoint file name, resolved relative to the working directory.
        model_type: Key into ``sam_model_registry`` selecting the model
            builder. It must match the architecture of ``checkpoint``.

    Returns:
        A ``SamPredictor`` wrapping the loaded model.

    Raises:
        KeyError: If ``model_type`` is not a key of ``sam_model_registry``.
    """
    sam = sam_model_registry[model_type](checkpoint=checkpoint)
    return SamPredictor(sam)


In [None]:
# 📐 Preprocessing for OCR
def preprocess_for_ocr(image, mask=None):
    """Turn a BGR image into a binarized grayscale image for OCR.

    Args:
        image: Colour image; it is converted with ``cv2.COLOR_BGR2GRAY``.
        mask: Optional mask. When given, it is cast to ``uint8`` and pixels
            where it is zero are blacked out before any other processing.

    Returns:
        The Otsu-thresholded image after denoising and CLAHE contrast
        enhancement.
    """
    if mask is None:
        source = image
    else:
        source = cv2.bitwise_and(image, image, mask=mask.astype(np.uint8))

    grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.fastNlMeansDenoising(grayscale, h=30)

    # Local contrast equalization before thresholding.
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrasted = equalizer.apply(smoothed)

    # With THRESH_OTSU the threshold argument (0) is ignored and computed
    # from the image histogram; the chosen level is not needed here.
    _level, binarized = cv2.threshold(
        contrasted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )
    return binarized


In [None]:
# 📏 Tick fallback (horizontal or vertical)
def fallback_tick_spacing(gray):
    """Estimate the spacing between ruler ticks from detected line segments.

    Segments are found with Canny edges followed by a probabilistic Hough
    transform. Near-vertical segments are located by their x coordinate and
    near-horizontal segments by their y coordinate, since that is the axis
    along which parallel ticks are spaced. The orientation with more distinct
    positions is assumed to be the tick marks.

    NOTE(review): the result is the spacing between adjacent detected ticks.
    The caller uses it as pixels-per-cm, which presumes one detected tick per
    centimetre; confirm against the rulers actually used.

    Args:
        gray: Single-channel image containing the ruler.

    Returns:
        Median distance in pixels between consecutive tick positions.

    Raises:
        RuntimeError: If no line segments are found, or fewer than two
            distinct tick positions are found (no spacing can be computed).
    """
    edges = cv2.Canny(gray, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, 30, minLineLength=10, maxLineGap=5)
    if lines is None:
        raise RuntimeError("No ticks detected")

    vertical_xs = set()    # x positions of near-vertical segments
    horizontal_ys = set()  # y positions of near-horizontal segments
    for x1, y1, x2, y2 in lines[:, 0]:
        if abs(x1 - x2) < 5:
            vertical_xs.add(int(min(x1, x2)))
        elif abs(y1 - y2) < 5:
            horizontal_ys.add(int(min(y1, y2)))

    # Keep the two orientations apart: mixing x and y positions in one list
    # produces meaningless differences. Ties favour the vertical segments.
    positions = sorted(max(vertical_xs, horizontal_ys, key=len))
    if len(positions) < 2:
        # np.median of an empty list would silently return NaN.
        raise RuntimeError("No ticks detected")
    return float(np.median(np.diff(positions)))


In [None]:
# 🔍 Compute pixel-per-cm scale
def compute_scale(gray, image, debug=False):
    """Estimate the pixels-per-unit scale from the numbers printed on the ruler.

    Tesseract is run in digits mode on ``gray``. Each recognized integer in
    the range 0..30 is mapped to the centre of its bounding box; if a number
    is recognized more than once, the last occurrence wins. The first
    available pair from a fixed preference list gives the scale as the
    distance between the two centres divided by the numeric difference. If
    no pair is available, the tick-spacing fallback is used.

    Args:
        gray: Preprocessed image passed to OCR (and to the fallback).
        image: Accepted but not used by this function.
        debug: Accepted but not used by this function.

    Returns:
        Pixel distance per unit of ruler numbering.
    """
    ocr = pytesseract.image_to_data(gray, config='--psm 6 digits', output_type=pytesseract.Output.DICT)

    centers = {}
    for idx, token in enumerate(ocr['text']):
        if not token.isdigit():
            continue
        number = int(token)
        if number < 0 or number > 30:
            continue
        center_x = ocr['left'][idx] + ocr['width'][idx] // 2
        center_y = ocr['top'][idx] + ocr['height'][idx] // 2
        centers[number] = (center_x, center_y)

    # Pairs are tried in order of preference; the first one found is used.
    preferred_pairs = ((0, 10), (0, 5), (0, 1), (1, 2), (2, 3))
    for first, second in preferred_pairs:
        if first not in centers or second not in centers:
            continue
        offset = np.array(centers[first]) - np.array(centers[second])
        return np.linalg.norm(offset) / abs(second - first)

    return fallback_tick_spacing(gray)


In [None]:
# 🧠 Find ruler mask from SAM outputs
def find_ruler_mask(image, masks):
    """Return the first mask that contains a long, thin region.

    A mask qualifies when any of its external contours has a bounding box
    whose long side is more than 5 times its short side and whose short side
    exceeds 20 pixels.

    Args:
        image: Accepted but not used by this function.
        masks: Iterable of masks; each is cast to ``uint8`` for contour
            extraction.

    Returns:
        The first qualifying mask (as passed in), or ``None`` if none qualify.
    """
    for candidate in masks:
        contours, _hierarchy = cv2.findContours(
            candidate.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        for contour in contours:
            _x, _y, width, height = cv2.boundingRect(contour)
            elongation = max(width / height, height / width)
            if elongation <= 5 or min(width, height) <= 20:
                continue
            return candidate
    return None


In [None]:
# 📷 Process image and measure scale
def measure_image(image_path, grape_mask_path=None, debug=True):
    """Estimate the image scale from a ruler and optionally measure grapes.

    SAM is prompted with a single foreground point at the image centre. The
    resulting masks are searched for a ruler-shaped region, and the scale is
    computed from the (optionally masked) image.

    Args:
        image_path: Path to the photograph containing the ruler.
        grape_mask_path: Optional path to a grayscale mask image. Pixels above
            127 are treated as grape, and each 8-connected component is
            counted as one grape.
        debug: Forwarded to ``compute_scale``.

    Returns:
        If ``grape_mask_path`` is given, a list with the area of each
        connected component divided by the squared scale. Otherwise the
        estimated scale itself.

    Raises:
        FileNotFoundError: If the image or the grape mask cannot be read.
        RuntimeError: If the estimated scale is not a positive finite number.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    predictor = load_sam()
    # cv2.imread returns BGR, while SamPredictor.set_image expects RGB by
    # default. Convert for SAM only; the OCR path below still uses BGR.
    predictor.set_image(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    h, w = image.shape[:2]
    masks, _, _ = predictor.predict(
        point_coords=np.array([[w // 2, h // 2]]),
        point_labels=np.array([1]),
        multimask_output=True,
    )

    # ruler_mask may be None; preprocess_for_ocr then uses the whole image.
    ruler_mask = find_ruler_mask(image, masks)
    gray = preprocess_for_ocr(image, ruler_mask)
    pixel_per_cm = compute_scale(gray, image, debug=debug)
    if not np.isfinite(pixel_per_cm) or pixel_per_cm <= 0:
        # A zero or NaN scale would turn every area below into inf/NaN.
        raise RuntimeError(f"Invalid scale estimate: {pixel_per_cm}")
    print(f"Estimated scale: {pixel_per_cm:.2f} pixels per cm")

    if grape_mask_path:
        gm = cv2.imread(grape_mask_path, 0)
        if gm is None:
            raise FileNotFoundError(f"Could not read grape mask: {grape_mask_path}")
        gm = (gm > 127).astype(np.uint8)
        n, lbls, stats, _ = cv2.connectedComponentsWithStats(gm, 8)
        # Label 0 is the background component, so start at 1.
        sizes = [stats[i, cv2.CC_STAT_AREA] / (pixel_per_cm ** 2) for i in range(1, n)]
        print(f"Detected {len(sizes)} grapes. Example sizes: {sizes[:5]}")
        return sizes
    return pixel_per_cm


In [None]:
# Example run: estimate the scale of the example image and measure the grapes
# in its mask. Both paths are relative to the current working directory.
measure_image("example_grape.jpg", "example_grape_mask.png")