# Load data & libs

## Import libraries

In [2]:
import numpy as np
import argparse
import glob
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import os

## Load data

In [3]:
# Directory containing the raw input images. This is an absolute,
# machine-specific path; the trailing "/" matters because file names are
# appended to it by plain string concatenation in the processing loops.
img_dir = "/Users/thanhhuongtran/Documents/him/seed-size/seed_ground_truth/raw_images/"

In [4]:
# File names of the ten input images: Anh1.jpg ... Anh10.jpg.
img_list = [f'Anh{i}.jpg' for i in range(1, 11)]

In [5]:
# Sanity check: show the generated file names.
print(img_list)

['Anh1.jpg', 'Anh2.jpg', 'Anh3.jpg', 'Anh4.jpg', 'Anh5.jpg', 'Anh6.jpg', 'Anh7.jpg', 'Anh8.jpg', 'Anh9.jpg', 'Anh10.jpg']


# Traditional image processing method

## Canny edge detector method

In [35]:
def process_image(img_path, output_image_path, output_excel_path, *,
                  coin_diameter_mm=20, min_contour_area=5000):
    """Measure the objects in one image with a Canny-edge pipeline.

    Pipeline: dilate the colour image, convert to grayscale, binarise with
    Otsu's threshold, run the Canny detector, dilate the edge map and extract
    the external contours.  Contours whose area exceeds ``min_contour_area``
    are kept and each is described by its minimum-area rotated rectangle.

    Rectangles are ordered by the x coordinate of their centre.  The first
    (left-most) one is treated as the reference coin: its longer side is
    taken to be ``coin_diameter_mm`` millimetres, which fixes the
    pixel-to-mm scale used for every object in the image.

    Args:
        img_path: Path of the image to read.
        output_image_path: Where the annotated image is written.
        output_excel_path: Where the table of object sizes (mm) is written.
        coin_diameter_mm: Real-world size of the reference object, in mm.
        min_contour_area: Contours with an area (in px^2) not greater than
            this value are discarded.

    Returns:
        None.  If the image cannot be read an error is printed and nothing
        is written.
    """
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error: Unable to load image from {img_path}")
        return

    kernel = np.ones((3, 3), np.uint8)

    # Morphological dilation of the colour image (used here as a smoothing step).
    dilated = cv2.dilate(img, kernel, iterations=7)
    gray = cv2.cvtColor(dilated, cv2.COLOR_BGR2GRAY)

    # Binarise first so that Canny only sees clean object/background borders.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    edges = cv2.Canny(binary, 100, 200)

    # Thicken the edges so that each outline forms one closed contour.
    dilation = cv2.dilate(edges, kernel, iterations=2)

    contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Keep sufficiently large contours and order their rotated bounding
    # rectangles by the x coordinate of the centre.
    min_rects = sorted(
        (cv2.minAreaRect(cnt) for cnt in contours
         if cv2.contourArea(cnt) > min_contour_area),
        key=lambda rect: rect[0][0],
    )

    # Annotations are drawn in RGB order and converted back to BGR on save.
    result = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    data = {"Object": [], "Width": [], "Height": []}
    pixel_per_mm = None

    for index, rect in enumerate(min_rects, start=1):
        box = np.intp(cv2.boxPoints(rect))
        cv2.drawContours(result, [box], 0, (255, 0, 0), 2)

        # By convention the longer side of the rectangle is the width.
        height, width = sorted(rect[1])

        if pixel_per_mm is None:
            # First rectangle is the reference coin: derive the scale from it.
            pixel_per_mm = width / coin_diameter_mm

        width_mm = width / pixel_per_mm
        height_mm = height / pixel_per_mm

        # Midpoints of the edges (0-1, 2-3, 1-2, 0-3) as plain Python ints.
        mids = [
            (int((box[a][0] + box[b][0]) // 2), int((box[a][1] + box[b][1]) // 2))
            for a, b in ((0, 1), (2, 3), (1, 2), (0, 3))
        ]
        cv2.line(result, mids[0], mids[1], (0, 0, 255), 2)
        cv2.line(result, mids[2], mids[3], (0, 0, 255), 2)
        cv2.putText(result, f"{index}:{width_mm:.2f}x{height_mm:.2f}",
                    (int(rect[0][0]), int(rect[0][1] - 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 0, 0), 2)

        data["Object"].append(index)
        data["Width"].append(width_mm)
        data["Height"].append(height_mm)

    # The reference coin is not a seed; never report a negative count.
    seed_count = max(len(min_rects) - 1, 0)
    cv2.putText(result, f"Seed: {seed_count}", (100, 300), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 0, 255), 2)

    # Make sure the destination folders exist before writing.
    for path in (output_image_path, output_excel_path):
        out_dir = os.path.dirname(path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # cv2.imwrite signals failure through its return value.
    if not cv2.imwrite(output_image_path, cv2.cvtColor(result, cv2.COLOR_RGB2BGR)):
        print(f"Error: Unable to write image to {output_image_path}")

    pd.DataFrame(data).to_excel(output_excel_path, index=False)

In [36]:
# Run process_image over every input image and store the results under
# results/canny/, numbering the output files from 1.
# NOTE(review): this calls whichever process_image was defined most recently
# in the notebook session — presumably the Canny version above; confirm the
# cells are run in order.
for i, image in enumerate(img_list):
    # img_dir ends with "/", so concatenation yields the full file path.
    image_path = img_dir + image
    output_image_path = f'results/canny/{i + 1}_counting_with_bb.jpg'
    output_excel_path = f'results/canny/{i + 1}_obj_sz.xlsx'
    process_image(image_path, output_image_path, output_excel_path)

## Sobel edge detector method

In [11]:
def process_image(img_path, output_image_path, output_excel_path, *,
                  coin_diameter_mm=20, min_contour_area=5000):
    """Measure the objects in one image with a Sobel-gradient pipeline.

    Pipeline: dilate the colour image, convert to grayscale, compute the
    Sobel gradient magnitude, binarise it with Otsu's threshold, dilate the
    result and extract the external contours.  Contours whose area exceeds
    ``min_contour_area`` are kept and each is described by its minimum-area
    rotated rectangle.

    Rectangles are ordered by the x coordinate of their centre.  The first
    (left-most) one is treated as the reference coin: its longer side is
    taken to be ``coin_diameter_mm`` millimetres, which fixes the
    pixel-to-mm scale used for every object in the image.

    Args:
        img_path: Path of the image to read.
        output_image_path: Where the annotated image is written.
        output_excel_path: Where the table of object sizes (mm) is written.
        coin_diameter_mm: Real-world size of the reference object, in mm.
        min_contour_area: Contours with an area (in px^2) not greater than
            this value are discarded.

    Returns:
        None.  If the image cannot be read an error is printed and nothing
        is written.
    """
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error: Unable to load image from {img_path}")
        return

    kernel = np.ones((3, 3), np.uint8)

    # Morphological dilation of the colour image (used here as a smoothing step).
    dilated = cv2.dilate(img, kernel, iterations=7)
    gray = cv2.cvtColor(dilated, cv2.COLOR_BGR2GRAY)

    # Gradient magnitude from horizontal and vertical Sobel derivatives,
    # computed in float64 and then converted to 8 bits with saturation.
    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    sobel_combined = cv2.convertScaleAbs(cv2.magnitude(sobel_x, sobel_y))

    _, binary = cv2.threshold(sobel_combined, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Thicken the edges so that each outline forms one closed contour.
    dilation = cv2.dilate(binary, kernel, iterations=2)

    contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Keep sufficiently large contours and order their rotated bounding
    # rectangles by the x coordinate of the centre.
    min_rects = sorted(
        (cv2.minAreaRect(cnt) for cnt in contours
         if cv2.contourArea(cnt) > min_contour_area),
        key=lambda rect: rect[0][0],
    )

    # Annotations are drawn in RGB order and converted back to BGR on save.
    result = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    data = {"Object": [], "Width": [], "Height": []}
    pixel_per_mm = None

    for index, rect in enumerate(min_rects, start=1):
        box = np.intp(cv2.boxPoints(rect))
        cv2.drawContours(result, [box], 0, (255, 0, 0), 2)

        # By convention the longer side of the rectangle is the width.
        height, width = sorted(rect[1])

        if pixel_per_mm is None:
            # First rectangle is the reference coin: derive the scale from it.
            pixel_per_mm = width / coin_diameter_mm

        width_mm = width / pixel_per_mm
        height_mm = height / pixel_per_mm

        # Midpoints of the edges (0-1, 2-3, 1-2, 0-3) as plain Python ints.
        mids = [
            (int((box[a][0] + box[b][0]) // 2), int((box[a][1] + box[b][1]) // 2))
            for a, b in ((0, 1), (2, 3), (1, 2), (0, 3))
        ]
        cv2.line(result, mids[0], mids[1], (0, 0, 255), 2)  # Width line
        cv2.line(result, mids[2], mids[3], (0, 0, 255), 2)  # Height line
        cv2.putText(result, f"{index}: {width_mm:.2f}x{height_mm:.2f}",
                    (int(rect[0][0]), int(rect[0][1] - 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 0, 0), 2)

        data["Object"].append(index)
        data["Width"].append(width_mm)
        data["Height"].append(height_mm)

    # The first object is the reference coin, not a seed, so it is excluded
    # from the count; never report a negative count.
    seed_count = max(len(min_rects) - 1, 0)
    cv2.putText(result, f"Seed: {seed_count}", (100, 300), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 0, 255), 2)

    # Make sure the destination folders exist before writing.
    for path in (output_image_path, output_excel_path):
        out_dir = os.path.dirname(path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # cv2.imwrite signals failure through its return value; only report
    # success when the file was really written.
    if cv2.imwrite(output_image_path, cv2.cvtColor(result, cv2.COLOR_RGB2BGR)):
        print(f"Processed image saved to {output_image_path}")
    else:
        print(f"Error: Unable to write image to {output_image_path}")

    pd.DataFrame(data).to_excel(output_excel_path, index=False)
    print(f"Seed size data saved to {output_excel_path}")

In [12]:
# Run process_image over every input image and store the results under
# results/sobel/, numbering the output files from 1.
# NOTE(review): this calls whichever process_image was defined most recently
# in the notebook session — presumably the Sobel version above; confirm the
# cells are run in order.
for i, image in enumerate(img_list):
    # img_dir ends with "/", so concatenation yields the full file path.
    image_path = img_dir + image
    output_image_path = f'results/sobel/{i + 1}_counting_with_bb.jpg'
    output_excel_path = f'results/sobel/{i + 1}_obj_sz.xlsx'
    process_image(image_path, output_image_path, output_excel_path)

Processed image saved to results/sobel/1_counting_with_bb.jpg
Seed size data saved to results/sobel/1_obj_sz.xlsx
Processed image saved to results/sobel/2_counting_with_bb.jpg
Seed size data saved to results/sobel/2_obj_sz.xlsx
Processed image saved to results/sobel/3_counting_with_bb.jpg
Seed size data saved to results/sobel/3_obj_sz.xlsx
Processed image saved to results/sobel/4_counting_with_bb.jpg
Seed size data saved to results/sobel/4_obj_sz.xlsx
Processed image saved to results/sobel/5_counting_with_bb.jpg
Seed size data saved to results/sobel/5_obj_sz.xlsx
Processed image saved to results/sobel/6_counting_with_bb.jpg
Seed size data saved to results/sobel/6_obj_sz.xlsx
Processed image saved to results/sobel/7_counting_with_bb.jpg
Seed size data saved to results/sobel/7_obj_sz.xlsx
Processed image saved to results/sobel/8_counting_with_bb.jpg
Seed size data saved to results/sobel/8_obj_sz.xlsx
Processed image saved to results/sobel/9_counting_with_bb.jpg
Seed size data saved to results/sobel/9_obj_sz.xlsx

## Laplacian edge detector method

In [14]:
def process_image(img_path, output_image_path, output_excel_path, *,
                  coin_diameter_mm=20, min_contour_area=5000):
    """Measure the objects in one image with a Laplacian-edge pipeline.

    Pipeline: convert to grayscale, apply a 3x3 Gaussian blur, compute the
    Laplacian (kernel size 5), convert its absolute value to 8 bits,
    binarise with Otsu's threshold and extract the external contours.
    Contours whose area exceeds ``min_contour_area`` are kept and each is
    described by its minimum-area rotated rectangle.

    Rectangles are ordered by the x coordinate of their centre.  The first
    (left-most) one is treated as the reference coin: its longer side is
    taken to be ``coin_diameter_mm`` millimetres, which fixes the
    pixel-to-mm scale used for every object in the image.

    Args:
        img_path: Path of the image to read.
        output_image_path: Where the annotated image is written.
        output_excel_path: Where the table of object sizes (mm) is written.
        coin_diameter_mm: Real-world size of the reference object, in mm.
        min_contour_area: Contours with an area (in px^2) not greater than
            this value are discarded.

    Returns:
        None.  If the image cannot be read an error is printed and nothing
        is written.
    """
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error: Unable to load image from {img_path}")
        return

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Light blur first: the Laplacian is a second derivative and amplifies noise.
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)
    laplacian = cv2.Laplacian(blurred, cv2.CV_64F, ksize=5)

    # Absolute value with a saturating conversion to uint8.  A plain
    # np.uint8 cast does not clamp responses above 255, which corrupts
    # strong edges.
    laplacian_abs = cv2.convertScaleAbs(laplacian)

    # With THRESH_OTSU the threshold is chosen automatically, so the
    # explicit threshold argument is not used.
    _, thresh = cv2.threshold(laplacian_abs, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Keep sufficiently large contours and order their rotated bounding
    # rectangles by the x coordinate of the centre.
    min_rects = sorted(
        (cv2.minAreaRect(cnt) for cnt in contours
         if cv2.contourArea(cnt) > min_contour_area),
        key=lambda rect: rect[0][0],
    )

    # Annotations are drawn in RGB order and converted back to BGR on save.
    result = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    data = {"Object": [], "Width": [], "Height": []}
    pixel_per_mm = None

    for index, rect in enumerate(min_rects, start=1):
        box = np.intp(cv2.boxPoints(rect))
        cv2.drawContours(result, [box], 0, (255, 0, 0), 2)

        # By convention the longer side of the rectangle is the width.
        height, width = sorted(rect[1])

        if pixel_per_mm is None:
            # First rectangle is the reference coin: derive the scale from it.
            pixel_per_mm = width / coin_diameter_mm

        width_mm = width / pixel_per_mm
        height_mm = height / pixel_per_mm

        # Midpoints of the edges (0-1, 2-3, 1-2, 0-3) as plain Python ints.
        mids = [
            (int((box[a][0] + box[b][0]) // 2), int((box[a][1] + box[b][1]) // 2))
            for a, b in ((0, 1), (2, 3), (1, 2), (0, 3))
        ]
        cv2.line(result, mids[0], mids[1], (0, 0, 255), 2)  # Width line
        cv2.line(result, mids[2], mids[3], (0, 0, 255), 2)  # Height line
        cv2.putText(result, f"{index}: {width_mm:.2f}x{height_mm:.2f}",
                    (int(rect[0][0]), int(rect[0][1] - 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 0, 0), 2)

        data["Object"].append(index)
        data["Width"].append(width_mm)
        data["Height"].append(height_mm)

    # The first object is the reference coin, not a seed, so it is excluded
    # from the count; never report a negative count.
    seed_count = max(len(min_rects) - 1, 0)
    cv2.putText(result, f"Seed: {seed_count}", (100, 300), cv2.FONT_HERSHEY_SIMPLEX, 5, (0, 0, 255), 2)

    # Make sure the destination folders exist before writing.
    for path in (output_image_path, output_excel_path):
        out_dir = os.path.dirname(path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # cv2.imwrite signals failure through its return value; only report
    # success when the file was really written.
    if cv2.imwrite(output_image_path, cv2.cvtColor(result, cv2.COLOR_RGB2BGR)):
        print(f"Processed image saved to {output_image_path}")
    else:
        print(f"Error: Unable to write image to {output_image_path}")

    pd.DataFrame(data).to_excel(output_excel_path, index=False)
    print(f"Seed size data saved to {output_excel_path}")

In [15]:
# Run process_image over every input image and store the results under
# results/laplacian/, numbering the output files from 1.
# NOTE(review): this calls whichever process_image was defined most recently
# in the notebook session — presumably the Laplacian version above; confirm
# the cells are run in order.
for i, image in enumerate(img_list):
    # img_dir ends with "/", so concatenation yields the full file path.
    image_path = img_dir + image
    output_image_path = f'results/laplacian/{i + 1}_counting_with_bb.jpg'
    output_excel_path = f'results/laplacian/{i + 1}_obj_sz.xlsx'
    process_image(image_path, output_image_path, output_excel_path)

Processed image saved to results/laplacian/1_counting_with_bb.jpg
Seed size data saved to results/laplacian/1_obj_sz.xlsx
Processed image saved to results/laplacian/2_counting_with_bb.jpg
Seed size data saved to results/laplacian/2_obj_sz.xlsx
Processed image saved to results/laplacian/3_counting_with_bb.jpg
Seed size data saved to results/laplacian/3_obj_sz.xlsx
Processed image saved to results/laplacian/4_counting_with_bb.jpg
Seed size data saved to results/laplacian/4_obj_sz.xlsx
Processed image saved to results/laplacian/5_counting_with_bb.jpg
Seed size data saved to results/laplacian/5_obj_sz.xlsx
Processed image saved to results/laplacian/6_counting_with_bb.jpg
Seed size data saved to results/laplacian/6_obj_sz.xlsx
Processed image saved to results/laplacian/7_counting_with_bb.jpg
Seed size data saved to results/laplacian/7_obj_sz.xlsx
Processed image saved to results/laplacian/8_counting_with_bb.jpg
Seed size data saved to results/laplacian/8_obj_sz.xlsx
Processed image saved to results/laplacian/9_counting_with_bb.jpg

# Statistical based model