# This notebook separates individual bean images out of the gathered photos of bean clusters.

In [62]:
import cv2
import os


def split_beans(in_path, out_dir, *, min_area=10000, max_aspect_ratio=3.0):
    """
    Split a photo of several coffee beans into one image file per bean.

    This function does the following:
        - read the raw image at the path
        - find regions containing individual coffee beans
        - store each region in its own file, named ``bean-<i>.png``, inside out_dir
    @param in_path the path to the input image
    @param out_dir the directory in which the resulting images are stored; it is created (including parents) if it does not exist
    @param min_area the minimum bounding-box area, in pixels, for a region to be kept (the default corresponds to a roughly 100x100 crop)
    @param max_aspect_ratio the exclusive upper bound on width/height and height/width of a kept region
    @raises ValueError if the image at in_path cannot be read
    @raises OSError if a cropped region cannot be written to out_dir
    """
    img = cv2.imread(in_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Could not read image at {in_path!r}")

    # Grayscale the image. Note that the background is white and the beans are
    # dark. This will be important down the line.
    # NOTE(review): cv2.imread returns channels in BGR order, so
    # COLOR_BGR2GRAY would be the matching conversion; COLOR_RGB2GRAY swaps the
    # red and blue weights. Left unchanged because the threshold below was
    # presumably tuned against this output - confirm before switching.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Binarize, then invert: OpenCV needs the blobs to be white and the
    # background to be black in order to find the contours.
    thresh = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.bitwise_not(thresh)

    # findContours returns (contours, hierarchy) or (image, contours, hierarchy)
    # depending on the OpenCV version; pick the contour list in either case.
    found = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]

    # This is a bit crude, but filter out contours of specks of dust, etc. and
    # make sure only actual beans remain. Also drop regions that are really
    # misshapen and unlikely to contain a bean - such as a long shadow or the
    # edge of the background. The ratio test is written as a multiplication so
    # that it cannot divide by zero.
    bounding_rects = [
        (x, y, w, h)
        for x, y, w, h in map(cv2.boundingRect, contours)
        if w * h >= min_area
        and w < max_aspect_ratio * h
        and h < max_aspect_ratio * w
    ]

    os.makedirs(out_dir, exist_ok=True)

    for i, (x, y, w, h) in enumerate(bounding_rects):
        print(f"Region {i}\nx: {x}, y: {y}, width: {w}, height: {h}")
        print(f"area: {w * h}")
        print("-----------------------------------------------------")
        # Chop off the portion of the image around the given contour and save
        # it in its own file under the requested output directory.
        bean = img[y : y + h, x : x + w]
        out_path = os.path.join(out_dir, f"bean-{i}.png")
        if not cv2.imwrite(out_path, bean):
            raise OSError(f"Could not write image to {out_path!r}")

In [64]:
# Run the splitter on one sample photo; the crops are written into "out-test".
split_beans(
    in_path="data/raw/ethiopia-yirga-CM-quaker/PXL_20240211_140405064.jpg",
    out_dir="out-test",
)

Region 0
x: 467, y: 2447, width: 248, height: 305
area: 75640
-----------------------------------------------------
Region 1
x: 2606, y: 2445, width: 239, height: 305
area: 72895
-----------------------------------------------------
Region 2
x: 2114, y: 2444, width: 210, height: 300
area: 63000
-----------------------------------------------------
Region 3
x: 1653, y: 2439, width: 241, height: 265
area: 63865
-----------------------------------------------------
Region 4
x: 1050, y: 2436, width: 351, height: 313
area: 109863
-----------------------------------------------------
Region 5
x: 2593, y: 1962, width: 309, height: 324
area: 100116
-----------------------------------------------------
Region 6
x: 2205, y: 1919, width: 270, height: 331
area: 89370
-----------------------------------------------------
Region 7
x: 3603, y: 1917, width: 477, height: 1155
area: 550935
-----------------------------------------------------
Region 8
x: 1682, y: 1874, width: 247, height: 377
area: 9311