In [1]:
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
# Load the training label table. Each row describes one image: its path
# (relative to the repository root, hence the "../" prefix added when the
# file is opened later), its class name and its weight.
# index_col=0 makes the first CSV column the DataFrame index.

train_ds = pd.read_csv("../Dataset/labels_train.csv", index_col=0)
train_ds.head()

Unnamed: 0,Image path,Class,Weight
0,Dataset/Train/FBO1/cacao-31.jpg,FBO1,1.66
1,Dataset/Train/FBO1/cacao-32.jpg,FBO1,1.27
2,Dataset/Train/FBO1/cacao-33.jpg,FBO1,1.79
3,Dataset/Train/FBO1/cacao-34.jpg,FBO1,1.25
4,Dataset/Train/FBO1/cacao-35.jpg,FBO1,1.85


In [5]:
from scipy.ndimage import binary_fill_holes
from skimage.morphology import remove_small_objects
from skimage.measure import regionprops
from skimage.measure import label
from skimage.feature import graycomatrix, graycoprops
from scipy import stats

def process_image(image, min_area=1000, canvas_size=300):
    """Isolate the largest dark object and centre it on a square black canvas.

    Steps:
      1. Resize the input to 500x300 (width x height) and convert to grayscale.
      2. Otsu-threshold, invert so that pixels at or below the threshold are
         foreground, and fill holes in the resulting binary mask.
      3. Label connected components and take the bounding box of the one with
         the largest area.
      4. Crop image and mask to that box and zero every non-foreground pixel.
         The mask is the full foreground mask, so any other foreground pixels
         that fall inside the box are kept as well.
      5. Paste the crop in the centre of a black ``canvas_size`` square. A crop
         that does not fit (the resized frame is 500 px wide) is first scaled
         down, preserving aspect ratio, so the paste can never overflow.

    Parameters
    ----------
    image : numpy.ndarray
        Three-channel image in RGB channel order (presumably uint8 - the
        result canvas is uint8).
    min_area : int, optional
        Minimum pixel area the largest component must have. Default 1000.
    canvas_size : int, optional
        Side length in pixels of the returned square image. Default 300.

    Returns
    -------
    numpy.ndarray or None
        ``(canvas_size, canvas_size, channels)`` uint8 array, or ``None`` when
        the largest component is smaller than ``min_area`` (a message is
        printed in that case).
    """
    # Resize once and convert to grayscale
    image_resized = cv2.resize(image, (500, 300))
    image_gray = cv2.cvtColor(image_resized, cv2.COLOR_RGB2GRAY)

    # Otsu yields a 0/1 mask with bright pixels set; invert it so that the
    # darker pixels become the foreground, then close interior holes.
    _, img = cv2.threshold(image_gray, 0, 1, cv2.THRESH_OTSU)
    img = 1 - img
    img = binary_fill_holes(img)

    # One regionprops call over the label image gives every component's area
    # and bounding box; no need to rebuild a full-frame mask per label.
    lab = label(img)
    largest = max(regionprops(lab), key=lambda region: region.area, default=None)
    max_area = largest.area if largest is not None else 0

    # If max_area is too small, skip processing (early exit)
    if max_area < min_area:
        print(f"Max area too small: {max_area}")
        return None

    # Crop the image and apply the mask (bbox is min_row, min_col, max_row, max_col)
    min_row, min_col, max_row, max_col = largest.bbox
    img_cropped = image_resized[min_row:max_row, min_col:max_col]
    mask_cropped = img[min_row:max_row, min_col:max_col]
    img_cropped = img_cropped * mask_cropped[..., None]

    crop_height, crop_width, channels = img_cropped.shape

    # A crop larger than the canvas would produce a negative offset and a
    # shape mismatch on assignment, so shrink it to fit first.
    if crop_width > canvas_size or crop_height > canvas_size:
        scale = canvas_size / max(crop_width, crop_height)
        fit_width = max(1, min(canvas_size, round(crop_width * scale)))
        fit_height = max(1, min(canvas_size, round(crop_height * scale)))
        img_cropped = cv2.resize(
            img_cropped, (fit_width, fit_height), interpolation=cv2.INTER_AREA
        )
        crop_height, crop_width = img_cropped.shape[:2]

    # Black canvas used as padding around the crop
    result = np.zeros((canvas_size, canvas_size, channels), dtype=np.uint8)

    # Offsets that centre the crop on the canvas
    x_offset = (canvas_size - crop_width) // 2
    y_offset = (canvas_size - crop_height) // 2

    result[y_offset:y_offset + crop_height,
           x_offset:x_offset + crop_width] = img_cropped

    return result


In [10]:
import os

from tqdm import tqdm


np.set_printoptions(suppress=True)

# cv2.imwrite does not create missing directories; it just returns False.
os.makedirs("../Dataset/Processed/Train", exist_ok=True)

labels = []
skipped = []  # (source path, reason) for every image that produced no output

for i in tqdm(range(len(train_ds))):
    row = train_ds.iloc[i]
    source_path = "../" + row["Image path"]

    # cv2.imread signals a missing/unreadable file by returning None.
    image = cv2.imread(source_path)
    if image is None:
        skipped.append((source_path, "could not be read"))
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # process_image returns None when no large enough object is found.
    img = process_image(image)
    if img is None:
        skipped.append((source_path, "no object found"))
        continue
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    img = cv2.resize(img, (100,100))

    # Keep the original row position in the file name so names stay stable
    # even when some rows are skipped.
    if not cv2.imwrite(f"../Dataset/Processed/Train/cacao-{i}.jpg", img):
        skipped.append((source_path, "could not be written"))
        continue
    labels.append([f"cacao-{i}.jpg", row["Class"], row["Weight"]])

df = pd.DataFrame(labels, columns=["image", "class", "weight"])
df.to_csv("../Dataset/Processed/labels_train.csv")

# Report anything that was left out so the label file is not silently short.
if skipped:
    print(f"Skipped {len(skipped)} of {len(train_ds)} images:")
    for source_path, reason in skipped:
        print(f"  {source_path}: {reason}")



100%|██████████| 1612/1612 [03:15<00:00,  8.26it/s]
