Tomato maturity dataset from https://www.kaggle.com/datasets/nexuswho/laboro-tomato

The dataset is originally an object-detection task, annotated in both COCO and YOLO formats, so we convert it into a classification task by extracting each bounding box and cropping it into its own image.

# Download Dataset

In [1]:
!pip install -q kaggle
from google.colab import files
files.upload()

!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [2]:
# Download the nexuswho/laboro-tomato dataset archive with the Kaggle CLI.
!kaggle datasets download -d nexuswho/laboro-tomato

Downloading laboro-tomato.zip to /content
 99% 1.52G/1.53G [00:20<00:00, 109MB/s] 
100% 1.53G/1.53G [00:20<00:00, 79.1MB/s]


In [None]:
!unzip laboro-tomato.zip

# Convert to classification task

In [7]:
import re
import os
import cv2
import shutil
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

The annotations use six classes, listed here in class-id order (0–5):

- b_fully_ripened
- b_half_ripened
- b_green
- l_fully_ripened
- l_half_ripened
- l_green

In [8]:
def extractBbox(label_directory, image_directory):
  """Collect every annotated bounding box of a dataset split into a DataFrame.

  Each label file is paired with the image that has the same base name
  (e.g. ``IMG_0984.txt`` with ``IMG_0984.jpg``). Pairing by name rather than
  by sorted position means an extra or missing file in either directory
  cannot shift boxes onto the wrong image.

  Args:
    label_directory: Directory of annotation text files. Every non-blank line
      is read as ``class x_center y_center width height``; fields after the
      fifth are ignored.
    image_directory: Directory holding the corresponding images.

  Returns:
    pandas.DataFrame with one row per bounding box and the columns
    ``image_path``, ``x_center``, ``y_center``, ``width_box``, ``height_box``
    and ``label`` (the class id as a float). Label files without a matching
    image are skipped.

  Raises:
    ValueError: If a non-blank line has fewer than five fields or a field is
      not numeric.
  """
  # Index images by base name; setdefault keeps the first one in sorted order
  # if two images happen to share a stem.
  images_by_stem = {}
  for image_name in sorted(os.listdir(image_directory)):
    images_by_stem.setdefault(os.path.splitext(image_name)[0], image_name)

  columns = ["image_path", "x_center", "y_center", "width_box", "height_box", "label"]
  rows = []

  for annot in sorted(os.listdir(label_directory)):
    img = images_by_stem.get(os.path.splitext(annot)[0])
    if img is None:
      # No image for this file (stray file or missing image): nothing to crop.
      continue

    annot_path = os.path.join(label_directory, annot)
    img_path = os.path.join(image_directory, img)

    with open(annot_path, 'r') as file:
      for line in file:
        fields = line.split()
        if not fields:
          # Blank lines (e.g. a trailing newline) carry no annotation.
          continue
        class_id, x_center, y_center, width_box, height_box = (
            float(value) for value in fields[:5]
        )
        rows.append((img_path, x_center, y_center, width_box, height_box, class_id))

  # Passing columns explicitly keeps the schema stable even when no box was found.
  return pd.DataFrame(rows, columns=columns)

In [9]:
# Reduce the task to three classes only: class ids 0-2 and 3-5 carry the same
# three ripeness levels in the same order, so each id maps to its level.
ripeness_levels = ("fully_ripened", "half_ripened", "green")
tomato_class = {float(class_id): ripeness_levels[class_id % 3] for class_id in range(6)}

# One row per bounding box for each split, with the numeric class id replaced
# by its ripeness name.
training_data = (
    extractBbox("/content/train/labels", "/content/train/images")
    .assign(label=lambda boxes: boxes['label'].map(tomato_class))
)
validation_data = (
    extractBbox("/content/val/labels", "/content/val/images")
    .assign(label=lambda boxes: boxes['label'].map(tomato_class))
)

In [10]:
# Preview the first rows to check the bounding-box columns and the mapped labels.
training_data.head()

Unnamed: 0,image_path,x_center,y_center,width_box,height_box,label
0,/content/train/images/IMG_0984.jpg,0.5885,0.4466,0.3028,0.205,green
1,/content/train/images/IMG_0984.jpg,0.382,0.396,0.1677,0.1596,green
2,/content/train/images/IMG_0984.jpg,0.4848,0.5215,0.1638,0.1136,green
3,/content/train/images/IMG_0985.jpg,0.832,0.4307,0.0741,0.0596,fully_ripened
4,/content/train/images/IMG_0985.jpg,0.7743,0.4541,0.0786,0.0526,fully_ripened


In [None]:
def cropped_image(dataframe):
  """Crop one bounding box out of its source image.

  Args:
    dataframe: One annotation row (e.g. a pandas Series) with the keys
      ``image_path``, ``x_center``, ``y_center``, ``width_box``,
      ``height_box`` and ``label``. The box values are fractions of the image
      width/height, as they are multiplied by the image size below.

  Returns:
    Tuple ``(cropped, image_name, label)``: the cropped pixel array, the file
    name of the source image without its directory, and the row's label.

  Raises:
    FileNotFoundError: If the image cannot be read.
  """
  image_path = dataframe['image_path']
  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError(f"Could not read image: {image_path}")
  image_height, image_width = image.shape[:2]

  x_center = dataframe['x_center']
  y_center = dataframe['y_center']
  half_width = dataframe['width_box'] / 2
  half_height = dataframe['height_box'] / 2

  # Clamp to the image bounds. A box overhanging the left/top edge would give
  # a negative start index, which numpy interprets as counting from the far
  # edge and therefore yields an empty or wrong crop.
  x_min = max(int((x_center - half_width) * image_width), 0)
  y_min = max(int((y_center - half_height) * image_height), 0)
  x_max = min(int((x_center + half_width) * image_width), image_width)
  y_max = min(int((y_center + half_height) * image_height), image_height)

  cropped = image[y_min:y_max, x_min:x_max]
  image_name = os.path.basename(image_path)

  return cropped, image_name, dataframe['label']

## Extracting

In [None]:
# Absolute path, matching the "/content/data/train" and "/content/data/val"
# targets the crops are written to; a relative './data' only lines up with
# them when the working directory happens to be /content.
root_dir = '/content/data'
classes = ['green', 'half_ripened', 'fully_ripened']

# Create one output folder per class under both splits.
# exist_ok=True keeps the cell re-runnable.
for i in classes:
  train_folder = os.path.join(root_dir, "train", i)
  val_folder = os.path.join(root_dir, "val", i)

  os.makedirs(train_folder, exist_ok=True)
  os.makedirs(val_folder, exist_ok=True)

In [None]:
# Training data: write every bounding-box crop into its class folder.
train_path = "/content/data/train"
for row_index, sample in training_data.iterrows():
  tomato, image_name, label = cropped_image(sample)
  if tomato.size == 0:
    # Degenerate box with no pixels; cv2.imwrite raises on an empty image.
    continue
  # One source image holds several boxes. Suffix the file name with the row
  # index so crops from the same image do not overwrite each other.
  stem, extension = os.path.splitext(image_name)
  save_path = os.path.join(train_path, label, f"{stem}_{row_index}{extension}")
  cv2.imwrite(save_path, tomato)

In [None]:
# Validation data: write every bounding-box crop into its class folder.
val_path = "/content/data/val"
for row_index, sample in validation_data.iterrows():
  tomato, image_name, label = cropped_image(sample)
  if tomato.size == 0:
    # Degenerate box with no pixels; cv2.imwrite raises on an empty image.
    continue
  # One source image holds several boxes. Suffix the file name with the row
  # index so crops from the same image do not overwrite each other.
  stem, extension = os.path.splitext(image_name)
  save_path = os.path.join(val_path, label, f"{stem}_{row_index}{extension}")
  cv2.imwrite(save_path, tomato)