In [None]:
import os
import random
import kagglehub
import glob

# Fetch the latest published version of the dataset through kagglehub and
# report where it was placed on disk.
path = kagglehub.dataset_download("moltean/fruits")
print("Path to dataset files:", path)

# Both splits live under the same 100x100 sub-tree of the download.
dataset_root = os.path.join(path, 'fruits-360_100x100/fruits-360')
train_dir = os.path.join(dataset_root, 'Training')
test_dir = os.path.join(dataset_root, 'Test')

# Each class is one sub-folder, so the entry count is the class count.
for caption, split_dir in (("Training classes:", train_dir), ("Test classes:", test_dir)):
    print(caption, len(os.listdir(split_dir)))

import glob
import shutil

# --- Build a small, reproducible subset for object detection ---------------
num_images = 30        # total number of images to sample (train + val)
train_fraction = 0.8   # share of the sampled images used for training
subset_seed = 42       # fixed seed so a re-run picks the same files that were hand-labelled
output_dir = 'object_detection'
for subdir in ('images/train', 'images/val', 'labels/train', 'labels/val'):
    os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

# Sample the same number of images from each chosen class.
classes = ['Apple Braeburn 1', 'Banana 1', 'Orange 1', 'Mango 1', 'Strawberry 1', 'Avocado Black 1']
images_per_class = num_images // len(classes)

# A private generator keeps the selection deterministic without reseeding
# the global `random` module that other cells may rely on.
rng = random.Random(subset_seed)

images = []
for cls in classes:
    # glob order depends on the filesystem; sort so the seed fully determines the sample.
    cls_images = sorted(glob.glob(os.path.join(train_dir, cls, '*.jpg')))
    if len(cls_images) < images_per_class:
        print(f"Warning: only {len(cls_images)} image(s) found for class '{cls}'")
    selected = rng.sample(cls_images, min(images_per_class, len(cls_images)))
    images.extend(selected)

rng.shuffle(images)
n_train = round(len(images) * train_fraction)
train_images = images[:n_train]   # training split
val_images = images[n_train:]     # validation split

print(len(train_images))
print(len(val_images))


def _subset_name(img_path):
    """Return a destination file name that is unique across class folders.

    Basenames are not guaranteed to be unique between class directories, so
    copying by basename alone could overwrite one class's image with another's.
    The class folder name (spaces replaced by underscores) is used as a prefix.
    """
    class_dir = os.path.basename(os.path.dirname(img_path))
    return f"{class_dir.replace(' ', '_')}_{os.path.basename(img_path)}"


# Copy images to object_detection/images/<split>/
for split, split_images in (('train', train_images), ('val', val_images)):
    for img in split_images:
        shutil.copy(img, os.path.join(output_dir, 'images', split, _subset_name(img)))


Path to dataset files: /kaggle/input/fruits
Training classes: 207
Test classes: 207
24
6


In [None]:

# Install dependencies 
# !pip install ultralytics kagglehub

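# Manual annotation step (done outside this notebook)
# LabelImg was used to create YOLO-format labels: one .txt file per image, with the same
# base name as the image, saved in labels/train and labels/val. Class ids in those files
# must follow the order of the `classes` list, which is written to data.yaml as `names`.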

# Create data.yaml ---
# Dataset description read by the trainer: absolute paths of the two image
# folders, the number of classes, and the class names in id order.
data_yaml_path = os.path.join(output_dir, 'data.yaml')
data_yaml = f"""
train: {os.path.abspath(output_dir)}/images/train
val: {os.path.abspath(output_dir)}/images/val

nc: {len(classes)}
names: {classes}
"""
with open(data_yaml_path, 'w') as f:
    f.write(data_yaml)

# Read the file back to show exactly what was written; the context manager
# closes the handle instead of leaving it to the garbage collector.
with open(data_yaml_path) as f:
    print(f.read())

# Train YOLOv8 model 
from ultralytics import YOLO

# Start from the pretrained nano checkpoint (smallest variant, chosen for speed).
model = YOLO('yolov8n.pt')

# The source images are 100x100, but the training image size has to be a
# multiple of the model stride (32). Requesting 100 only triggers a warning and
# is rounded up, so ask for the next valid size explicitly.
model.train(
    data=os.path.join(output_dir, 'data.yaml'),
    epochs=30,
    imgsz=128,
    project=output_dir,   # keep run artefacts next to the dataset
    name='fruits_yolo'
)

# Run inference on validation set
# save=True also writes the annotated images to disk; `results` holds one
# result object per input image.
results = model.predict(source=os.path.join(output_dir, 'images/val'), save=True)

# Show results 
import matplotlib.pyplot as plt
import glob
from PIL import Image

# Draw the detections directly from the in-memory `results` instead of
# guessing the folder the annotated files were saved to: that location depends
# on the library version and on how many prediction runs already exist.
max_shown = 5
shown_results = list(results)[:max_shown]

if shown_results:
    # squeeze=False keeps `axes` two-dimensional even for a single image.
    fig, axes = plt.subplots(1, len(shown_results), figsize=(15, 10), squeeze=False)
    for ax, result in zip(axes[0], shown_results):
        annotated_bgr = result.plot()        # annotated image as a BGR array
        ax.imshow(annotated_bgr[..., ::-1])  # reverse channels to RGB for matplotlib
        ax.axis('off')
    plt.show()
else:
    # Avoid creating a zero-column subplot grid, which raises an error.
    print("No predictions to display.")

# Save final model
# export() converts to deployment formats (ONNX, TorchScript, ...) and accepts
# neither format='pt' nor a `path` argument; save() writes the PyTorch
# checkpoint to the requested file.
model.save(os.path.join(output_dir, 'fruits_yolo.pt'))
