## Object detection with YOLOv5
- First attempt to fine-tune YOLOv5, following this tutorial:
https://curiousily.com/posts/object-detection-on-custom-dataset-with-yolo-v5-using-pytorch-and-python/
- Dataset we added bounding boxes to: https://www.kaggle.com/moltean/fruits

In [None]:

import os
import sys
import torch
from google.colab import drive
import numpy as np
import PIL.Image as Image
import cv2
import pathlib
from sklearn.model_selection import train_test_split
from pathlib import Path

In [3]:
# Root folder of the fruit dataset on the mounted Google Drive; the later cells
# list its sub-folders (e.g. train/ and test/) starting from this path.
PATH_TO_DATA = "/content/gdrive/MyDrive/fruits-360_dataset/"

In [4]:
# mount Google Drive so the dataset below /content/gdrive becomes reachable
drive.mount('/content/gdrive')
# NOTE(review): no module is imported from this folder in the visible cells —
# confirm that extending sys.path is still needed
sys.path.append(PATH_TO_DATA)
# change the current working directory to the dataset folder (the same location
# as PATH_TO_DATA) so that relative paths in later cells resolve against it
%cd "/content/gdrive/MyDrive/fruits-360_dataset/"
%ls

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/MyDrive/fruits-360_dataset
[0m[01;34mdatasets[0m/  [01;34mtest[0m/  [01;34mtrain[0m/  [01;34myolov5[0m/


In [None]:
def resize(image_path):
  """Draw a green bounding-box rectangle onto an image and overwrite the file.

  Despite its name, this function does not change the image dimensions. It
  reads the image, draws a rectangle of thickness 2 from the top-left corner
  (0, 0) to (width - 5, height - 5), and writes the result back to the same
  path, so the original file on disk is modified in place.

  Args:
    image_path: Path of the image file to annotate.

  Raises:
    ValueError: If OpenCV cannot read the file as an image.
    OSError: If the annotated image cannot be written back.
  """
  img = cv2.imread(image_path)
  # cv2.imread signals failure by returning None instead of raising, which
  # would otherwise surface as an opaque AttributeError on `.shape`
  if img is None:
    raise ValueError(f"Could not read image: {image_path}")

  # shape is (rows, cols[, channels]); only the first two entries are needed
  height, width = img.shape[:2]
  img = cv2.rectangle(
      img,
      (0, 0),
      (width - 5, height - 5),
      color=(0, 255, 0),
      thickness=2
    )

  # cv2.imwrite reports failure through its boolean return value
  if not cv2.imwrite(image_path, img):
    raise OSError(f"Could not write image: {image_path}")

In [None]:
# Add bounding boxes to the data and collect the image paths per subset.
#
# Only the 'train' and 'test' sub-folders of PATH_TO_DATA are processed. Any
# other entry in that folder (for example a cloned repository or an exported
# dataset) is ignored instead of being treated as a test set.
path = PATH_TO_DATA


def _collect_subset(subset_dir):
  """Annotate every .jpg below ``subset_dir`` and list the results.

  ``subset_dir`` is expected to contain one folder per category. Each image
  is passed to ``resize`` (which draws the bounding box in place) and then
  recorded together with the name of its category folder.

  Args:
    subset_dir: Folder that contains the category folders.

  Returns:
    A tuple ``(files, labels, found_categories)``: the image paths, the
    category name of each image (parallel to ``files``), and the category
    folder names in the order they were listed.
  """
  files, labels, found_categories = [], [], []
  for category in os.listdir(subset_dir):
    category_path = os.path.join(subset_dir, category)
    # stray files next to the category folders would make os.listdir fail
    if not os.path.isdir(category_path):
      continue
    found_categories.append(category)

    count_before = len(files)
    for filename in os.listdir(category_path):
      if not filename.endswith(".jpg"):
        continue
      image_path = os.path.join(category_path, filename)
      resize(image_path)
      files.append(image_path)
      labels.append(category)
    # one progress line per category instead of two lines per image
    print(f"{category_path}: {len(files) - count_before} images")
  return files, labels, found_categories


train_files, train_labels = [], []
test_files, test_labels = [], []
# keeps track of the available classes; a category's position in this list is
# used as its class index later on.
# NOTE(review): the order follows os.listdir, which is not guaranteed to be
# stable — presumably it has to match the class order in the training config.
categories = []

train_dir = os.path.join(path, 'train')
if os.path.isdir(train_dir):
  train_files, train_labels, categories = _collect_subset(train_dir)

test_dir = os.path.join(path, 'test')
if os.path.isdir(test_dir):
  test_files, test_labels, _test_categories = _collect_subset(test_dir)

# total number of processed images across both subsets
i = len(train_files) + len(test_files)
print(i)

In [None]:
# sanity check: show the class names that were collected from the train folder;
# a category's position in this list is the class index written to the labels
print(categories)

['Apple Red 1', 'Banana', 'Orange', 'Avocado', 'Kiwi', 'Strawberry', 'Peach', 'Tomato 1', 'Potato Red', 'Apricot']


In [None]:

def create_dataset(files, categories, dataset_type,
                   output_root="/content/gdrive/MyDrive/fruit"):
  """Export images and YOLO label files for one dataset split.

  Every image is re-saved as RGB under ``<output_root>/images/<dataset_type>``
  and gets a label file of the same base name under
  ``<output_root>/labels/<dataset_type>``. Each label file holds one line,
  ``class x_center y_center width height``, describing a box that spans the
  whole image, with all four coordinates normalised by the image size.

  Args:
    files: Iterable of image paths; the name of each file's parent folder is
      taken as its category.
    categories: List of category names; the position of a name in this list
      is written as the class index.
    dataset_type: Name of the split, used as sub-folder name (e.g. 'train').
    output_root: Folder that receives the ``images`` and ``labels`` trees.

  Raises:
    ValueError: If a file's parent folder name is not in ``categories``.
  """
  output_root = Path(output_root)
  images_path = output_root / "images" / dataset_type
  images_path.mkdir(parents=True, exist_ok=True)
  labels_path = output_root / "labels" / dataset_type
  labels_path.mkdir(parents=True, exist_ok=True)

  for single_file in files:
    source_path = Path(single_file)
    category = source_path.parent.name
    # resolve the class index first, so an unknown category fails before any
    # image or (empty) label file has been written for this sample
    category_idx = categories.index(category)

    # All splits share one flat folder. Prefix the category so that files with
    # the same base name in different category folders do not overwrite each
    # other's image and label.
    sample_name = f"{category.replace(' ', '_')}_{source_path.stem}"

    # save the image in the new location to comply with the darknet layout
    with Image.open(single_file) as img:
      width, height = img.size
      img.convert("RGB").save(str(images_path / f"{sample_name}{source_path.suffix}"))

    # the bounding box covers the full image, first expressed in pixels
    x1, y1 = 0, 0
    x2, y2 = width, height
    bbox_width = x2 - x1
    bbox_height = y2 - y1

    # YOLO expects centre and size as fractions of the image dimensions
    x_center = (x1 + bbox_width / 2) / width
    y_center = (y1 + bbox_height / 2) / height
    norm_width = bbox_width / width
    norm_height = bbox_height / height

    with (labels_path / f"{sample_name}.txt").open(mode="w") as label_file:
      label_file.write(
          f"{category_idx} {x_center} {y_center} {norm_width} {norm_height}\n"
      )
  

In [None]:
# split the original train set into a train and a val set (90% / 10%)
#
# The seed is fixed so that re-running this cell reproduces the same split.
# create_dataset only adds files to the output folders, so a different split
# on a later run could leave the same image in both the train and val folders.
SPLIT_SEED = 42

train_fruit, val_fruit = train_test_split(
    train_files, test_size=0.1, random_state=SPLIT_SEED
)


# create folders with train and val datasets
create_dataset(train_fruit, categories, 'train')
create_dataset(val_fruit, categories, 'val')


In [None]:
# Fetch the YOLOv5 code and install its Python dependencies.
# The clone lands in the current working directory and the relative %cd below
# moves into it, so the following cells run from inside the yolov5 checkout.
# NOTE(review): when ./yolov5 already exists, git clone presumably refuses to
# overwrite it and the existing checkout is used — confirm on a re-run.
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
%pip install -r requirements.txt



In [None]:
# fine-tune the model on our own dataset, starting from the yolov5x.pt weights
# NOTE(review): ./data/fruit.yaml is not created by any visible cell — presumably
# it was added to the yolov5 checkout by hand and has to list the classes in the
# same order as `categories`; confirm before running.
!python train.py --img 640 --batch 4 --epochs 30 \
  --data ./data/fruit.yaml --cfg ./models/yolov5x.yaml --weights yolov5x.pt \
  --name yolov5x_fruit --cache