Image preprocessing is an essential step in preparing data for object detection tasks using YOLOv8 or any other object detection algorithm. It involves applying various techniques to enhance the quality of the input images and ensure consistent representation for improved detection accuracy. Here's a comprehensive guide to image preprocessing for optimizing JPEG images for YOLOv8 object detection in Python:

In [None]:
# import required libraries
import os
import numpy as np
import cv2
import random

In [None]:
def load_images_from_folder(folder):
    """Load every readable image found directly inside ``folder``.

    Entries are visited in sorted filename order so that the returned list
    (and any index-based output names derived from it) is the same on every
    run; ``os.listdir`` on its own returns entries in arbitrary order.
    Sub-directories are skipped, and files that OpenCV cannot decode
    (``cv2.imread`` returns ``None``) are ignored.

    Args:
        folder: Path of the directory to scan. The scan is not recursive.

    Returns:
        A list with one image per decodable file, exactly as returned by
        ``cv2.imread``, ordered by filename.

    Raises:
        OSError: If ``folder`` does not exist or cannot be listed
            (propagated from ``os.listdir``).
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        # Only regular files can be images; do not hand directories to OpenCV.
        if not os.path.isfile(path):
            continue
        img = cv2.imread(path)
        if img is not None:
            images.append(img)
    return images

In [None]:
# Folder that holds the input images. The processed and augmented results are
# written back into this same folder further down.
# NOTE(review): because outputs land in the input folder, re-running the
# notebook will load previously generated images as inputs too - confirm this
# is intended.
path_to_images = "images"
# Read every file in the folder that OpenCV is able to decode.
images_to_process = load_images_from_folder(path_to_images)
print(f"Test dataset contains {len(images_to_process)} images.")

In [None]:
# Step 1: resize every image to a fixed 608x608 size (the aspect ratio is not
# preserved) and convert to float32 so the values can be rescaled below.
resized_images = [cv2.resize(image, (608, 608)).astype(np.float32) for image in images_to_process]

# Step 2: min-max normalise each image independently into the [0, 1] range.
normalized_images = [cv2.normalize(resized_image, None, 0, 1, cv2.NORM_MINMAX) for resized_image in resized_images]

# Step 3: keep the BGR channel order. cv2.imread already returns BGR data and
# cv2.imwrite expects BGR data, so no channel swap is applied here: running
# COLOR_RGB2BGR on these arrays would exchange red and blue in every saved JPEG.
BGR_images = list(normalized_images)

In [None]:
# save processed images in path to images
for i,BRG_image in enumerate(BGR_images):
    # The pixels are floats in [0, 1]; scale them back to 0-255 and convert to
    # 8-bit, which is what the JPEG encoder expects.
    img_to_save = cv2.convertScaleAbs(BRG_image, alpha=(255.0))
    output_path = f'{path_to_images}/processed_image_{i}.jpg'
    # cv2.imwrite signals a failed write through its boolean return value, so
    # check it rather than losing images silently.
    if not cv2.imwrite(output_path, img_to_save):
        print(f"Warning: could not write {output_path}")

In [None]:
# Show image in a separate window
# cv2.imshow('Preprocessed Image', BRG_image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

4. Data Augmentation:

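Data augmentation applies random transformations to the images to artificially increase the dataset size and improve the model's generalization ability. Common techniques include random cropping, flipping, and brightness adjustment, all of which can be implemented with OpenCV and NumPy; the cells below implement random cropping and horizontal flipping. For object detection, remember that any geometric transformation (such as a crop or a flip) must also be applied to the bounding-box annotations of the image.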

In [None]:
def random_crop_coordinates(image_shape):
    """Pick a random crop rectangle that lies fully inside an image.

    Each side of the crop is drawn uniformly between half of the matching
    image side and the full side (never less than one pixel); the top-left
    corner is then drawn so that the rectangle stays within the image.

    Args:
        image_shape: Sequence whose first two items are the image height and
            width, e.g. ``(height, width)`` or a NumPy ``shape`` such as
            ``(height, width, channels)``. Extra items are ignored.

    Returns:
        Tuple ``(x_min, y_min, width, height)`` describing the crop.

    Raises:
        ValueError: If the height or the width is smaller than one pixel.
    """
    height, width = image_shape[:2]
    if height < 1 or width < 1:
        raise ValueError(
            f"image_shape must describe a non-empty image, got {image_shape!r}"
        )

    # int(0.5 * 1) is 0, so clamp the lower bound to keep the crop non-empty.
    crop_height = random.randint(max(1, int(0.5 * height)), height)
    crop_width = random.randint(max(1, int(0.5 * width)), width)
    # The largest valid offset leaves exactly crop_width / crop_height pixels.
    x_min = random.randint(0, width - crop_width)
    y_min = random.randint(0, height - crop_height)

    return x_min, y_min, crop_width, crop_height

In [None]:
# Random cropping
num_random_images = 10

if not BGR_images:
    # Indexing BGR_images[0] below would raise IndexError on an empty dataset.
    print("No preprocessed images available; skipping augmentation.")
else:
    # The first image's shape is used for every crop.
    # NOTE(review): this assumes all entries of BGR_images share one size - confirm.
    height, width, channels = BGR_images[0].shape
    image_shape = (height, width)

    for i in range(num_random_images):
        # Separate names for the crop size so the full image height/width
        # defined above are not overwritten inside the loop.
        x_min, y_min, crop_width, crop_height = random_crop_coordinates(image_shape)
        # Draw the source image with the same `random` module used for the
        # crop and the flip, so a single random.seed() controls the whole cell.
        random_image = random.choice(BGR_images)
        cropped_image = random_image[y_min:y_min + crop_height, x_min:x_min + crop_width]

        # Random flipping: mirror around the vertical axis half of the time.
        if random.random() < 0.5:
            flipped_image = cv2.flip(cropped_image, 1)
        else:
            flipped_image = cropped_image

        # Scale the float pixels by 255 and convert to 8-bit before encoding.
        img_to_save = cv2.convertScaleAbs(flipped_image, alpha=(255.0))
        output_path = f'{path_to_images}/augmented_image_{i}.jpg'
        # cv2.imwrite signals a failed write through its boolean return value.
        if not cv2.imwrite(output_path, img_to_save):
            print(f"Warning: could not write {output_path}")