In [8]:
# import necessary packages
import PIL.Image
import matplotlib.pyplot as plt
import torch
from torchvision.transforms import v2
import cv2
import numpy as np
import random
import matplotlib.pyplot as plt
import os

In [9]:
# first i need to make 4 copies of each image with a random colour
# this is to help the model learn that the logos are independent of colour

# Define the range for white pixels in the BGR format
lower_white = np.array([150, 150, 150])  # Lower bound for white (light white)
upper_white = np.array([255, 255, 255])  # Upper bound for white (pure white)

# this changes the white pixels to a random colour
def change_white_to_random_color(image, output_filename):
    mask = cv2.inRange(image, lower_white, upper_white)
   # Generate a random color in the BGR format
    c_1 = random.randint(0, 255)
    c_2 = random.randint(0, 255)
    c_3 = random.randint(0, 255)
    new_color = [c_1, c_2, c_3]

    # Change the color of the white pixels
    image[mask == 255] = new_color

    # Convert BGR image to RGB for displaying with matplotlib
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Save the modified image with the specified filename
    cv2.imwrite(output_filename, image)
    print(f"Image saved as {output_filename}")

    return image

In [22]:
# this is a helper function that returns the paths to the images I have in colab
def image_paths(local_download_path):
  file_paths = []

  for filename in os.listdir(local_download_path):
      if filename.endswith("jpg"):
          file_paths.append(local_download_path + filename)

  return file_paths

BW_paths = image_paths('/content/drive/MyDrive/Logos/BW/')

In [23]:
print(len(BW_paths))

483


In [24]:
bad_paths = []

for path in BW_paths:
  for i in range(1):
    image = cv2.imread(path)
    if image is None:  # Skip if the image is None
        print(f"Skipping {path} as it couldn't be loaded.")
        bad_paths.append(path)
        continue

    name = path[:-4]
    change_white_to_random_color(image.copy(), f'{name}_c_{i}.jpg')
    print(f'{name}_colour_{i}.jpg')

Image saved as /content/drive/MyDrive/Logos/BW/74854_logo_c_0.jpg
/content/drive/MyDrive/Logos/BW/74854_logo_colour_0.jpg
Image saved as /content/drive/MyDrive/Logos/BW/3540546308_logo_c_0.jpg
/content/drive/MyDrive/Logos/BW/3540546308_logo_colour_0.jpg
Image saved as /content/drive/MyDrive/Logos/BW/3540395943_logo_c_0.jpg
/content/drive/MyDrive/Logos/BW/3540395943_logo_colour_0.jpg
Image saved as /content/drive/MyDrive/Logos/BW/147_logo_c_0.jpg
/content/drive/MyDrive/Logos/BW/147_logo_colour_0.jpg
Image saved as /content/drive/MyDrive/Logos/BW/3540342795_logo_c_0.jpg
/content/drive/MyDrive/Logos/BW/3540342795_logo_colour_0.jpg
Image saved as /content/drive/MyDrive/Logos/BW/3405_logo_c_0.jpg
/content/drive/MyDrive/Logos/BW/3405_logo_colour_0.jpg
Image saved as /content/drive/MyDrive/Logos/BW/146_logo_c_0.jpg
/content/drive/MyDrive/Logos/BW/146_logo_colour_0.jpg
Image saved as /content/drive/MyDrive/Logos/BW/80383_logo_c_0.jpg
/content/drive/MyDrive/Logos/BW/80383_logo_colour_0.jpg
Imag

In [25]:
colourized_BW_paths = image_paths('/content/drive/MyDrive/Logos/BW/')
print(len(colourized_BW_paths)) # should be 1924

966


In [28]:
# now define how i want to transform the images
transform = v2.Compose([
    v2.RGB(),
    v2.RandomPerspective(distortion_scale=0.8, p=0.5),
    v2.RandomAffine(degrees=30, translate=(0, 0.1), scale=(0.6, 2)),
    v2.ColorJitter(brightness=.5, hue=.3),
    v2.RandomInvert(0.5)
])


# the folder path where i want to save the images
folder_path = "/content/drive/MyDrive/Logos/Small_Dataset"

# Ensure the folder exists (create it if it doesn't)
os.makedirs(folder_path, exist_ok=True)

def img_augmentor(img_path, old_directory, num_augmentations):
  k = len(old_directory)
  for i in range(num_augmentations):
    name = folder_path + img_path[k:-4] + f'{i}.jpg'
    img = PIL.Image.open(img_path)
    augmented_img = transform(img)
    augmented_img.save(name)
    print(f'Saved as: {name}')

In [29]:
# now run it for my images
for path in colourized_BW_paths:
  img_augmentor(path, '/content/drive/MyDrive/Logos/BW', 10)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo0.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo1.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo2.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo3.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo4.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo5.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo6.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo7.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo8.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540457850_logo9.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540443260_logo0.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540443260_logo1.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Da

In [32]:
# test to make sure i got them all
# i should have 28172
augmented_paths = image_paths('/content/drive/MyDrive/Logos/Small_Dataset')
print(len(augmented_paths))

14686


In [31]:
k = len('/content/drive/MyDrive/Logos/BW')

for path in colourized_BW_paths:
  name = folder_path + path[k:]
  img = PIL.Image.open(path)
  img.save(name)
  print(f'Saved as: {name}')

Saved as: /content/drive/MyDrive/Logos/Small_Dataset/74854_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540546308_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540395943_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/147_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540342795_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3405_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/146_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/80383_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/88_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540478508_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/38189_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/67_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540450709_logo.jpg
Saved as: /content/drive/MyDrive/Logos/Small_Dataset/3540515384_logo.jpg
Saved as: /content/drive/MyDr

In [33]:
# test to make sure i got them all
# i should have 30096
final_paths = image_paths('/content/drive/MyDrive/Logos/Small_Dataset')
print(len(final_paths))

30096
