In [None]:
# RAW_DIR is scanned for the source .webp files; PROCESSED_DIR is the root
# under which every derived artefact is written.
RAW_DIR = "./raw/"
PROCESSED_DIR = "./processed/"

# Per-stage output folders nested under PROCESSED_DIR:
# PNG conversions first, then the cropped versions of those PNGs.
CONVERTED_DIR = f"{PROCESSED_DIR}converted/"
CROPPED_DIR = f"{PROCESSED_DIR}cropped/"


In [None]:
# Convert files from WEBP to PNG

import os
from PIL import Image
import glob

# Create the output directory for converted images if it doesn't exist
os.makedirs(CONVERTED_DIR, exist_ok=True)

# Find all webp files in the raw directory
webp_files = glob.glob(os.path.join(RAW_DIR, "*.webp"))

# Names of the files that could not be converted, reported once at the end
# so failures are not lost in the per-file output.
failed_files = []

# Convert each webp file to png
for webp_file in webp_files:
    filename = os.path.basename(webp_file)
    name_without_ext = os.path.splitext(filename)[0]
    output_path = os.path.join(CONVERTED_DIR, f"{name_without_ext}.png")

    try:
        # Image.open is lazy and keeps the underlying file open; the context
        # manager guarantees the handle is released even when saving fails.
        with Image.open(webp_file) as image:
            image.save(output_path, "PNG")
        print(f"Converted: {filename} -> {name_without_ext}.png")
    except Exception as e:
        # Best effort: report the failure and continue with the next file.
        failed_files.append(filename)
        print(f"Error converting {filename}: {e}")

print(f"Conversion complete! {len(webp_files)} files processed.")
if failed_files:
    print(f"{len(failed_files)} file(s) could not be converted.")

In [None]:
# Crop top 100px to remove AUTO.RIA watermark

from PIL import Image
import os
import glob

# Height in pixels of the strip removed from the top of every image
# (the area carrying the AUTO.RIA watermark).
WATERMARK_HEIGHT_PX = 100

# Create a subdirectory for cropped images
os.makedirs(CROPPED_DIR, exist_ok=True)

# Get all PNG files produced by the conversion step
png_files = glob.glob(os.path.join(CONVERTED_DIR, "*.png"))

for png_file in png_files:
    filename = os.path.basename(png_file)
    try:
        # The context manager releases the source file handle on every path,
        # including the early skip and any exception raised while saving.
        with Image.open(png_file) as img:
            width, height = img.size

            # Nothing would remain after removing the top strip, so skip
            # the image explicitly instead of relying on crop/save failing.
            if height <= WATERMARK_HEIGHT_PX:
                print(
                    f"Skipped {filename}: height {height}px is not greater "
                    f"than the {WATERMARK_HEIGHT_PX}px crop"
                )
                continue

            cropped = img.crop((0, WATERMARK_HEIGHT_PX, width, height))

            # Save under the same filename in the cropped directory
            save_path = os.path.join(CROPPED_DIR, filename)
            cropped.save(save_path)
        # The image is only cropped here; no resizing takes place.
        print(f"Cropped: {filename}")
    except Exception as e:
        # Best effort: report the failure and continue with the next file.
        print(f"Error processing {png_file}: {e}")

print(f"Processing complete! {len(png_files)} files processed.")

In [None]:
# Split all the files into train, test, val for YOLO

import os
import random
import shutil
import glob

# Create train, test, and val directories
TRAIN_DIR = "./train/"
TEST_DIR = "./test/"
VAL_DIR = "./val/"

# Fixed seed for the shuffle below. The copy step never clears the split
# directories, so a different shuffle on a re-run would leave the same image
# in more than one split. A fixed seed makes re-runs reproduce the same split.
# NOTE(review): copies left behind by earlier unseeded runs are not removed here.
SPLIT_SEED = 42

# Create directories if they don't exist
os.makedirs(TRAIN_DIR, exist_ok=True)
os.makedirs(TEST_DIR, exist_ok=True)
os.makedirs(VAL_DIR, exist_ok=True)

# Get all images from the cropped directory. os.listdir returns entries in
# arbitrary order, so sort first; otherwise the seeded shuffle would still
# depend on the directory listing order.
all_images = sorted(f for f in os.listdir(CROPPED_DIR) if f.endswith(".png"))

# A dedicated Random instance keeps the global random state untouched.
random.Random(SPLIT_SEED).shuffle(all_images)

# Calculate split indices: 80% train, 10% test, the remainder is validation
total_images = len(all_images)
train_split = int(0.8 * total_images)
test_split = int(0.1 * total_images)

# Split the data
train_images = all_images[:train_split]
test_images = all_images[train_split : train_split + test_split]
val_images = all_images[train_split + test_split :]


# Function to copy both image and its corresponding label file
def copy_with_label(img, src_dir, dst_dir):
    """Copy an image from ``src_dir`` to ``dst_dir`` along with its label file.

    The label is looked up in ``src_dir`` as the image's file name with the
    extension replaced by ``.txt``. It is copied only when it exists; an
    image without a label is copied on its own.

    Args:
        img: File name of the image (not a full path).
        src_dir: Directory containing the image and, optionally, its label.
        dst_dir: Directory that receives the copies.

    Returns:
        None. Errors raised by ``shutil.copy2`` propagate to the caller.
    """
    # The image itself is always copied.
    shutil.copy2(os.path.join(src_dir, img), os.path.join(dst_dir, img))

    # Derive the label name from the image stem.
    stem, _ext = os.path.splitext(img)
    label_name = stem + ".txt"
    label_path = os.path.join(src_dir, label_name)

    # No label next to the image: nothing more to do.
    if not os.path.exists(label_path):
        return

    shutil.copy2(label_path, os.path.join(dst_dir, label_name))


# Copy every image (plus its label file, when one exists) into the directory
# of the split it was assigned to: train first, then test, then val.
for split_images, split_dir in (
    (train_images, TRAIN_DIR),
    (test_images, TEST_DIR),
    (val_images, VAL_DIR),
):
    for img in split_images:
        copy_with_label(img, CROPPED_DIR, split_dir)

# Summary of how many images ended up in each split
print("Split complete:")
print(f"Train: {len(train_images)} images")
print(f"Test: {len(test_images)} images")
print(f"Validation: {len(val_images)} images")

In [None]:
# Extract all images with licence plates into ocr_val directory

from PIL import Image
import os
import glob

# Label files and their images are read from the cropped-image directory
SRC_DIR = CROPPED_DIR

# Destination directory for the extracted files; created up front so the
# copy step can write into it.
OCR_VAL = "./ocr_val/"
os.makedirs(OCR_VAL, exist_ok=True)


# Function to copy the image that belongs to a label file, plus the label file itself
def copy_with_image(label, src_dir, dst_dir):
    """Copy the PNG that belongs to a label file, then the label file itself.

    Only the stem of ``label`` is used: both ``<stem>.png`` and ``<stem>.txt``
    are resolved inside ``src_dir``, regardless of any directory part that
    ``label`` carries.

    Args:
        label: Path or file name of the label file.
        src_dir: Directory containing the image and the label.
        dst_dir: Directory that receives the copies.

    Returns:
        None. The image is copied unconditionally, so errors raised by
        ``shutil.copy2`` (for example when the image is missing) propagate.
    """
    stem = os.path.splitext(os.path.basename(label))[0]

    # The image is copied first and without an existence check.
    png_name = stem + ".png"
    shutil.copy2(os.path.join(src_dir, png_name), os.path.join(dst_dir, png_name))

    # The label is copied only when it is present in src_dir.
    txt_name = stem + ".txt"
    txt_src = os.path.join(src_dir, txt_name)
    if os.path.exists(txt_src):
        shutil.copy2(txt_src, os.path.join(dst_dir, txt_name))


# Every label file in the source directory is a candidate
all_labels = glob.glob(os.path.join(SRC_DIR, "*.txt"))

for lable_file in all_labels:
    # A file qualifies when at least one of its lines starts with "0".
    # NOTE(review): presumably class 0 is the licence plate class, as the
    # cell header suggests - confirm against the dataset's class list.
    with open(lable_file, "r") as file:
        has_plate = any(line.startswith("0") for line in file)

    if not has_plate:
        continue

    # Copy once per file, after the label has been closed. Copying inside the
    # line loop would repeat the copy and the message for every matching line.
    copy_with_image(lable_file, SRC_DIR, OCR_VAL)
    print(f"{lable_file} has been coppied into {OCR_VAL}")

In [None]:
import yaml
from glob import glob

# Images selected for OCR validation
all_images = glob("./ocr_val/*.png")

# Load the entries recorded so far. yaml.safe_load returns None for an empty
# document, and the file may not exist yet on a first run; both cases start
# from an empty mapping instead of crashing.
try:
    with open("val_plate_numbers.yaml", "r") as f:
        existing_yaml = yaml.safe_load(f) or {}
except FileNotFoundError:
    existing_yaml = {}

# Add a placeholder (None) for every image that has no entry yet; entries
# that already exist are left untouched.
for image_path in all_images:
    base_name = os.path.splitext(os.path.basename(image_path))[0]
    existing_yaml.setdefault(base_name, None)

# Write the merged mapping back, sorted by key
with open("val_plate_numbers.yaml", "w") as f:
    f.write(yaml.safe_dump(existing_yaml, sort_keys=True))

In [None]:
import yaml
from glob import glob
import os

# Label files of the images selected for OCR validation
all_labels = glob("./ocr_val/*.txt")

# Maps each label file's stem to the concatenated first characters of all
# its lines that do not start with "0".
# NOTE(review): only the first character of each line is used, so class ids
# with more than one digit would be truncated - confirm ids are single-digit.
vehicle_type = {}
for f_path in all_labels:
    base_name = os.path.splitext(os.path.basename(f_path))[0]
    vehicle_type[base_name] = ""
    with open(f_path, "r") as f:
        for line in f:
            entry = line.strip()
            # Skip blank lines; otherwise their newline character would be
            # appended to the value.
            if not entry or entry[0] == "0":
                continue
            vehicle_type[base_name] += entry[0]

with open("val_plate_type.yaml", "w") as f:
    f.write(yaml.safe_dump(vehicle_type, sort_keys=True))

# Last expression of the cell so that the notebook displays the mapping
vehicle_type

In [None]:
from glob import glob
import os

def count_label_classes(label_paths):
    """Count how many label lines start with each character.

    Args:
        label_paths: Iterable of paths to text label files.

    Returns:
        dict mapping the first non-whitespace character of a line to the
        number of non-blank lines, across all files, that start with it.
        An empty iterable yields an empty dict.
    """
    counts = {}
    for path in label_paths:
        with open(path, "r") as f:
            for line in f:
                entry = line.strip()
                # Blank lines carry no class and must not be counted.
                if not entry:
                    continue
                key = entry[0]
                # Start from 0 so the first occurrence counts as exactly one.
                counts[key] = counts.get(key, 0) + 1
    return counts


all_labels = glob("processed/cropped/*.txt")

lables = count_label_classes(all_labels)

lables