## Used for converting annotations into OBB format and splitting files into 80/20/10 train/val/test

In [ ]:
import os
import shutil
from sklearn.model_selection import train_test_split
import glob

In [2]:
source_dir = 'cars-me' #original dataset folder, not included in the project
images_dir = 'images'
texts_dir = 'annotations'

os.makedirs(images_dir, exist_ok=True)
os.makedirs(texts_dir, exist_ok=True)
files = os.listdir(source_dir)

for file in files:
    file_path = os.path.join(source_dir, file)
    
    if os.path.isfile(file_path):
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')):
            shutil.move(file_path, os.path.join(images_dir, file))
        elif file.lower().endswith('.txt'):
            shutil.move(file_path, os.path.join(texts_dir, file))


FileNotFoundError: [WinError 3] Systemet finner ikke angitt bane: 'cars-me'

In [2]:
source_dir = 'annotations'
output_dir = 'new_annotations'
os.makedirs(output_dir, exist_ok=True)

def process_record(record):
    lines = record.split('\n')
    corners_line = next(line for line in lines if line.startswith('corners:'))
    corners_str = corners_line.split('corners: ')[1]
    corners = [int(coord) for point in corners_str.split() for coord in point.split(',')]
    
    class_id = 0
    image_width, image_height = 1280, 720  # Adjust these values as needed    
    normalized_corners = [corners[i] / image_width if i % 2 == 0 else corners[i] / image_height for i in range(len(corners))]    
    formatted_annotation = f'{class_id} ' + ' '.join(f'{coord:.15f}' for coord in normalized_corners)
    
    return formatted_annotation

for txt_file in glob.glob(os.path.join(source_dir, '*.txt')):
    with open(txt_file, 'r') as infile:
        content = infile.read().strip()
        formatted_annotation = process_record(content)
        
        output_file_path = os.path.join(output_dir, os.path.basename(txt_file))        
        with open(output_file_path, 'w') as outfile:
            outfile.write(formatted_annotation + '\n')

print("Conversion completed.")

Conversion completed.


In [5]:
images_dir = './Images'
annotations_dir = './Annotations'

base_output_dir = './Dataset'

sets = ['train', 'valid', 'test']
categories = ['images', 'labels']

for set_name in sets:
    for category in categories:
        os.makedirs(os.path.join(base_output_dir, set_name, category), exist_ok=True)

image_filenames = [os.path.splitext(f)[0] for f in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, f))]

train_filenames, temp_filenames = train_test_split(image_filenames, test_size=0.3, random_state=42)  # 70% for training, 30% for valid+test
valid_filenames, test_filenames = train_test_split(temp_filenames, test_size=(1/3), random_state=42)  # Split the 30% into 20% valid, 10% test

def copy_files(filenames, src_dir, dst_dir, category, extension):
    for filename in filenames:
        src_file = os.path.join(src_dir, filename + extension)
        dst_file = os.path.join(dst_dir, category, filename + extension)
        shutil.copy(src_file, dst_file)

for set_name, filenames in zip(['train', 'valid', 'test'], [train_filenames, valid_filenames, test_filenames]):
    images_output_dir = os.path.join(base_output_dir, set_name)
    labels_output_dir = images_output_dir  # Images and labels are stored in the same set directory but different category directories
    copy_files(filenames, images_dir, images_output_dir, 'images', '.jpg')  # Adjust extension as necessary
    copy_files(filenames, annotations_dir, labels_output_dir, 'labels', '.txt')  # Assuming .txt annotation files


Dataset split and organized successfully.


## Used for creating licence plate dataset (no augmentation)

In [None]:
import os

def move_files_to_class_dirs(file_list, source_images_dir, source_labels_dir, base_dest_dir):
    for filename in file_list:
        char_class = filename.split('_')[0]  # Extract character class from filename
        dest_images_dir = os.path.join(base_dest_dir, 'images', char_class)  # Destination directory for images includes character class
        dest_labels_dir = os.path.join(base_dest_dir, 'labels', char_class)  # Destination directory for labels includes character class
        
        # Ensure destination directories exist
        os.makedirs(dest_images_dir, exist_ok=True)
        os.makedirs(dest_labels_dir, exist_ok=True)

        src_image_path = os.path.join(source_images_dir, filename + '.png')
        dst_image_path = os.path.join(dest_images_dir, filename + '.png')
        shutil.move(src_image_path, dst_image_path)

        src_label_path = os.path.join(source_labels_dir, filename + '.txt')
        dst_label_path = os.path.join(dest_labels_dir, filename + '.txt')
        if os.path.exists(src_label_path):  # Check if label file exists before moving
            shutil.move(src_label_path, dst_label_path)


# Configuration
num_samples = 50000
image_size = (900, 250)
font_size = 120
char_count = 7
spacing = 25
fonts_dir = 'fonts'
base_dir = ''
image_dir = os.path.join(base_dir, 'images')
label_dir = os.path.join(base_dir, 'labels')
train_ratio = 0.7
val_ratio = 0.2
# Remaining for test_ratio

# Font Paths
font_paths = [os.path.join(fonts_dir, font) for font in os.listdir(fonts_dir) if font.endswith(('.otf', '.ttf'))]

# Ensure directories exist
os.makedirs(image_dir, exist_ok=True)
os.makedirs(label_dir, exist_ok=True)

def generate_license_plate_and_label(index):
    # Create a blank image
    img = Image.new('RGB', image_size, 'white')
    draw = ImageDraw.Draw(img)

    # Select random characters
    characters = ''.join(random.choices(string.ascii_uppercase, k=3)) + ''.join(random.choices(string.digits, k=4))

    # Initialize position with increased padding for the first character
    initial_padding = 140  # Increase this value to add more padding
    x_pos = initial_padding

    # Store bounding box data
    bounding_boxes = []

    for i, char in enumerate(characters):
        # Select a random font
        font_path = random.choice(font_paths)
        font = ImageFont.truetype(font_path, font_size)

        # Measure character
        char_width, char_height = draw.textsize(char, font=font)

        # Calculate bounding box coordinates
        x1, y1 = x_pos, (image_size[1] - char_height) // 2
        x2, y2 = x1 + char_width, y1 + char_height

        # Normalize coordinates to [0, 1] by dividing by image dimensions
        nx1, ny1 = x1 / image_size[0], y1 / image_size[1]
        nx2, ny2 = x2 / image_size[0], y1 / image_size[1]  # Top-right, same y as y1 because it's the upper side
        nx3, ny3 = x1 / image_size[0], y2 / image_size[1]  # Bottom-left, same x as x1 because it's the left side
        nx4, ny4 = x2 / image_size[0], y2 / image_size[1]  # Bottom-right

        # Ensure coordinates are within the valid range
        nx1, ny1, nx2, ny2, nx3, ny3, nx4, ny4 = map(lambda x: max(0, min(x, 1)), [nx1, ny1, nx2, ny2, nx3, ny3, nx4, ny4])

        # Determine class index (0-9 for digits, 10-35 for uppercase letters)
        if char.isdigit():
            class_index = int(char)
        else:
            class_index = ord(char) - 65 + 10  # Convert ASCII character to index (A=10, B=11, ..., Z=35)

        # Record normalized bounding box with all corners
        bounding_boxes.append(f"{class_index} {nx1:.6f} {ny1:.6f} {nx2:.6f} {ny2:.6f} {nx3:.6f} {ny3:.6f} {nx4:.6f} {ny4:.6f}")

        # Draw character
        draw.text((x_pos, (image_size[1] - char_height) // 2), char, fill="black", font=font)

        # Update x_pos for the next character
        x_pos += char_width + spacing

    # Save the image
    img_path = os.path.join(image_dir, f"plate_{index}.png")
    img.save(img_path)

    # Save bounding boxes with normalized coordinates
    label_path = os.path.join(label_dir, f"plate_{index}.txt")
    with open(label_path, 'w') as file:
        file.write('\n'.join(bounding_boxes))





# Generate and save license plates and labels
for i in range(num_samples):
    generate_license_plate_and_label(i)

from sklearn.model_selection import train_test_split

def adjusted_split_data(images, labels, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1):
    """
    Adjust the data splitting function to ensure a 70-20-10 train-validation-test split.
    """
    # Ensure the ratios sum to approximately 1
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-6, "Ratios must sum to 1"

    # First, split into training and temp (val + test) sets
    train_img, temp_img, train_lbl, temp_lbl = train_test_split(images, labels, test_size=(1.0 - train_ratio), random_state=42)

    # Calculate the proportion of temp that should go to validation to achieve the overall desired ratio
    # This is calculated by the proportion of val_ratio in the sum of val_ratio and test_ratio
    proportion_val = val_ratio / (val_ratio + test_ratio)

    # Then, split the temp into validation and test sets
    val_img, test_img, val_lbl, test_lbl = train_test_split(temp_img, temp_lbl, test_size=(1.0 - proportion_val), random_state=42)

    return (train_img, val_img, test_img), (train_lbl, val_lbl, test_lbl)



for subset in ['train', 'val', 'test']:
    os.makedirs(os.path.join(image_dir, subset), exist_ok=True)
    os.makedirs(os.path.join(label_dir, subset), exist_ok=True)

images = [f for f in os.listdir(image_dir) if f.endswith('.png')]
labels = [f.replace('.png', '.txt') for f in images]  # Assuming label files match image files exactly


(image_train, image_val, image_test), (label_train, label_val, label_test) = adjusted_split_data(images, labels)


def setup_directories(base_dir):
    for subset in ['train', 'val', 'test']:
        os.makedirs(os.path.join(base_dir, subset, 'images'), exist_ok=True)
        os.makedirs(os.path.join(base_dir, subset, 'labels'), exist_ok=True)

def move_files_to_subsets(files, source_dir, destination_dir, subset):
    for sub_dir in ['images', 'labels']:
        os.makedirs(os.path.join(destination_dir, subset, sub_dir), exist_ok=True)

    for file_name in files:
        if file_name.endswith('.png'):
            sub_dir = 'images'
        elif file_name.endswith('.txt'):
            sub_dir = 'labels'
        else:
            continue 

        source_path = os.path.join(source_dir, sub_dir, file_name)
        destination_path = os.path.join(destination_dir, subset, sub_dir, file_name)
        
        shutil.move(source_path, destination_path)


setup_directories(base_dir)

move_files_to_subsets(image_train + label_train, base_dir, base_dir, 'train')
move_files_to_subsets(image_val + label_val, base_dir, base_dir, 'val')
move_files_to_subsets(image_test + label_test, base_dir, base_dir, 'test')




## Used for creating licence plate dataset (some augmentation)

In [1]:
import os
import shutil
from PIL import Image, ImageFont
import string
import random
from PIL import ImageDraw

def move_files_to_class_dirs(file_list, source_images_dir, source_labels_dir, base_dest_dir):
    for filename in file_list:
        char_class = filename.split('_')[0] 
        dest_images_dir = os.path.join(base_dest_dir, 'images', char_class)  
        dest_labels_dir = os.path.join(base_dest_dir, 'labels', char_class)  
        
        # Ensure destination directories exist
        os.makedirs(dest_images_dir, exist_ok=True)
        os.makedirs(dest_labels_dir, exist_ok=True)

        src_image_path = os.path.join(source_images_dir, filename + '.png')
        dst_image_path = os.path.join(dest_images_dir, filename + '.png')
        shutil.move(src_image_path, dst_image_path)

        src_label_path = os.path.join(source_labels_dir, filename + '.txt')
        dst_label_path = os.path.join(dest_labels_dir, filename + '.txt')
        if os.path.exists(src_label_path):  # Check if label file exists before moving
            shutil.move(src_label_path, dst_label_path)

# Configuration
num_samples = 100
image_size = (900, 250)
font_size = 120
char_count = 7
spacing = 25
fonts_dir = 'fonts'
base_dir = ''
image_dir = os.path.join(base_dir, 'images')
label_dir = os.path.join(base_dir, 'labels')
train_ratio = 0.7
val_ratio = 0.2
# Remaining for test_ratio

# Font Paths
font_paths = [os.path.join(fonts_dir, font) for font in os.listdir(fonts_dir) if font.endswith(('.otf', '.ttf'))]

# Ensure directories exist
os.makedirs(image_dir, exist_ok=True)
os.makedirs(label_dir, exist_ok=True)

def add_random_shapes(draw, image_size, exclude_areas):
    max_shapes = 20  # Increase the number of shapes
    min_size = 10  # Minimum size of the shapes
    max_size = 60  # Maximum size of the shapes, making them smaller

    for _ in range(random.randint(5, max_shapes)):  # Generate more shapes
        shape_type = random.choice(['ellipse', 'rectangle', 'line', 'arc', 'chord', 'pieslice'])
        attempts = 0
        while attempts < 20:
            attempts += 1
            x0, y0 = random.randint(0, image_size[0]), random.randint(0, image_size[1])
            x1, y1 = x0 + random.randint(min_size, max_size), y0 + random.randint(min_size, max_size)
            current_area = (x0, y0, x1, y1)

            if not any(is_overlap(current_area, area) for area in exclude_areas):
                if shape_type == 'ellipse':
                    draw.ellipse([x0, y0, x1, y1], fill="black", outline=None)
                elif shape_type == 'rectangle':
                    draw.rectangle([x0, y0, x1, y1], fill="black", outline=None)
                elif shape_type == 'line':
                    draw.line([x0, y0, x1, y1], fill="black", width=random.randint(1, 3))
                elif shape_type == 'arc':
                    start_angle = random.randint(0, 360)
                    end_angle = start_angle + random.randint(45, 180)
                    draw.arc([x0, y0, x1, y1], start=start_angle, end=end_angle, fill="black")
                elif shape_type == 'chord':
                    start_angle = random.randint(0, 360)
                    end_angle = start_angle + random.randint(45, 180)
                    draw.chord([x0, y0, x1, y1], start=start_angle, end=end_angle, fill="black")
                elif shape_type == 'pieslice':
                    start_angle = random.randint(0, 360)
                    end_angle = start_angle + random.randint(45, 180)
                    draw.pieslice([x0, y0, x1, y1], start=start_angle, end=end_angle, fill="black")
                break



def is_overlap(rect1, rect2):
    """
    Check if two rectangles overlap.
    """
    return not (rect1[2] < rect2[0] or rect1[0] > rect2[2] or rect1[3] < rect2[1] or rect1[1] > rect2[3])


def generate_license_plate_and_label(index):
    # Create a blank image
    img = Image.new('RGB', image_size, 'white')
    draw = ImageDraw.Draw(img)
    exclude_areas = []  
    characters = ''.join(random.choices(string.ascii_uppercase, k=3)) + ''.join(random.choices(string.digits, k=4))
    initial_padding = 140
    x_pos = initial_padding
    bounding_boxes = []

    for i, char in enumerate(characters):
        font_path = random.choice(font_paths)
        font = ImageFont.truetype(font_path, font_size)
        char_width, char_height = draw.textsize(char, font=font)
        y_pos = (image_size[1] - char_height) // 2
        draw.text((x_pos, y_pos), char, fill="black", font=font)

        exclude_areas.append((x_pos, y_pos, x_pos + char_width, y_pos + char_height))

        x1, y1 = x_pos, y_pos
        x2, y2 = x_pos + char_width, y_pos
        x3, y3 = x_pos + char_width, y_pos + char_height
        x4, y4 = x_pos, y_pos + char_height

        # Normalize coordinates
        nx1, ny1 = x1 / image_size[0], y1 / image_size[1]
        nx2, ny2 = x2 / image_size[0], y2 / image_size[1]
        nx3, ny3 = x3 / image_size[0], y3 / image_size[1]
        nx4, ny4 = x4 / image_size[0], y4 / image_size[1]

        # Determine class index (0-9 for digits, 10-35 for uppercase letters)
        if char.isdigit():
            class_index = int(char)
        else:
            class_index = ord(char) - 65 + 10  # Convert ASCII character to index (A=10, B=11, ..., Z=35)

        bounding_boxes.append(f"{class_index} {nx1:.6f} {ny1:.6f} {nx2:.6f} {ny2:.6f} {nx3:.6f} {ny3:.6f} {nx4:.6f} {ny4:.6f}")

        x_pos += char_width + spacing

    add_random_shapes(draw, image_size, exclude_areas)
    img_path = os.path.join(image_dir, f"plate_{index}.png")
    img.save(img_path)

    label_path = os.path.join(label_dir, f"plate_{index}.txt")
    with open(label_path, 'w') as file:
        file.write('\n'.join(bounding_boxes))


for i in range(num_samples):
    generate_license_plate_and_label(i)

from sklearn.model_selection import train_test_split

def adjusted_split_data(images, labels, train_ratio=0.7, val_ratio=0.2, test_ratio=0.1):
    # Ensure the ratios sum to approximately 1
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-6, "Ratios must sum to 1"

    train_img, temp_img, train_lbl, temp_lbl = train_test_split(images, labels, test_size=(1.0 - train_ratio), random_state=42)
    proportion_val = val_ratio / (val_ratio + test_ratio)
    val_img, test_img, val_lbl, test_lbl = train_test_split(temp_img, temp_lbl, test_size=(1.0 - proportion_val), random_state=42)

    return (train_img, val_img, test_img), (train_lbl, val_lbl, test_lbl)


for subset in ['train', 'val', 'test']:
    os.makedirs(os.path.join(image_dir, subset), exist_ok=True)
    os.makedirs(os.path.join(label_dir, subset), exist_ok=True)

images = [f for f in os.listdir(image_dir) if f.endswith('.png')]
labels = [f.replace('.png', '.txt') for f in images]  # Assuming label files match image files exactly

(image_train, image_val, image_test), (label_train, label_val, label_test) = adjusted_split_data(images, labels)

def setup_directories(base_dir):
    for subset in ['train', 'val', 'test']:
        os.makedirs(os.path.join(base_dir, subset, 'images'), exist_ok=True)
        os.makedirs(os.path.join(base_dir, subset, 'labels'), exist_ok=True)

def move_files_to_subsets(files, source_dir, destination_dir, subset):
    # Ensure the correct subdirectories exist in the destination
    for sub_dir in ['images', 'labels']:
        os.makedirs(os.path.join(destination_dir, subset, sub_dir), exist_ok=True)

    for file_name in files:
        if file_name.endswith('.png'):
            sub_dir = 'images'
        elif file_name.endswith('.txt'):
            sub_dir = 'labels'
        else:
            continue  # Skip if the file doesn't match expected patterns

        source_path = os.path.join(source_dir, sub_dir, file_name)
        destination_path = os.path.join(destination_dir, subset, sub_dir, file_name)
        
        shutil.move(source_path, destination_path)


setup_directories(base_dir)
move_files_to_subsets(image_train + label_train, base_dir, base_dir, 'train')
move_files_to_subsets(image_val + label_val, base_dir, base_dir, 'val')
move_files_to_subsets(image_test + label_test, base_dir, base_dir, 'test')

  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=font)
  char_width, char_height = draw.textsize(char, font=fon