## Used for converting annotations into OBB format and splitting files into 70/20/10 train/val/test

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split
import glob

In [1]:
# Sort the mixed contents of `source_dir` into one folder for images and
# one folder for text annotations.
source_dir = 'cars-me'
images_dir = 'images'
texts_dir = 'annotations'

# File-name endings (compared case-insensitively) that count as images.
image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp')

for target_dir in (images_dir, texts_dir):
    os.makedirs(target_dir, exist_ok=True)

for file in os.listdir(source_dir):
    file_path = os.path.join(source_dir, file)

    # Sub-directories and other non-regular entries are left untouched.
    if not os.path.isfile(file_path):
        continue

    lowered_name = file.lower()
    if lowered_name.endswith(image_suffixes):
        destination_dir = images_dir
    elif lowered_name.endswith('.txt'):
        destination_dir = texts_dir
    else:
        # Neither an image nor an annotation: stays in `source_dir`.
        continue

    shutil.move(file_path, os.path.join(destination_dir, file))


In [2]:
# Raw annotation files are read from `source_dir`; the converted label
# files are written to `output_dir`, which is created if it is missing.
source_dir, output_dir = 'annotations', 'new_annotations'
os.makedirs(output_dir, exist_ok=True)

def process_record(record, image_width=1280, image_height=720, class_id=0):
    """Convert one raw annotation record into a normalised label line.

    The first line of ``record`` that starts with ``corners:`` is parsed.
    The remainder of that line is a whitespace-separated list of ``x,y``
    integer pairs. Values at even positions are divided by ``image_width``
    and values at odd positions by ``image_height``.

    Args:
        record: Full text of one annotation record.
        image_width: Width used to normalise x values. Defaults to the
            previously hard-coded 1280.
        image_height: Height used to normalise y values. Defaults to the
            previously hard-coded 720.
        class_id: Class index written at the start of the line.

    Returns:
        ``"<class_id> v1 v2 ..."`` with each value formatted to 15 decimals.

    Raises:
        ValueError: If the record has no ``corners:`` line, or a coordinate
            is not an integer literal.
    """
    prefix = 'corners:'
    # Slice the prefix off instead of splitting on 'corners: ' so that a
    # missing space after the colon no longer raises IndexError.
    corners_str = next(
        (line[len(prefix):] for line in record.split('\n') if line.startswith(prefix)),
        None,
    )
    if corners_str is None:
        # Previously a bare StopIteration escaped from next().
        raise ValueError("record does not contain a 'corners:' line")

    corners = [int(coord) for point in corners_str.split() for coord in point.split(',')]

    # Even positions are x values, odd positions are y values.
    normalized_corners = [
        value / (image_width if index % 2 == 0 else image_height)
        for index, value in enumerate(corners)
    ]
    return f'{class_id} ' + ' '.join(f'{coord:.15f}' for coord in normalized_corners)

# Convert every .txt annotation in `source_dir` and store the result under
# the same file name in `output_dir`.
annotation_paths = glob.glob(os.path.join(source_dir, '*.txt'))

for txt_file in annotation_paths:
    with open(txt_file, 'r') as infile:
        raw_record = infile.read().strip()

    # Convert before opening the output so a failed conversion leaves no
    # output file behind.
    label_line = process_record(raw_record)

    output_file_path = os.path.join(output_dir, os.path.basename(txt_file))
    with open(output_file_path, 'w') as outfile:
        outfile.write(label_line + '\n')

print("Conversion completed.")

Conversion completed.


In [5]:
# Input folders holding the images and their converted annotations.
images_dir = './Images'
annotations_dir = './Annotations'

# Root of the split dataset; each split gets an images/ and a labels/ folder.
base_output_dir = './Dataset'

sets = ['train', 'valid', 'test']
categories = ['images', 'labels']

split_dirs = (
    os.path.join(base_output_dir, split_name, kind)
    for split_name in sets
    for kind in categories
)
for split_dir in split_dirs:
    os.makedirs(split_dir, exist_ok=True)

# Base names (without extension) of every regular file in `images_dir`.
# os.listdir() returns entries in arbitrary order, so the names are sorted
# to make the seeded split below reproducible between runs and machines.
# The set removes duplicate stems (e.g. "a.jpg" and "a.png") so the same
# sample cannot end up in two different splits.
image_filenames = sorted({
    os.path.splitext(f)[0]
    for f in os.listdir(images_dir)
    if os.path.isfile(os.path.join(images_dir, f))
})

train_filenames, temp_filenames = train_test_split(image_filenames, test_size=0.3, random_state=42)  # 70% for training, 30% for valid+test
valid_filenames, test_filenames = train_test_split(temp_filenames, test_size=(1/3), random_state=42)  # Split the 30% into 20% valid, 10% test

def copy_files(filenames, src_dir, dst_dir, category, extension):
    """Copy ``<name><extension>`` for each name into ``dst_dir/category``.

    Args:
        filenames: Base names without extension.
        src_dir: Folder the files are read from.
        dst_dir: Folder of the split that receives the copies.
        category: Sub-folder of ``dst_dir`` the copies are placed in.
        extension: File extension, including the leading dot.
    """
    category_dir = os.path.join(dst_dir, category)
    for stem in filenames:
        basename = stem + extension
        shutil.copy(os.path.join(src_dir, basename), os.path.join(category_dir, basename))

# Copy each split's images and labels into <base_output_dir>/<split>/images
# and <base_output_dir>/<split>/labels respectively.
split_members = {
    'train': train_filenames,
    'valid': valid_filenames,
    'test': test_filenames,
}
for set_name, filenames in split_members.items():
    # Images and labels share the split folder and differ only in sub-folder.
    set_output_dir = os.path.join(base_output_dir, set_name)
    copy_files(filenames, images_dir, set_output_dir, 'images', '.jpg')  # Adjust extension as necessary
    copy_files(filenames, annotations_dir, set_output_dir, 'labels', '.txt')  # Assuming .txt annotation files


Dataset split and organized successfully.


## Used for creating licence plate dataset

In [59]:
from PIL import Image, ImageDraw, ImageFont
import string
import random
import os
import shutil
from sklearn.model_selection import train_test_split

# Configuration parameters
base_dir = 'dataset'
font_path = 'license-plate.ttf'
image_size = (30, 49)  # width, height in pixels
samples_per_char = 5
base_font_size = 50  # Adjusted for more noticeable size variation
max_font_size_variation = 5  # Increased variation range
position_variation_range = 3  # Reduced variation range for position

# Staging folders for the generated images and labels before they are moved
# into the train/valid/test folders. They are defined here so that this cell
# no longer depends on `images_dir` / `annotations_dir` left over from an
# earlier cell (NameError on a fresh kernel) and does not write generated
# samples into another dataset's folders.
images_dir = os.path.join(base_dir, 'images')
annotations_dir = os.path.join(base_dir, 'annotations')

# Create directories
os.makedirs(images_dir, exist_ok=True)
os.makedirs(annotations_dir, exist_ok=True)

# Character to class ID mapping: '0'-'9' map to 0-9 and 'A'-'Z' to 10-35,
# i.e. each character's position in digits followed by upper-case letters.
char_to_id = {
    symbol: index
    for index, symbol in enumerate(string.digits + string.ascii_uppercase)
}

# Bounding box information (assuming fixed proportion of image size). The
# values are the same for every sample, so they are set once up front.
norm_x1, norm_y1, norm_x2, norm_y2 = 0.1, 0.1, 0.9, 0.9

# Centre of the canvas; each sample is drawn at a small random offset from it.
centre_x = image_size[0] // 2
centre_y = image_size[1] // 2

# Render `samples_per_char` images for every digit and upper-case letter and
# write a matching one-line label file next to each image.
for char in string.digits + string.ascii_uppercase:
    for sample in range(samples_per_char):
        canvas = Image.new('RGB', image_size, 'white')
        pen = ImageDraw.Draw(canvas)

        # Random font size around the base size.
        size_offset = random.randint(-max_font_size_variation, max_font_size_variation)
        font = ImageFont.truetype(font_path, base_font_size + size_offset)

        # Random position around the canvas centre (x is drawn before y).
        position_x = centre_x + random.randint(-position_variation_range, position_variation_range)
        position_y = centre_y + random.randint(-position_variation_range, position_variation_range)

        # anchor="mm" makes the given position the middle of the text.
        pen.text((position_x, position_y), char, fill="black", font=font, anchor="mm")

        # Save the image
        filename = f"{char}_{sample}.png"
        canvas.save(os.path.join(images_dir, filename))

        # Label line: class id followed by the four corners of the fixed box.
        class_id = char_to_id[char]
        bbox_info = f"{class_id} {norm_x1} {norm_y1} {norm_x2} {norm_y1} {norm_x2} {norm_y2} {norm_x1} {norm_y2}"

        txt_filename = f"{char}_{sample}.txt"
        label_path = os.path.join(annotations_dir, txt_filename)
        with open(label_path, 'w') as label_file:
            label_file.write(bbox_info)


# Base names of every generated sample: letters first, then digits.
all_filenames = [
    f"{char}_{sample}"
    for char in string.ascii_uppercase + string.digits
    for sample in range(samples_per_char)
]

# 80% train; the remaining 20% is halved into validation and test (10% each).
train_files, holdout_files = train_test_split(all_filenames, test_size=0.2, random_state=42)
valid_files, test_files = train_test_split(holdout_files, test_size=0.5, random_state=42)

# Every split gets its own images/ and labels/ folder under `base_dir`.
for split in ['train', 'valid', 'test']:
    for subfolder in ('images', 'labels'):
        os.makedirs(os.path.join(base_dir, split, subfolder), exist_ok=True)

def move_files(file_list, source_dir, dest_dir, file_ext):
    """Move ``<name><file_ext>`` for each name from ``source_dir`` to ``dest_dir``.

    Args:
        file_list: Base names without extension.
        source_dir: Folder the files currently live in.
        dest_dir: Folder the files are moved to.
        file_ext: File extension, including the leading dot.
    """
    for stem in file_list:
        basename = stem + file_ext
        origin = os.path.join(source_dir, basename)
        target = os.path.join(dest_dir, basename)
        shutil.move(origin, target)

# Move the files: for each split, first its images and then its labels go
# into <base_dir>/<split>/images and <base_dir>/<split>/labels.
split_assignment = (
    ('train', train_files),
    ('valid', valid_files),
    ('test', test_files),
)
for split, members in split_assignment:
    move_files(members, images_dir, os.path.join(base_dir, split, 'images'), '.png')
    move_files(members, annotations_dir, os.path.join(base_dir, split, 'labels'), '.txt')

print("Dataset generation and organization completed.")


Dataset generation and organization completed.
