## Convert the TT100K dataset into YOLOv8 format
The __[Tsinghua-Tencent 100K](https://cg.cs.tsinghua.edu.cn/traffic-sign/)__ (TT100K) dataset is a publicly available dataset commonly used for benchmarking traffic sign detection and recognition algorithms. It was created to evaluate the performance of machine learning models, particularly in the field of computer vision.

In [1]:
import os
import json

def convert_to_yolo_format(bbox, img_width, img_height):
    """Turn a corner-style bounding box into normalised centre/size values.

    Args:
        bbox: Mapping with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` entries.
        img_width: Divisor applied to the horizontal quantities.
        img_height: Divisor applied to the vertical quantities.

    Returns:
        The tuple ``(x_center, y_center, width, height)``, each component
        divided by the matching image dimension.
    """
    left, right = bbox['xmin'], bbox['xmax']
    top, bottom = bbox['ymin'], bbox['ymax']

    # Centre is the midpoint of opposite edges; size is their difference.
    return (
        (left + right) / 2.0 / img_width,
        (top + bottom) / 2.0 / img_height,
        (right - left) / img_width,
        (bottom - top) / img_height,
    )

def process_annotations(json_path, img_width=1920, img_height=1080, output_dir=''):
    """Convert an annotation JSON file into per-image YOLO label files.

    The JSON document must contain a ``types`` list (category names) and an
    ``imgs`` mapping of image id -> ``{'path': ..., 'objects': [...]}``, where
    every object carries a ``category`` and a ``bbox``.  One ``<img_id>.txt``
    file is written per image, with one
    ``class x_center y_center width height`` line per object.

    Args:
        json_path: Path of the annotation JSON file.
        img_width: Image width in pixels used to normalise x values.
        img_height: Image height in pixels used to normalise y values.
        output_dir: Directory in which ``train_labels`` and ``test_labels``
            are created.  The default (empty string) keeps them relative to
            the current working directory.

    Raises:
        ValueError: If an object's category is not listed in ``types``.

    NOTE(review): the TT100K images are reportedly 2048x2048 pixels; if that
    holds for your copy, the 1920x1080 defaults produce wrong normalised boxes.
    Confirm the real size and pass ``img_width``/``img_height`` explicitly.
    """
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    categories = data['types']
    imgs = data['imgs']

    # Build the name -> class-id table once instead of scanning the list for
    # every object.  ``setdefault`` keeps the first position of a duplicated
    # name, which is what ``list.index`` would return.
    category_to_index = {}
    for index, name in enumerate(categories):
        category_to_index.setdefault(name, index)

    # Create output directories if they don't exist
    train_label_dir = os.path.join(output_dir, 'train_labels')
    test_label_dir = os.path.join(output_dir, 'test_labels')
    os.makedirs(train_label_dir, exist_ok=True)
    os.makedirs(test_label_dir, exist_ok=True)

    for img_id, img_data in imgs.items():
        img_path = img_data['path']

        # The split is decided by the folder name embedded in the image path;
        # images belonging to neither split are skipped.
        if 'train/' in img_path:
            label_dir = train_label_dir
        elif 'test/' in img_path:
            label_dir = test_label_dir
        else:
            continue

        lines = []
        for obj in img_data['objects']:
            category = obj['category']
            try:
                category_index = category_to_index[category]
            except KeyError:
                raise ValueError(
                    f"{category!r} is not in the 'types' list of {json_path}"
                ) from None

            x_center, y_center, width, height = convert_to_yolo_format(
                obj['bbox'], img_width, img_height
            )
            lines.append(f"{category_index} {x_center} {y_center} {width} {height}\n")

        # Images without objects still get an (empty) label file.
        with open(os.path.join(label_dir, f'{img_id}.txt'), 'w') as f:
            f.writelines(lines)

if __name__ == "__main__":
    # Combined annotation file of the dataset on the author's machine;
    # point this at your own copy before running the cell.
    json_path = (
        '/home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/annotations_all.json'
    )
    process_annotations(json_path)


Remove the images with missing labels

In [3]:
import os

def delete_unmatched_images(image_dir, label_dir, image_extensions=('.jpg',)):
    """Delete every image in ``image_dir`` that has no label in ``label_dir``.

    An image and a label match when their file names are equal once the
    extension is stripped.  Only files whose extension is listed in
    ``image_extensions`` are ever deleted.

    Args:
        image_dir: Directory containing the image files.
        label_dir: Directory containing the label files.
        image_extensions: Extensions (with leading dot, case-sensitive) that
            identify deletable images.  The default removes ``.jpg`` only.

    Returns:
        Sorted list with the paths of the images that were deleted.
    """
    # A bare string would turn the membership test below into a substring
    # test (and '' is "in" every string), so normalise it to a tuple.
    if isinstance(image_extensions, str):
        image_extensions = (image_extensions,)
    extensions = set(image_extensions)

    # Base names (no extension) of everything present in the label directory
    label_bases = {os.path.splitext(lbl)[0] for lbl in os.listdir(label_dir)}

    deleted = []
    # Work on the real directory entries rather than rebuilding "<base>.jpg",
    # so the configured extensions are honoured; sorted for a stable log.
    for image_name in sorted(os.listdir(image_dir)):
        image_base, extension = os.path.splitext(image_name)
        if extension not in extensions or image_base in label_bases:
            continue

        image_path = os.path.join(image_dir, image_name)
        try:
            os.remove(image_path)
        except FileNotFoundError:
            # File vanished between listing and removal; nothing left to do.
            continue
        deleted.append(image_path)
        print(f"Deleted: {image_path}")

    return deleted

if __name__ == "__main__":
    # Both paths point at the *test* split of the dataset on the author's
    # machine; replace them with your own image and label directories.
    image_dir = '/home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/'
    label_dir = '/home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test_labels/'

    delete_unmatched_images(image_dir=image_dir, label_dir=label_dir)


Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/22284.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/74315.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/63047.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/93743.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/38135.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/7807.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/12847.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/12883.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/64554.jpg
Deleted: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/30536.jpg
D

The traffic signs are divided into different classes, each with its own class ID. If your goal is only to detect traffic signs (and not to classify them), irrespective of class, you have to merge all the labels into a single class.
Skip this step if you want to keep the individual classes.

In [5]:
import os

def set_class_id_to_zero(label_dir):
    """Rewrite every ``.txt`` file in ``label_dir`` so all class IDs are 0.

    Each file is read completely and then overwritten in place.  Only lines
    made of exactly five whitespace-separated fields are written back, with
    the first field replaced by ``0`` and the fields joined by single spaces;
    every other line is left out of the rewritten file.

    Args:
        label_dir: Directory holding the label files to rewrite.
    """
    for entry in os.listdir(label_dir):
        if not entry.endswith('.txt'):
            continue
        label_path = os.path.join(label_dir, entry)

        with open(label_path, 'r') as src:
            original_lines = src.readlines()

        # Keep only well-formed 5-field rows, forcing the class column to 0.
        rewritten = [
            ' '.join(['0'] + fields[1:]) + '\n'
            for fields in (row.split() for row in original_lines)
            if len(fields) == 5
        ]

        with open(label_path, 'w') as dst:
            dst.writelines(rewritten)

        print(f"Updated: {label_path}")

if __name__ == "__main__":
    # Train label directory on the author's machine; replace with your own
    # labels directory before running.
    label_dir = '/home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/'

    set_class_id_to_zero(label_dir=label_dir)

Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/14703.txt
Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/94187.txt
Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/66555.txt
Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/36139.txt
Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/12567.txt
Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/10489.txt
Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/48994.txt
Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/28601.txt
Updated: /home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/23632.txt
Updated: /home/ubaurr/reposi

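Create a validation split by moving a random subset of images and their labels out of the test folder. Note that the cell below, as written, moves them into the train folders; point the destination paths at a validation folder if you want a separate validation set.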

In [7]:
import os
import random
import shutil

def move_images_and_labels(train_img_dir, train_label_dir, test_img_dir, test_label_dir, num_images_to_move=800):
    """Move a random sample of ``.jpg`` images, and their labels, between splits.

    Images are picked at random from ``test_img_dir`` and moved to
    ``train_img_dir``.  For each picked image the label file with the same
    base name (``.txt``) is moved from ``test_label_dir`` to
    ``train_label_dir`` when it exists.

    Args:
        train_img_dir: Destination directory for the images (created if missing).
        train_label_dir: Destination directory for the labels (created if missing).
        test_img_dir: Source directory the images are sampled from.
        test_label_dir: Source directory of the matching label files.
        num_images_to_move: Upper bound on the number of images to move; it is
            clamped to the range ``0 .. number of available images``.

    Returns:
        List with the file names of the images that were moved.
    """
    # Sorted so a seeded ``random`` picks the same files regardless of the
    # order in which the filesystem lists the directory.
    test_images = sorted(f for f in os.listdir(test_img_dir) if f.endswith('.jpg'))

    # Never ask for more images than exist, nor for a negative amount
    # (random.sample raises ValueError in both cases).
    num_images_to_move = max(0, min(num_images_to_move, len(test_images)))

    # Randomly select images to move
    images_to_move = random.sample(test_images, num_images_to_move)

    # shutil.move fails when the destination folder is missing.
    os.makedirs(train_img_dir, exist_ok=True)
    os.makedirs(train_label_dir, exist_ok=True)

    for img_file in images_to_move:
        # Determine the corresponding label file name
        label_file = os.path.splitext(img_file)[0] + '.txt'
        label_path = os.path.join(test_label_dir, label_file)

        shutil.move(
            os.path.join(test_img_dir, img_file),
            os.path.join(train_img_dir, img_file),
        )

        # Only report the label as moved when there actually was one.
        if os.path.exists(label_path):
            shutil.move(label_path, os.path.join(train_label_dir, label_file))
            print(f"Moved: {img_file} and {label_file}")
        else:
            print(f"Moved: {img_file} (no label file found: {label_file})")

    return images_to_move

if __name__ == "__main__":
    # Dataset folders on the author's machine; replace each one with the
    # matching directory of your own copy.
    train_img_dir = '/home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train/'
    train_label_dir = '/home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/train_labels/'
    test_img_dir = '/home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test/'
    test_label_dir = '/home/ubaurr/repositorio/traffic_signs/traffic_env/Detection/Dataset/ttk100/test_labels/'

    # Move up to 1800 randomly chosen images (plus labels) out of the test split.
    move_images_and_labels(
        train_img_dir,
        train_label_dir,
        test_img_dir,
        test_label_dir,
        num_images_to_move=1800,
    )


Moved: 53304.jpg and 53304.txt
Moved: 25823.jpg and 25823.txt
Moved: 63839.jpg and 63839.txt
Moved: 44969.jpg and 44969.txt
Moved: 56922.jpg and 56922.txt
Moved: 87959.jpg and 87959.txt
Moved: 43134.jpg and 43134.txt
Moved: 34726.jpg and 34726.txt
Moved: 17924.jpg and 17924.txt
Moved: 13701.jpg and 13701.txt
Moved: 8169.jpg and 8169.txt
Moved: 44883.jpg and 44883.txt
Moved: 6140.jpg and 6140.txt
Moved: 68268.jpg and 68268.txt
Moved: 61035.jpg and 61035.txt
Moved: 13497.jpg and 13497.txt
Moved: 83495.jpg and 83495.txt
Moved: 79487.jpg and 79487.txt
Moved: 81618.jpg and 81618.txt
Moved: 35824.jpg and 35824.txt
Moved: 4997.jpg and 4997.txt
Moved: 31784.jpg and 31784.txt
Moved: 21392.jpg and 21392.txt
Moved: 37172.jpg and 37172.txt
Moved: 34636.jpg and 34636.txt
Moved: 92054.jpg and 92054.txt
Moved: 37910.jpg and 37910.txt
Moved: 14823.jpg and 14823.txt
Moved: 23019.jpg and 23019.txt
Moved: 29614.jpg and 29614.txt
Moved: 32990.jpg and 32990.txt
Moved: 72550.jpg and 72550.txt
Moved: 5257.jp