# Transform GTSDB Dataset annotations to COCO Format

To train an object detection model with Detectron2, the dataset should be in COCO format. Since the GTSDB dataset is not in a standard format, I start the data preparation by transforming its annotations to COCO format (inspired by [this gist](https://gist.github.com/zhaoweizhong/053ce08beb9047b710b3616f75130c31)).

## download data from GTSDB website

In [None]:
! wget https://sid.erda.dk/public/archives/ff17dc924eba88d5d01a807357d6614c/FullIJCNN2013.zip

## convert annotations to COCO format

We extract the category information from the dataset description file.

In [31]:
# Maps each GTSDB label index to its sign name; the index of a name is its
# position in the sequence below.
label_dict = dict(enumerate((
    # 0-8
    "20km/h",
    "30km/h",
    "50km/h",
    "60km/h",
    "70km/h",
    "80km/h",
    "End of 80km/h",
    "100km/h",
    "120km/h",
    # 9-17
    "No overtaking",
    "No overtaking (trucks)",
    "Priority intersection",
    "Priority road",
    "Give way",
    "Stop",
    "No traffic",
    "No trucks",
    "No entry",
    # 18-31
    "Danger",
    "Bend left",
    "Bend right",
    "Bend",
    "Uneven road",
    "Slippery road",
    "Road narrows",
    "Construction",
    "Traffic signal",
    "Pedestrian crossing",
    "School crossing",
    "Cycles crossing",
    "Snow",
    "Animals",
    # 32-42
    "End of restriction",
    "Go right",
    "Go left",
    "Go straight",
    "Go right or straight",
    "Go left or straight",
    "Keep right",
    "Keep left",
    "Roundabout",
    "End of overtaking restriction",
    "End of overtaking restriction (trucks)",
)))

In [29]:
import json
import argparse
import copy
import tqdm

def load_txt(file_name):
    """Read a text file and return its lines without trailing newlines.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list with one string per line of the file, in file order. An
        empty file yields an empty list.
    """
    # The context manager guarantees the handle is closed even if reading
    # fails part-way through.
    with open(file_name, 'r') as file:
        return [line.replace('\n', '') for line in file]

In [30]:
# Ground-truth annotation file: one ';'-separated row per bounding box.
file_name = "dataset/FullIJCNN2013/gt.txt"
data = load_txt(file_name)
# Preview the first five annotation rows.
data[:5]

['00000.ppm;774;411;815;446;11',
 '00001.ppm;983;388;1024;432;40',
 '00001.ppm;386;494;442;552;38',
 '00001.ppm;973;335;1031;390;13',
 '00002.ppm;892;476;1006;592;39']

In [34]:
# NOTE(review): img_id_list is not defined in any visible cell — presumably a
# leftover from an earlier session; confirm or replace it (the recorded
# output 1213 equals len(data)).
len(img_id_list)

1213

In [None]:
from PIL import Image

def convert_ppm_to_jpg(ppm_path, jpg_path):
    """Convert a single image file to JPEG.

    Note that a later cell redefines ``convert_ppm_to_jpg`` with a
    directory-based signature, which replaces this definition once run.

    Args:
        ppm_path: Path of the source image to open.
        jpg_path: Path the JPEG copy is written to.
    """
    # Open through a context manager so the source file handle is released
    # as soon as the JPEG has been written.
    with Image.open(ppm_path) as img:
        img.save(jpg_path, 'JPEG')

In [37]:
import os
from PIL import Image

def convert_ppm_to_jpg(source_dir, dest_dir):
    """Convert every ``.ppm`` file in *source_dir* to a JPEG in *dest_dir*.

    Each output keeps the source file's base name with a ``.jpg``
    extension. Files without a ``.ppm`` suffix are ignored.

    Args:
        source_dir: Directory scanned (non-recursively) for ``.ppm`` files.
        dest_dir: Directory receiving the JPEG files; created if missing.
    """
    # Saving into a directory that does not exist would fail, so create the
    # destination up front.
    os.makedirs(dest_dir, exist_ok=True)
    for filename in os.listdir(source_dir):
        if not filename.endswith('.ppm'):
            continue
        jpg_filename = os.path.splitext(filename)[0] + '.jpg'
        # Context manager closes each source file before the next is opened.
        with Image.open(os.path.join(source_dir, filename)) as img:
            img.save(os.path.join(dest_dir, jpg_filename), 'JPEG')
convert_ppm_to_jpg('dataset/FullIJCNN2013', 'dataset/img_jpg')

In [32]:
# Take the first annotation row apart: split it once on ';' and convert the
# individual fields.
annotation = data[0]
fields = annotation.split(';')
stem = fields[0][:5]  # first five characters of the file name
img_id = int(stem)
img_name = stem + '.jpg'
xmin, ymin, xmax, ymax, class_id = (int(value) for value in fields[1:6])
print(img_id, img_name, xmin, ymin, xmax, ymax, class_id)

0 00000.jpg 774 411 815 446 11


In [None]:
def _coco_image(img_id, img_name):
    """Return the COCO ``images`` entry for one image."""
    return {
        "license": 1,
        "file_name": img_name,
        # The same fixed size is recorded for every image.
        "height": 800,
        "width": 1360,
        "id": img_id
    }


def _coco_annotation(anno_id, img_id, class_id, xmin, ymin, xmax, ymax):
    """Return the COCO ``annotations`` entry for one corner-format box."""
    box_width = xmax - xmin
    box_height = ymax - ymin
    return {
        # NOTE(review): written as an empty list instead of [[]]. Detectron2's
        # COCO loader is understood to discard instances whose polygon list
        # contains only invalid polygons — confirm against the installed
        # version.
        "segmentation": [],
        "area": box_width * box_height,
        "iscrowd": 0,
        "image_id": img_id,
        # COCO boxes are [x, y, width, height].
        "bbox": [xmin, ymin, box_width, box_height],
        "category_id": class_id,
        "id": anno_id
    }


def parse(data, *, num_images=900, train_ratio=0.7,
          train_path='train.json', test_path='test.json'):
    """Convert annotation rows to COCO JSON and write a train/test split.

    Every row has the form ``"<name>.ppm;xmin;ymin;xmax;ymax;class_id"``.
    The first five characters of the file name are the numeric image id.
    Rows whose image id is below ``int(num_images * train_ratio)`` are
    written to the train file, all other rows to the test file. Each image
    is listed once per split, however many boxes it carries. Annotation ids
    are consecutive across both splits, starting at 0.

    Args:
        data: Iterable of annotation rows. Blank rows are skipped.
        num_images: Image count used only to compute the split threshold.
        train_ratio: Fraction of ``num_images`` assigned to the train split.
        train_path: Output path of the train JSON file.
        test_path: Output path of the test JSON file.

    Raises:
        ValueError: If a row's id, coordinates or class id is not an integer.
    """
    result_train = {
        "licenses": [
            {
                "url": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
                "id": 1,
                "name": "Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)"
            }
        ],
        "images": [],
        "annotations": [],
        "categories": [
            {"id": class_id, "name": label_dict[class_id]}
            for class_id in sorted(label_dict)
        ]
    }
    # Both splits share the same licenses and categories.
    result_test = copy.deepcopy(result_train)

    count_train = int(num_images * train_ratio)
    # Image ids already emitted per split; a set keeps the duplicate check
    # O(1) instead of rescanning the image list for every row.
    seen_train, seen_test = set(), set()
    anno_id = 0
    for annotation in tqdm.tqdm(data):
        if not annotation.strip():
            continue
        fields = annotation.split(';')
        stem = fields[0][:5]
        img_id = int(stem)
        xmin, ymin, xmax, ymax, class_id = (int(v) for v in fields[1:6])

        if img_id < count_train:
            result, seen = result_train, seen_train
        else:
            result, seen = result_test, seen_test

        # An image with several boxes appears on several rows; list it once.
        if img_id not in seen:
            seen.add(img_id)
            result['images'].append(_coco_image(img_id, stem + '.jpg'))
        result['annotations'].append(
            _coco_annotation(anno_id, img_id, class_id, xmin, ymin, xmax, ymax)
        )
        anno_id = anno_id + 1

    print('Train Images: ' + str(len(result_train['images'])))
    print('Test Images: ' + str(len(result_test['images'])))
    print('Train Annotations: ' + str(len(result_train['annotations'])))
    print('Test Annotations: ' + str(len(result_test['annotations'])))

    with open(train_path, "w") as f:
        json.dump(result_train, f)

    with open(test_path, "w") as f:
        json.dump(result_test, f)

In [6]:
data[:5]

['00000.ppm;774;411;815;446;11',
 '00001.ppm;983;388;1024;432;40',
 '00001.ppm;386;494;442;552;38',
 '00001.ppm;973;335;1031;390;13',
 '00002.ppm;892;476;1006;592;39']

In [8]:
len(data)

1213

Each row contains the file name, bounding box coordinates and label index.

In [None]:
parse(data)

In [16]:
# Reload the generated train.json to inspect what was written.
import json
with open("train.json", "rb") as f:
    train_data = json.load(f)

In [17]:
train_data.keys()

dict_keys(['licenses', 'images', 'annotations', 'categories'])

In [19]:
len(train_data['images'])

530

In [20]:
train_data['images'][-1:]

[{'license': 1,
  'file_name': '00628.jpg',
  'height': 800,
  'width': 1360,
  'id': 628}]

Since we focus on speed limit detection, we select only the traffic signs related to speed limits as training data. In GTSDB, they correspond to the following label indexes: [0, 1, 2, 3, 4, 5, 7, 8]. By referring to the dataset description in the ReadMe.txt file, we build the following label mapping dict for the categories that we are interested in:

speed_limit_dict = {  # label index -> sign name for the speed-limit subset
    0: "20km/h",
    1: "30km/h",
    2: "50km/h",
    3: "60km/h",
    4: "70km/h",
    5: "80km/h",
    6: "End of 80km/h",  # NOTE(review): index 6 is absent from the list [0, 1, 2, 3, 4, 5, 7, 8] in the text above — confirm whether it belongs here
    7: "100km/h",
    8: "120km/h"
}