# Convert Bdd100k labels to coco format


### Bdd100k labels format

- name: string
- url: string
- videoName: string (optional)
- attributes:
    - weather: "rainy|snowy|clear|overcast|undefined|partly cloudy|foggy"
    - scene: "tunnel|residential|parking lot|undefined|city street|gas stations|highway|"
    - timeofday: "daytime|night|dawn/dusk|undefined"
- intrinsics
    - focal: [x, y]
    - center: [x, y]
    - nearClip:
- extrinsics
    - location
    - rotation
- timestamp: int64 (epoch time ms)
- frameIndex: int (optional, frame index in this video)
- labels [ ]:
    - id: int32
    - category: string (classification)
    - manualShape: boolean (whether the shape of the label is created or modified manually)
    - manualAttributes: boolean (whether the attribute of the label is created or modified manually)
    - score: float (the confidence or some other ways of measuring the quality of the label.)
    - attributes:
        - occluded: boolean
        - truncated: boolean
        - trafficLightColor: "red|green|yellow|none"
        - areaType: "direct | alternative" (for driving area)
        - laneDirection: "parallel|vertical" (for lanes)
        - laneStyle: "solid | dashed" (for lanes)
        - laneTypes: (for lanes)
    - box2d:
       - x1: float
       - y1: float
       - x2: float
       - y2: float
   - box3d:
       - alpha: (observation angle if there is a 2D view)
       - orientation: (3D orientation of the bounding box, used for 3D point cloud annotation)
       - location: (3D point, x, y, z, center of the box)
       - dimension: (3D point, height, width, length)
   - poly2d: an array of objects, with the structure
       - vertices: [][]float (list of 2-tuples [x, y])
       - types: string (each character corresponds to the type of the vertex with the same index in vertices. ‘L’ for vertex and ‘C’ for control point of a bezier curve.
       - closed: boolean (closed for polygon and otherwise for path)



In [2]:
import os
import json
import argparse
from tqdm import tqdm



def bdd2coco_detection(id_dict, labeled_images, fn):

    images = list()
    annotations = list()

    counter = 0
    for i in tqdm(labeled_images):
        counter += 1
        image = dict()
        image['file_name'] = i['name']
        image['height'] = 720
        image['width'] = 1280

        image['id'] = counter

        empty_image = True

        for label in i['labels']:
            annotation = dict()
            #处理不同的信号灯
            category=label['category']
            if (category == "traffic light"):
                color = label['attributes']['trafficLightColor']
                category = "tl_" + color
            if category in id_dict.keys():
                empty_image = False
                annotation["iscrowd"] = 0
                annotation["image_id"] = image['id']
                x1 = label['box2d']['x1']
                y1 = label['box2d']['y1']
                x2 = label['box2d']['x2']
                y2 = label['box2d']['y2']
                annotation['bbox'] = [x1, y1, x2-x1, y2-y1]
                annotation['area'] = float((x2 - x1) * (y2 - y1))
                annotation['category_id'] = id_dict[category]
                annotation['ignore'] = 0
                annotation['id'] = label['id']
                annotation['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
                annotations.append(annotation)

        if empty_image:
            continue

        images.append(image)

    attr_dict["images"] = images
    attr_dict["annotations"] = annotations
    attr_dict["type"] = "instances"

    print('saving...')
    json_string = json.dumps(attr_dict)
    with open(fn, "w") as file:
        file.write(json_string)


In [6]:
if __name__ == '__main__':

    label_dir="bdd100k/label"
    save_path="bdd100k/label"

    attr_dict = dict()
    attr_dict["categories"] = [
        {"supercategory": "none", "id": 1, "name": "person"},
        {"supercategory": "none", "id": 2, "name": "rider"},
        {"supercategory": "none", "id": 3, "name": "car"},
        {"supercategory": "none", "id": 4, "name": "bus"},
        {"supercategory": "none", "id": 5, "name": "truck"},
        {"supercategory": "none", "id": 6, "name": "bike"},
        {"supercategory": "none", "id": 7, "name": "motor"},
        {"supercategory": "none", "id": 8, "name": "tl_green"},
        {"supercategory": "none", "id": 9, "name": "tl_red"},
        {"supercategory": "none", "id": 10, "name": "tl_yellow"},
        {"supercategory": "none", "id": 11, "name": "tl_none"},
        {"supercategory": "none", "id": 12, "name": "traffic sign"},
        {"supercategory": "none", "id": 13, "name": "train"}
    ]

    attr_id_dict = {i['name']: i['id'] for i in attr_dict['categories']}

    # create BDD training set detections in COCO format
    print('Loading training set...')
    with open(os.path.join(label_dir,
                           'bdd100k_labels_images_train.json')) as f:
        train_labels = json.load(f)
    print('Converting training set...')

    out_fn = os.path.join(save_path,
                          'bdd100k_labels_images_det_coco_train.json')
    bdd2coco_detection(attr_id_dict, train_labels, out_fn)

    print('Loading validation set...')
    # create BDD validation set detections in COCO format
    with open(os.path.join(label_dir,
                           'bdd100k_labels_images_val.json')) as f:
        val_labels = json.load(f)
    print('Converting validation set...')

    out_fn = os.path.join(save_path,
                          'bdd100k_labels_images_det_coco_val.json')
    bdd2coco_detection(attr_id_dict, val_labels, out_fn)

Loading training set...
Converting training set...


100%|██████████| 69863/69863 [00:09<00:00, 7384.22it/s] 


saving...
Loading validation set...
Converting validation set...


100%|██████████| 10000/10000 [00:00<00:00, 21251.52it/s]


saving...


# Convert COCO to yolo format

In [3]:
#-*-coding:utf-8-*-
# python3 example.py --datasets COCO --img_path /home/iav/code/bdd100k_yolov5/bdd100k_images/bdd100k/images/100k/train --label labels/bdd100k_labels_images_det_coco_val.json --convert_output_path train_labels/ --img_type ".jpg" --manipast_path ./ --cls_list_file bdd100k.names

import os
from xml.etree.ElementTree import dump
import json
import pprint

import argparse

from Format import COCO,YOLO

def main(config):

    if config["datasets"] == "COCO":
        coco = COCO()
        yolo = YOLO(os.path.abspath(config["cls_list"]))

        flag, data = coco.parse(config["label"])

        if flag == True:
            flag, data = yolo.generate(data)

            if flag == True:
                flag, data = yolo.save(data, config["output_path"], config["img_path"],
                                        config["img_type"], config["manipast_path"])

                if flag == False:
                    print("Saving Result : {}, msg : {}".format(flag, data))

            else:
                print("YOLO Generating Result : {}, msg : {}".format(flag, data))

        else:
            print("COCO Parsing Result : {}, msg : {}".format(flag, data))

    else:
        print("Unkwon Datasets")

if __name__ == '__main__':

#     config ={
#         "datasets": "COCO",
#         "img_path": "bdd100k/train",
#         "label": "bdd100k/label/bdd100k_labels_images_det_coco_train.json",
#         "img_type": ".jpg",
#         "manipast_path": "./",
#         "output_path": "bdd100k/labels/trains/",
#         "cls_list": "data/bdd100k.names",
#     }
    config ={
        "datasets": "COCO",
        "img_path": "bdd100k/images/train",
        "label": "bdd100k/label/bdd100k_labels_images_det_coco_train.json",
        "img_type": ".jpg",
        "manipast_path": "./",
        "output_path": "bdd100k/label/trains/",
        "cls_list": "data/bdd100k.names",
    }

    main(config)


COCO Parsing:  |████████████████████████████████████████| 100.0% (1286871/1286871)  Complete


YOLO Generating:|████████████████████████████████████████| 100.0% (69863/69863)  Complete


YOLO Saving:   |████████████████████████████████████████| 100.0% (69863/69863)  Complete



# Make sure that image matches the label one-to-one

In [2]:
 import glob, os, shutil

'''
Sometimes your image data set might not match with your label data set.
This code does the folowing
(1) Go through your image data set
(2) Search if the corresponding label file exist in the label data set. 
(3) If not, remove current image
'''


def copy_filter(label_dir,image_dir,target_dir_images,target_dir_labels):
    img_count = 0
    lab_count = 0
    for image in os.listdir(image_dir):
        if image.endswith('jpg'):
            image_name = os.path.splitext(image)[0]

            # Corresponding label file name
            label_name = image_name + '.txt'
            image_path = image_dir + '/' + image_name + '.jpg'
            if os.path.isfile(label_dir + '/' + label_name) == False:
                img_count += 1
                #print(" -- LEAVE IMAGE [Label file not found -- ]")
                
                #print(image_path)
#                 os.remove(image_path)
            else:
                target_images=target_dir_images+ '/' + image_name + '.jpg'
                shutil.copy(image_path,target_dir_images )
                #print(" --COPY IMAGE "+target_images)


    for label in os.listdir(label_dir):
        if label.endswith('.txt'):
            label_name = os.path.splitext(label)[0]

            # Corresponding label file name
            image_name = label_name + '.jpg'
            label_path = label_dir + '/' + label_name + '.txt'
            if os.path.isfile(image_dir + '/' + image_name) == False:
                lab_count += 1
                #print(" -- LEAVE LABEL [Image file not found -- ]")
                #print(label_path)
#                 os.remove(label_path)
            else:
                target_labels=target_dir_labels+ '/' + label_name + '.txt'
                shutil.copy(label_path,target_labels )
                #print(" --COPY lABELS "+target_labels)
    print("img missing {}".format(lab_count))
    print("lab missing {}".format(img_count))

In [11]:
!mkdir bdd100k/images/trains
!mkdir bdd100k/labels/trains
!mkdir bdd100k/images/valids
!mkdir bdd100k/labels/valids

命令语法不正确。
命令语法不正确。
命令语法不正确。
命令语法不正确。


In [4]:
label_dir = 'bdd100k/label/trains'
image_dir = 'bdd100k/images/train/'
target_dir_images="bdd100k/done/images/trains"
target_dir_labels="bdd100k/done/labels/trains"
copy_filter(label_dir,image_dir,target_dir_images,target_dir_labels)

img missing 66891
lab missing 4028


In [11]:
label_dir2 = 'bdd100k/label/valids'
image_dir2 = 'bdd100k/images/val'
target_dir_images2="bdd100k/done/images/valids"
target_dir_labels2="bdd100k/done/label/valids"
copy_filter(label_dir2,image_dir2,target_dir_images2,target_dir_labels2)

img missing 10000
lab missing 1000


In [None]:
labels_list=glob.glob("bdd100k/labels/valids"+"/"+"*.txt")
images_list=glob.glob("bdd100k/images/val"+"/"+"*.jpg")
print(len(labels_list))
print(len(images_list))

labels_list2=glob.glob("bdd100k/label/trains"+"/"+"*.txt")
images_list2=glob.glob("bdd100k/images/train"+"/"+"*.jpg")
print(len(labels_list2))
print(len(images_list2))

In [1]:
# copy test_img folder
!cp -r bdd100k_images/bdd100k/images/100k/test/ bdd100k/images/