In [268]:
import xml.etree.ElementTree as ET
import os
import pandas as pd
import json
import pycocotools.coco as pycoco
import mmdet
from mmdet.datasets.registry import DATASETS
import numpy as np
from pycocotools import mask, _mask
import matplotlib.pyplot as plt
%matplotlib inline
from itertools import groupby
import cv2 as cv
import mmcv


In [6]:
def print_recursive(root, i=0):
    """Print every element of an XML tree, one per line, in document order.

    Each line shows the element's tag and its attribute dict, preceded by one
    space per nesting level; ``i`` is the level assigned to ``root`` itself.
    """
    pending = [(root, i)]
    while pending:
        node, depth = pending.pop()
        print(" " * depth, node.tag, node.attrib)
        # Push children reversed so the first child is popped (printed) first.
        pending.extend((kid, depth + 1) for kid in reversed(list(node)))

In [120]:
def binary_mask_to_rle(binary_mask):
    """Encode a mask as uncompressed run-length encoding (COCO-style dict).

    The mask is flattened in column-major (Fortran) order and any non-zero
    pixel counts as foreground, so 0/1, boolean and 0/255 masks all encode
    identically.

    Parameters
    ----------
    binary_mask : array-like
        Mask array; only its shape and zero/non-zero pattern are used.

    Returns
    -------
    dict
        ``{'counts': [...], 'size': [dim0, dim1, ...]}`` where ``counts`` holds
        alternating run lengths starting with a background run (a leading 0 is
        emitted when the first pixel is foreground). All values are plain
        Python ints, so the result is JSON-serialisable. An empty mask yields
        ``counts == []``.
    """
    mask = np.asarray(binary_mask)
    rle = {'counts': [], 'size': list(mask.shape)}

    # Normalise to booleans so foreground values other than 1 are handled.
    flat = mask.ravel(order='F') != 0
    if flat.size == 0:
        return rle

    # Positions where the value flips mark the start of a new run.
    change_points = np.flatnonzero(flat[1:] != flat[:-1]) + 1
    bounds = np.concatenate(([0], change_points, [flat.size]))
    counts = np.diff(bounds).tolist()

    # RLE always begins with a background run; make it zero-length if needed.
    if flat[0]:
        counts.insert(0, 0)

    rle['counts'] = counts
    return rle

def bin_mask_to_poly(binary_mask):
    """Convert a binary mask into a list of flat polygon coordinate lists.

    Parameters
    ----------
    binary_mask : numpy.ndarray
        2-D mask; it is cast to ``uint8`` before contour extraction.

    Returns
    -------
    list of list of int
        One ``[x0, y0, x1, y1, ...]`` list per contour found by OpenCV.
        Contours with fewer than three points (flattened length <= 4) are
        dropped.

    Notes
    -----
    ``cv.RETR_TREE`` also retrieves nested (inner) contours; they are returned
    as separate polygons alongside the outer ones.
    """
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
    # (image, contours, hierarchy) on 3.x; the contour list is second-to-last
    # in both layouts, so index from the end instead of unpacking.
    contours = cv.findContours(
        binary_mask.astype(np.uint8), cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE
    )[-2]

    flattened = (contour.flatten().tolist() for contour in contours)
    return [coords for coords in flattened if len(coords) > 4]

In [56]:
def bbox_to_mask(bbox, h, w):
    """Rasterise an ``[x, y, width, height]`` box into an ``(h, w)`` uint8 mask.

    Parameters
    ----------
    bbox : sequence of 4 numbers
        Left, top, width and height; each is truncated with ``int``.
    h, w : int
        Height and width of the output mask.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(h, w)`` with 1 inside the box and 0
        elsewhere. Parts of the box outside the image are clipped away.
    """
    x1, y1, bw, bh = map(int, bbox)

    # Clamp at 0: a negative start/stop would otherwise be interpreted by
    # numpy as an index from the end and silently produce an empty mask.
    # The upper bounds need no clamping because slicing already saturates.
    x_start, y_start = max(x1, 0), max(y1, 0)
    x_stop, y_stop = max(x1 + bw, 0), max(y1 + bh, 0)

    bin_mask = np.zeros((h, w), dtype=np.uint8)
    bin_mask[y_start:y_stop, x_start:x_stop] = 1
    return bin_mask

In [57]:
def xml_to_df(xml_path):
    """
    Parse UA-DETRAC XML annotations into a dataframe of bounding boxes.

    Parameters
    ----------
    xml_path : str or file object
        Anything accepted by ``xml.etree.ElementTree.parse``.

    Returns
    -------
    pandas.DataFrame
        One row per annotated target with columns
        ``["frame", "x1", "y1", "x2", "y2"]``. ``frame`` is the integer
        ``num`` attribute of the ``<frame>`` element; ``x1``/``y1`` are the
        box's ``left``/``top`` and ``x2``/``y2`` are ``left + width`` /
        ``top + height`` (floats).

    Frames without a ``<target_list>`` and targets without a ``<box>`` are
    skipped instead of raising ``AttributeError``.
    """
    tree = ET.parse(xml_path)

    boxes = []
    # Only direct <frame> children of the root carry annotations.
    for frame_el in tree.getroot().findall("frame"):
        frame = int(frame_el.attrib["num"])

        target_list = frame_el.find("target_list")
        if target_list is None:
            continue

        for target in target_list.findall("target"):
            box = target.find("box")
            if box is None:
                continue

            x1 = float(box.attrib["left"])
            y1 = float(box.attrib["top"])
            x2 = x1 + float(box.attrib["width"])
            y2 = y1 + float(box.attrib["height"])

            boxes.append([frame, x1, y1, x2, y2])

    return pd.DataFrame(boxes, columns=["frame", "x1", "y1", "x2", "y2"])

In [282]:
def convert_xml(xml_path, images_folder, coco_json_path, img_shape=(540, 960)):
    """
    Convert one UA-DETRAC sequence into a COCO-format JSON file.

    Every ``*.jpg`` in ``images_folder`` becomes a COCO image entry, and every
    box that ``xml_to_df`` returns for the matching frame number becomes an
    annotation whose segmentation is the polygon of the rasterised box.

    Parameters
    ----------
    xml_path : str
        Path of the sequence's XML annotation file.
    images_folder : str
        Folder named ``<prefix>_<sequence id>`` (e.g. ``.../MVI_20011``)
        containing files named like ``img00001.jpg``.
    coco_json_path : str
        Where the resulting JSON is written.
    img_shape : tuple of int
        ``(height, width)`` recorded for every image and used for the masks.

    Notes
    -----
    * Reads the module-level ``categories`` list, which must be defined
      before this function is called.
    * Image ids are ``int(sequence id + frame digits)``; annotation ids append
      a zero-padded per-frame index to that.
    * Every annotation gets ``category_id`` 3 ("car" in ``categories``).
    """
    df = xml_to_df(xml_path)
    df["w"] = df["x2"] - df["x1"]
    df["h"] = df["y2"] - df["y1"]

    # Group the boxes once instead of re-filtering the whole dataframe for
    # every image; row order inside each frame is preserved by groupby.
    boxes_by_frame = {
        int(frame): rows[["x1", "y1", "w", "h"]].values.tolist()
        for frame, rows in df.groupby("frame")
    }

    coco = {
        "info": {},
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": categories
    }

    # "/train/images/MVI_20011" -> "20011"; normpath tolerates a trailing slash.
    folder_name = os.path.basename(os.path.normpath(images_folder))
    images_id_root = folder_name.split("_")[1]

    for filename in sorted(os.listdir(images_folder)):
        if not filename.endswith(".jpg"):
            continue

        img_id_str = filename[3:-4]  # "img00001.jpg" -> "00001"
        image_id = int(images_id_root + img_id_str)
        frame_num = int(img_id_str)

        coco["images"].append({
            "file_name": filename,
            "id": image_id,
            "frame_num": frame_num,
            "width": img_shape[1],
            "height": img_shape[0],
        })

        for i, (x1, y1, w, h) in enumerate(boxes_by_frame.get(frame_num, [])):
            bbox = [int(x1), int(y1), int(w), int(h)]
            bin_mask = bbox_to_mask(bbox, img_shape[0], img_shape[1])

            coco["annotations"].append({
                "bbox": bbox,
                "image_id": image_id,
                "category_id": 3,  # todo: properly decide category
                "iscrowd": 0,
                # Area uses the un-truncated float width/height.
                "area": int(w * h),
                "id": int(images_id_root + img_id_str + f"{i:0>3}"),
                "segmentation": bin_mask_to_poly(bin_mask)
            })

    with open(coco_json_path, "w") as f:
        json.dump(coco, f)

In [284]:
def convert_all(xml_folder, images_folder_folder, coco_json_folder):
    """Run ``convert_xml`` for every entry of ``images_folder_folder``.

    For an entry ``<name>`` the annotation file ``<xml_folder>/<name>_v3.xml``
    is read and ``<coco_json_folder>/<name>.json`` is written. Entries are
    processed in sorted order and a progress line is printed after each one.
    """
    for sequence in sorted(os.listdir(images_folder_folder)):
        convert_xml(
            os.path.join(xml_folder, f"{sequence}_v3.xml"),
            os.path.join(images_folder_folder, sequence),
            os.path.join(coco_json_folder, f"{sequence}.json"),
        )
        print("Done:", sequence)

In [144]:
def combine_all(images_folder_folder, coco_json_folder):
    """Merge the per-sequence COCO JSON files into a single COCO dict.

    For every entry ``<name>`` of ``images_folder_folder`` (sorted), the file
    ``<coco_json_folder>/<name>.json`` is loaded. ``categories``, ``info`` and
    ``licenses`` are taken from the first file; images and annotations of all
    files are concatenated, with each image's ``file_name`` prefixed by its
    sequence folder. Returns an empty dict when there are no entries.
    """
    merged = {}

    for sequence in sorted(os.listdir(images_folder_folder)):
        json_file = os.path.join(coco_json_folder, f"{sequence}.json")
        with open(json_file) as handle:
            part = json.load(handle)

        # The first file seeds the shared header fields and the empty lists.
        if "categories" not in merged:
            merged.update(
                categories=part["categories"],
                info=part["info"],
                licenses=part["licenses"],
                images=[],
                annotations=[],
            )

        # Make file names relative to the folder that holds all sequences.
        for entry in part["images"]:
            entry["file_name"] = os.path.join(sequence, entry["file_name"])

        merged["images"].extend(part["images"])
        merged["annotations"].extend(part["annotations"])

        print("Added:", sequence)

    return merged

In [165]:
def tvt_split(js, train=0.7, val=0.3, seed=None):
    """
    Randomly split a COCO dict into (train, val, test) COCO dicts by image.

    Parameters
    ----------
    js : dict
        COCO dict with ``info``, ``licenses``, ``categories``, ``images`` and
        ``annotations`` keys.
    train, val : float
        Fractions of the images assigned to the train and val splits; the
        remainder goes to test. With the defaults (0.7 + 0.3) the test split
        is empty.
    seed : int or None
        When given, the shuffle uses a private ``RandomState(seed)`` so the
        split is reproducible and the global numpy RNG is left untouched.
        When ``None`` (default) the global ``np.random`` state is used.

    Returns
    -------
    list of dict
        ``[train, val, test]``; each shares ``info``/``licenses``/``categories``
        with ``js`` and contains its images plus the annotations whose
        ``image_id`` belongs to those images (in their original order).
    """
    n_images = len(js["images"])
    indices = np.arange(n_images)
    if seed is None:
        np.random.shuffle(indices)
    else:
        np.random.RandomState(seed).shuffle(indices)

    train_end = int(train * n_images)
    val_end = int((train + val) * n_images)
    index_groups = (
        indices[:train_end],         # 0:train
        indices[train_end:val_end],  # train:val
        indices[val_end:],           # val:
    )

    splits = []
    for idx in index_groups:
        images = [js["images"][i] for i in idx]
        # Set membership keeps the annotation filter linear in len(annotations).
        img_ids = {img["id"] for img in images}
        annotations = [ann for ann in js["annotations"] if ann["image_id"] in img_ids]

        splits.append({
            "info": js["info"],
            "licenses": js["licenses"],
            "categories": js["categories"],
            "images": images,
            "annotations": annotations
        })

    return splits

---

In [12]:
# COCO category list (original, non-contiguous COCO ids 1-90); convert_xml reads this global.
categories = [{"supercategory": "person","id": 1,"name": "person"},{"supercategory": "vehicle","id": 2,"name": "bicycle"},{"supercategory": "vehicle","id": 3,"name": "car"},{"supercategory": "vehicle","id": 4,"name": "motorcycle"},{"supercategory": "vehicle","id": 5,"name": "airplane"},{"supercategory": "vehicle","id": 6,"name": "bus"},{"supercategory": "vehicle","id": 7,"name": "train"},{"supercategory": "vehicle","id": 8,"name": "truck"},{"supercategory": "vehicle","id": 9,"name": "boat"},{"supercategory": "outdoor","id": 10,"name": "traffic light"},{"supercategory": "outdoor","id": 11,"name": "fire hydrant"},{"supercategory": "outdoor","id": 13,"name": "stop sign"},{"supercategory": "outdoor","id": 14,"name": "parking meter"},{"supercategory": "outdoor","id": 15,"name": "bench"},{"supercategory": "animal","id": 16,"name": "bird"},{"supercategory": "animal","id": 17,"name": "cat"},{"supercategory": "animal","id": 18,"name": "dog"},{"supercategory": "animal","id": 19,"name": "horse"},{"supercategory": "animal","id": 20,"name": "sheep"},{"supercategory": "animal","id": 21,"name": "cow"},{"supercategory": "animal","id": 22,"name": "elephant"},{"supercategory": "animal","id": 23,"name": "bear"},{"supercategory": "animal","id": 24,"name": "zebra"},{"supercategory": "animal","id": 25,"name": "giraffe"},{"supercategory": "accessory","id": 27,"name": "backpack"},{"supercategory": "accessory","id": 28,"name": "umbrella"},{"supercategory": "accessory","id": 31,"name": "handbag"},{"supercategory": "accessory","id": 32,"name": "tie"},{"supercategory": "accessory","id": 33,"name": "suitcase"},{"supercategory": "sports","id": 34,"name": "frisbee"},{"supercategory": "sports","id": 35,"name": "skis"},{"supercategory": "sports","id": 36,"name": "snowboard"},{"supercategory": "sports","id": 37,"name": "sports ball"},{"supercategory": "sports","id": 38,"name": "kite"},{"supercategory": "sports","id": 39,"name": "baseball bat"},{"supercategory": "sports","id": 40,"name": "baseball 
glove"},{"supercategory": "sports","id": 41,"name": "skateboard"},{"supercategory": "sports","id": 42,"name": "surfboard"},{"supercategory": "sports","id": 43,"name": "tennis racket"},{"supercategory": "kitchen","id": 44,"name": "bottle"},{"supercategory": "kitchen","id": 46,"name": "wine glass"},{"supercategory": "kitchen","id": 47,"name": "cup"},{"supercategory": "kitchen","id": 48,"name": "fork"},{"supercategory": "kitchen","id": 49,"name": "knife"},{"supercategory": "kitchen","id": 50,"name": "spoon"},{"supercategory": "kitchen","id": 51,"name": "bowl"},{"supercategory": "food","id": 52,"name": "banana"},{"supercategory": "food","id": 53,"name": "apple"},{"supercategory": "food","id": 54,"name": "sandwich"},{"supercategory": "food","id": 55,"name": "orange"},{"supercategory": "food","id": 56,"name": "broccoli"},{"supercategory": "food","id": 57,"name": "carrot"},{"supercategory": "food","id": 58,"name": "hot dog"},{"supercategory": "food","id": 59,"name": "pizza"},{"supercategory": "food","id": 60,"name": "donut"},{"supercategory": "food","id": 61,"name": "cake"},{"supercategory": "furniture","id": 62,"name": "chair"},{"supercategory": "furniture","id": 63,"name": "couch"},{"supercategory": "furniture","id": 64,"name": "potted plant"},{"supercategory": "furniture","id": 65,"name": "bed"},{"supercategory": "furniture","id": 67,"name": "dining table"},{"supercategory": "furniture","id": 70,"name": "toilet"},{"supercategory": "electronic","id": 72,"name": "tv"},{"supercategory": "electronic","id": 73,"name": "laptop"},{"supercategory": "electronic","id": 74,"name": "mouse"},{"supercategory": "electronic","id": 75,"name": "remote"},{"supercategory": "electronic","id": 76,"name": "keyboard"},{"supercategory": "electronic","id": 77,"name": "cell phone"},{"supercategory": "appliance","id": 78,"name": "microwave"},{"supercategory": "appliance","id": 79,"name": "oven"},{"supercategory": "appliance","id": 80,"name": "toaster"},{"supercategory": "appliance","id": 
81,"name": "sink"},{"supercategory": "appliance","id": 82,"name": "refrigerator"},{"supercategory": "indoor","id": 84,"name": "book"},{"supercategory": "indoor","id": 85,"name": "clock"},{"supercategory": "indoor","id": 86,"name": "vase"},{"supercategory": "indoor","id": 87,"name": "scissors"},{"supercategory": "indoor","id": 88,"name": "teddy bear"},{"supercategory": "indoor","id": 89,"name": "hair drier"},{"supercategory": "indoor","id": 90,"name": "toothbrush"}]

In [13]:
# Locations of the UA-DETRAC training data (absolute paths on the author's machine).
# xml_folder: XML annotation files, looked up by convert_all as "<sequence>_v3.xml".
xml_folder = "/data/data/UA_DETRAC/DETRAC-Train-Annotations-XML-v3"
# images_folder_folder: parent folder holding one image folder per sequence.
images_folder_folder = "/data/data/UA_DETRAC/train/images"
# coco_json_folder: where the per-sequence COCO JSON files are written and read back.
coco_json_folder = "/data/data/UA_DETRAC/train/annotations/"

In [285]:
# Write one COCO JSON file per sequence folder into coco_json_folder.
convert_all(xml_folder, images_folder_folder, coco_json_folder)

Done: MVI_20011
Done: MVI_20012
Done: MVI_20032
Done: MVI_20033
Done: MVI_20034
Done: MVI_20035
Done: MVI_20051
Done: MVI_20052
Done: MVI_20061
Done: MVI_20062
Done: MVI_20063
Done: MVI_20064
Done: MVI_20065
Done: MVI_39761
Done: MVI_39771
Done: MVI_39781
Done: MVI_39801
Done: MVI_39811
Done: MVI_39821
Done: MVI_39851
Done: MVI_39861
Done: MVI_39931
Done: MVI_40131
Done: MVI_40141
Done: MVI_40152
Done: MVI_40161
Done: MVI_40162
Done: MVI_40171
Done: MVI_40172
Done: MVI_40181
Done: MVI_40191
Done: MVI_40192
Done: MVI_40201
Done: MVI_40204
Done: MVI_40211
Done: MVI_40212
Done: MVI_40213
Done: MVI_40241
Done: MVI_40243
Done: MVI_40244
Done: MVI_40732
Done: MVI_40751
Done: MVI_40752
Done: MVI_40871
Done: MVI_40962
Done: MVI_40963
Done: MVI_40981
Done: MVI_40991
Done: MVI_40992
Done: MVI_41063
Done: MVI_41073
Done: MVI_63521
Done: MVI_63525
Done: MVI_63544
Done: MVI_63552
Done: MVI_63553
Done: MVI_63554
Done: MVI_63561
Done: MVI_63562
Done: MVI_63563


In [286]:
# Merge the per-sequence JSON files into one in-memory COCO dict.
big_js = combine_all(images_folder_folder, coco_json_folder)

Added: MVI_20011
Added: MVI_20012
Added: MVI_20032
Added: MVI_20033
Added: MVI_20034
Added: MVI_20035
Added: MVI_20051
Added: MVI_20052
Added: MVI_20061
Added: MVI_20062
Added: MVI_20063
Added: MVI_20064
Added: MVI_20065
Added: MVI_39761
Added: MVI_39771
Added: MVI_39781
Added: MVI_39801
Added: MVI_39811
Added: MVI_39821
Added: MVI_39851
Added: MVI_39861
Added: MVI_39931
Added: MVI_40131
Added: MVI_40141
Added: MVI_40152
Added: MVI_40161
Added: MVI_40162
Added: MVI_40171
Added: MVI_40172
Added: MVI_40181
Added: MVI_40191
Added: MVI_40192
Added: MVI_40201
Added: MVI_40204
Added: MVI_40211
Added: MVI_40212
Added: MVI_40213
Added: MVI_40241
Added: MVI_40243
Added: MVI_40244
Added: MVI_40732
Added: MVI_40751
Added: MVI_40752
Added: MVI_40871
Added: MVI_40962
Added: MVI_40963
Added: MVI_40981
Added: MVI_40991
Added: MVI_40992
Added: MVI_41063
Added: MVI_41073
Added: MVI_63521
Added: MVI_63525
Added: MVI_63544
Added: MVI_63552
Added: MVI_63553
Added: MVI_63554
Added: MVI_63561
Added: MVI_635

In [154]:
np.

<module 'numpy' from '/home/adam/.local/share/virtualenvs/data-I7nS9QO2/lib/python3.6/site-packages/numpy/__init__.py'>

In [151]:
# Persist the merged COCO dict for the whole training set.
with open("/data/data/UA_DETRAC/train/annotations/all.json", "w") as f:
    json.dump(big_js, f)

In [138]:
# Load a single sequence's COCO JSON back in for manual inspection.
with open("/data/data/UA_DETRAC/train/annotations/MVI_20011.json") as f:
    js = json.load(f)

In [158]:
js["images"][0]

{'file_name': 'img00001.jpg',
 'id': 2001100001,
 'frame_num': 1,
 'width': 960,
 'height': 540}

In [290]:
# Load the train split through mmdet's CocoDataset to check that it parses.
# NOTE(review): the positional arguments look like (annotation file, image prefix,
# image scales, normalisation config) — confirm against the installed mmdet version.
# This reads all_train.json, which is only written by the tvt_split cell further
# down; on a fresh top-to-bottom run that file must already exist.
data = mmdet.datasets.coco.CocoDataset("/data/data/UA_DETRAC/train/annotations/all_train.json", "/data/data/UA_DETRAC/train/images/", [(1600, 400), (1600, 1400)], dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
)

loading annotations into memory...
Done (t=2.18s)
creating index...
index created!


In [292]:
# Sanity check: try to read every image referenced by the dataset and report
# the entries that could not be loaded.
for i in range(len(data)):
    img_info = data.img_infos[i]
    img = mmcv.imread(os.path.join(data.img_prefix, img_info['filename']))
    
    # presumably mmcv.imread yields None for a missing/unreadable file — confirm
    if img is None:
        print(i, img_info)

In [275]:
# Position in big_js["images"] of the image with this id (raises StopIteration if absent).
next(i for i in range(len(big_js["images"])) if big_js["images"][i]["id"] == 4021101387)

42747

In [276]:
big_js["images"][42747]

{'file_name': 'MVI_40211/img01387.jpg',
 'id': 4021101387,
 'frame_num': 1387,
 'width': 960,
 'height': 540}

In [291]:
len(data)

57451

In [288]:
big_js["annotations"][33870:33874]

[{'bbox': [326, 184, 80, 83],
  'image_id': 2003500065,
  'category_id': 3,
  'iscrowd': 0,
  'area': 6681,
  'id': 2003500065004,
  'segmentation': [[326, 184, 326, 266, 405, 266, 405, 184]]},
 {'bbox': [252, 164, 61, 64],
  'image_id': 2003500065,
  'category_id': 3,
  'iscrowd': 0,
  'area': 3967,
  'id': 2003500065005,
  'segmentation': [[252, 164, 252, 227, 312, 227, 312, 164]]},
 {'bbox': [340, 153, 60, 45],
  'image_id': 2003500065,
  'category_id': 3,
  'iscrowd': 0,
  'area': 2786,
  'id': 2003500065006,
  'segmentation': [[340, 153, 340, 197, 399, 197, 399, 153]]},
 {'bbox': [408, 93, 40, 40],
  'image_id': 2003500065,
  'category_id': 3,
  'iscrowd': 0,
  'area': 1649,
  'id': 2003500065007,
  'segmentation': [[408, 93, 408, 132, 447, 132, 447, 93]]}]

In [287]:
# Split the merged annotations by image and write the train/val files.
# With tvt_split's default fractions (0.7 train, 0.3 val) nothing is left for
# the test split, which is why writing it is disabled below.
train_js, val_js, test_js = tvt_split(big_js)

with open("/data/data/UA_DETRAC/train/annotations/all_train.json", "w") as f:
    json.dump(train_js, f)

with open("/data/data/UA_DETRAC/train/annotations/all_val.json", "w") as f:
    json.dump(val_js, f)

# with open("/data/data/UA_DETRAC/train/annotations/all_test.json", "w") as f:
#     json.dump(test_js, f)

In [168]:
te

{'info': {},
 'licenses': [],
 'categories': [{'supercategory': 'person', 'id': 1, 'name': 'person'},
  {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'},
  {'supercategory': 'vehicle', 'id': 3, 'name': 'car'},
  {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'},
  {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
  {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'},
  {'supercategory': 'vehicle', 'id': 7, 'name': 'train'},
  {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'},
  {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
  {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'},
  {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'},
  {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'},
  {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'},
  {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'},
  {'supercategory': 'animal', 'id': 16, 'name': 'bird'},
  {'supercategory': 'animal', 'id': 17, 'name': 'c

In [320]:
big_js["categories"]

[{'supercategory': 'person', 'id': 1, 'name': 'person'},
 {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'},
 {'supercategory': 'vehicle', 'id': 3, 'name': 'car'},
 {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'},
 {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'},
 {'supercategory': 'vehicle', 'id': 7, 'name': 'train'},
 {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'},
 {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'},
 {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'},
 {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'},
 {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'},
 {'supercategory': 'animal', 'id': 16, 'name': 'bird'},
 {'supercategory': 'animal', 'id': 17, 'name': 'cat'},
 {'supercategory': 'animal', 'id': 18, 'name': 'dog'},