# Import libraries

In [1]:
!pip install xmltodict

Collecting xmltodict
  Using cached xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Using cached xmltodict-0.14.2-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.14.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import xmltodict
import json

# Read and parse the XML files into Python dictionaries

In [3]:
def parse_viper_file(path):
  """Parse a ViPER .xgtf annotation file into a nested dict with xmltodict."""
  with open(path, "rb") as f:
    return xmltodict.parse(f)


def viper_objects(viper_dict):
  """Return the <object> entries of a parsed ViPER document, always as a list.

  xmltodict maps a tag that occurs once to a single dict and a repeated tag to
  a list of dicts, so a file holding exactly one object would otherwise break
  the list concatenation used to merge two files.
  """
  objects = viper_dict["viper"]["data"]["sourcefile"]["object"]
  return [objects] if isinstance(objects, dict) else objects


# B data extraction
B1_dict = parse_viper_file("viper-annotations/video1_jorge_carros_buses.xgtf")
B2_dict = parse_viper_file("viper-annotations/video1_jorge_motos.xgtf")

B_annotations = viper_objects(B1_dict) + viper_objects(B2_dict)

# C data extraction
C1_dict = parse_viper_file("viper-annotations/lcarrosfinal.xgtf")
C2_dict = parse_viper_file("viper-annotations/lmotosybuses.xgtf")

C_annotations = viper_objects(C1_dict) + viper_objects(C2_dict)

In [4]:
# transforms the 1-based sequential image index into the original frame number,
# so it matches the image filenames
def n_to_int(frame, intervals):
  """Map a 1-based sequential index onto a frame number inside `intervals`.

  `intervals` is a sequence of inclusive `(first, last)` frame-number ranges
  that are treated as laid end to end: index 1 is the first frame of the first
  interval, and the index following the last frame of one interval is the
  first frame of the next.

  Args:
    frame: 1-based position in the concatenated intervals.
    intervals: iterable of inclusive `(first, last)` integer pairs.

  Returns:
    The frame number that `frame` corresponds to.

  Raises:
    ValueError: if `frame` is smaller than 1 or larger than the total number
      of frames covered by `intervals`.
  """
  if frame < 1:
    raise ValueError(f"frame index must be >= 1, got {frame}")

  remaining = frame
  for first, last in intervals:
    length = last - first + 1
    if remaining <= length:
      return first + remaining - 1
    # Not in this interval: consume it and keep looking in the next one.
    remaining -= length

  # Falling through used to return None silently, which produced bogus
  # filenames downstream; fail loudly instead.
  raise ValueError(f"frame index {frame} is past the end of the intervals")

# Inclusive (first, last) frame-number ranges that make up each dataset.
B_intervals = [(21854, 28353)]
C_intervals = [(6705, 8904), (10056, 12255), (20112, 22311)]

## Dataset B

In [5]:
# Build one empty Pascal VOC record per image of dataset B; the per-frame
# objects are appended to each record's "object" list later on.
annotations_B = []

for image_number in range(1, 6501):
  # Frame numbers are left-padded with zeros to six characters.
  frame_string = str(n_to_int(image_number, intervals=B_intervals)).rjust(6, "0")

  record = {
    "annotation": {
      "folder": "v1",
      "filename": f"v1_{frame_string}.jpg",
      "source": {
        "database": "Unknown",
        "annotation": "Unknown",
        "image": "Unknown"
      },
      "size": {
        "width": 3840,
        "height": 2160,
        "depth": ""
      },
      "segmented": 0,
      "object": []
    }
  }
  annotations_B.append(record)

In [6]:
# CVAT does not allow repeated track ids, even across different classes, so
# the raw ids of every class are gathered here to compute per-class offsets.
B_car_id, B_bus_id, B_motorbike_id = [], [], []
B_ids_per_class = {
  "car": B_car_id,
  "bus": B_bus_id,
  "motorbike": B_motorbike_id,
}

for entry in B_annotations:
  # The source files label motorbikes as "moto".
  label = "motorbike" if entry["@name"] == "moto" else entry["@name"]
  numeric_id = int(entry["@id"])

  if label in B_ids_per_class:
    B_ids_per_class[label].append(numeric_id)

In [7]:
# Number of id slots to reserve per class: the largest raw id plus one.
# max() is used rather than the last element so the offsets stay collision-free
# even when ids are not listed in ascending order; a class with no objects
# reserves zero slots instead of raising IndexError.
B_max_car_id = max(B_car_id, default=-1) + 1
B_max_bus_id = max(B_bus_id, default=-1) + 1

In [8]:
# cars, buses and motorbike
# Expands every ViPER bbox span into one Pascal VOC object per covered frame.

for obj in B_annotations:
  obj_class = obj["@name"] if obj["@name"] != "moto" else "motorbike"

  # Ids become 1-based; buses are shifted past the car range and motorbikes
  # past the car and bus ranges so track ids stay unique across classes.
  obj_id = int(obj["@id"]) + 1
  if obj_class == "bus":
    obj_id += B_max_car_id
  elif obj_class == "motorbike":
    obj_id += B_max_car_id + B_max_bus_id

  # xmltodict returns a bare dict instead of a list when a tag occurs only
  # once; wrap it so iteration yields entries rather than dict keys.
  attributes = obj["attribute"]
  if isinstance(attributes, dict):
    attributes = [attributes]

  for att in attributes:
    if att["@name"] != "bbox":
      continue

    bboxes = att["data:bbox"]
    if isinstance(bboxes, dict):
      bboxes = [bboxes]

    for ann in bboxes:
      # "@framespan" is "first:last", both inclusive.
      i_frame, j_frame = ann["@framespan"].split(":")
      i_frame = int(i_frame)
      j_frame = int(j_frame)

      height = int(ann["@height"])
      width = int(ann["@width"])
      x = int(ann["@x"])
      y = int(ann["@y"])

      # Rescale the box from 1920x1080 coordinates to the 3840x2160 images.
      xmin = x * 3840 / 1920
      ymin = y * 2160 / 1080
      xmax = (x + width) * 3840 / 1920
      ymax = (y + height) * 2160 / 1080

      for curr_frame in range(i_frame, j_frame+1):
        # Only the first frame of a span is flagged as a keyframe.
        keyframe = "True" if curr_frame == i_frame else "False"

        # Frame numbers are 1-based, the list is 0-based.
        annotations_B[curr_frame - 1]["annotation"]["object"].append({
          "name": obj_class,
          "truncated": 0,
          "occluded": 0,
          "difficult": 0,
          "bndbox": {
            "xmin": xmin,
            "ymin": ymin,
            "xmax": xmax,
            "ymax": ymax
          },
          "attributes": {
            "attribute": [
              {
                "name": "rotation",
                "value": 0.0,
              },
              {
                "name": "track_id",
                "value": obj_id,
              },
              {
                "name": "keyframe",
                "value": keyframe
              }
            ]
          }
        })

### Write each Pascal VOC 1.1 .xml file

In [9]:
# Write one Pascal VOC XML file per image and collect the image names
# (without extension) for the ImageSets listing.
image_stems = []
for record in annotations_B:
  # Strip only the final extension from the image filename.
  stem = record["annotation"]["filename"].rsplit(".", 1)[0]
  image_stems.append(stem)

  # xmltodict.unparse declares utf-8 in the XML prolog by default, so the file
  # is written with that encoding instead of the platform default.
  with open(f"pascal-annotations/B_dataset/Annotations/{stem}.xml", "w", encoding="utf-8") as f:
    f.write(xmltodict.unparse(record, pretty=True))

# One image name per line, each terminated by a newline.
imagesets = "".join(f"{stem}\n" for stem in image_stems)

### Write each filename in a .txt file

In [10]:
# Save the newline-separated image names as the default image set.
B_imageset_path = "pascal-annotations/B_dataset/ImageSets/Main/default.txt"
with open(B_imageset_path, "w") as listing:
  listing.write(imagesets)

## Dataset C

In [11]:
# Build one empty Pascal VOC record per image of dataset C; the per-frame
# objects are appended to each record's "object" list later on.
annotations_C = []

for image_number in range(1, 6601):
  # Frame numbers are left-padded with zeros to six characters.
  frame_string = str(n_to_int(image_number, intervals=C_intervals)).rjust(6, "0")

  record = {
    "annotation": {
      "folder": "v2",
      "filename": f"v2_{frame_string}.jpg",
      "source": {
        "database": "Unknown",
        "annotation": "Unknown",
        "image": "Unknown"
      },
      "size": {
        "width": 3840,
        "height": 2160,
        "depth": ""
      },
      "segmented": 0,
      "object": []
    }
  }
  annotations_C.append(record)

In [12]:
# CVAT does not allow repeated track ids, even across different classes, so
# the raw ids of every class are gathered here to compute per-class offsets.
C_car_id, C_bus_id, C_motorbike_id = [], [], []
C_ids_per_class = {
  "car": C_car_id,
  "bus": C_bus_id,
  "motorbike": C_motorbike_id,
}

for entry in C_annotations:
  # The source files label motorbikes as "moto".
  label = "motorbike" if entry["@name"] == "moto" else entry["@name"]
  numeric_id = int(entry["@id"])

  if label in C_ids_per_class:
    C_ids_per_class[label].append(numeric_id)

In [13]:
# Number of id slots to reserve per class: the largest raw id plus one.
# max() is used rather than the last element so the offsets stay collision-free
# even when ids are not listed in ascending order; a class with no objects
# reserves zero slots instead of raising IndexError.
C_max_car_id = max(C_car_id, default=-1) + 1
C_max_bus_id = max(C_bus_id, default=-1) + 1

In [14]:
# cars, buses and motorbike
# Expands every ViPER bbox span into one Pascal VOC object per covered frame.
# `maxframe` records the largest span end that does not exceed 6600.
maxframe = 0
for obj in C_annotations:
  obj_class = obj["@name"] if obj["@name"] != "moto" else "motorbike"

  # Ids become 1-based; buses are shifted past the car range and motorbikes
  # past the car and bus ranges so track ids stay unique across classes.
  obj_id = int(obj["@id"]) + 1
  if obj_class == "bus":
    obj_id += C_max_car_id
  elif obj_class == "motorbike":
    obj_id += C_max_car_id + C_max_bus_id

  # xmltodict returns a bare dict instead of a list when a tag occurs only
  # once; wrap it so iteration yields entries rather than dict keys.
  attributes = obj["attribute"]
  if isinstance(attributes, dict):
    attributes = [attributes]

  for att in attributes:
    if att["@name"] != "bbox":
      continue

    # A single bbox span arrives as one dict. Iterating it directly yields its
    # key strings, which the old type check skipped, silently dropping every
    # object that had exactly one span.
    bboxes = att["data:bbox"]
    if isinstance(bboxes, dict):
      bboxes = [bboxes]

    for ann in bboxes:
      if not isinstance(ann, dict):
        continue
      # "@framespan" is "first:last", both inclusive.
      i_frame, j_frame = ann["@framespan"].split(":")
      i_frame = int(i_frame)
      j_frame = int(j_frame)

      if j_frame > maxframe and j_frame <= 6600:
        maxframe = j_frame
      # Diagnostic: report objects whose span ends after frame 3351.
      if j_frame > 3351:
        print(f"{obj_class} {obj_id-1}")

      height = int(ann["@height"])
      width = int(ann["@width"])
      x = int(ann["@x"])
      y = int(ann["@y"])

      # Rescale the box from 1920x1080 coordinates to the 3840x2160 images.
      xmin = x * 3840 / 1920
      ymin = y * 2160 / 1080
      xmax = (x + width) * 3840 / 1920
      ymax = (y + height) * 2160 / 1080

      # Frame numbers are 1-based, so valid frames are 1..len(annotations_C)
      # inclusive. Clamping the span keeps the last frame (the old `>=` test
      # skipped it), prevents frame 0 from wrapping around to index -1, and
      # avoids iterating over the out-of-range part of a span.
      first_frame = max(i_frame, 1)
      last_frame = min(j_frame, len(annotations_C))

      for curr_frame in range(first_frame, last_frame + 1):
        # Only the first frame of a span is flagged as a keyframe.
        keyframe = "True" if curr_frame == i_frame else "False"

        annotations_C[curr_frame - 1]["annotation"]["object"].append({
          "name": obj_class,
          "truncated": 0,
          "occluded": 0,
          "difficult": 0,
          "bndbox": {
            "xmin": xmin,
            "ymin": ymin,
            "xmax": xmax,
            "ymax": ymax
          },
          "attributes": {
            "attribute": [
              {
                "name": "rotation",
                "value": 0.0,
              },
              {
                "name": "track_id",
                "value": obj_id,
              },
              {
                "name": "keyframe",
                "value": keyframe
              }
            ]
          }
        })

car 19
motorbike 397


In [15]:
maxframe

3351

In [16]:
# Write one Pascal VOC XML file per image and collect the image names
# (without extension) for the ImageSets listing.
image_stems = []
for record in annotations_C:
  # Strip only the final extension from the image filename.
  stem = record["annotation"]["filename"].rsplit(".", 1)[0]
  image_stems.append(stem)

  # xmltodict.unparse declares utf-8 in the XML prolog by default, so the file
  # is written with that encoding instead of the platform default.
  with open(f"pascal-annotations/C_dataset/Annotations/{stem}.xml", "w", encoding="utf-8") as f:
    f.write(xmltodict.unparse(record, pretty=True))

# One image name per line, each terminated by a newline.
imagesets = "".join(f"{stem}\n" for stem in image_stems)

In [17]:
# Save the newline-separated image names as the default image set.
C_imageset_path = "pascal-annotations/C_dataset/ImageSets/Main/default.txt"
with open(C_imageset_path, "w") as listing:
  listing.write(imagesets)