# Example to convert XML annotations from CVAT to a csv format


In [1]:
# Import statements
import pandas as pd
import numpy as np
import os
import xml.etree.ElementTree as ET
import copy

## 1. Prepare header of CSV file

In [5]:
# Names of the annotated keypoints, in the order their columns appear in the CSV file
keypoints = [
    'LFHoof', 'LFAnkle', 'LFKnee',
    'RFHoof', 'RFAnkle', 'RFKnee',
    'LHHoof', 'LHAnkle', 'LHKnee',
    'RHHoof', 'RHAnkle', 'RHKnee',
    'Nose', 'HeadTop',
    'Spine1', 'Spine2', 'Spine3',
]

# CSV header: 'video', 'frame', followed by three columns per keypoint (x, y and likelihood).
# The likelihood columns are kept as part of the CSV layout even though the original author
# notes they were never used in their research.
header = ['video', 'frame'] + [
    name + suffix
    for name in keypoints
    for suffix in ('_x', '_y', '_likelihood')
]

## 2. Parse the XML file

In [8]:
def xml_to_csv(save_path, xml_file, header, *, max_diff_x=100, max_diff_y=30):
    """
    Parse a CVAT XML annotation file and save the keypoint annotations as a CSV file.

    Every ``image`` element that contains at least one annotation becomes one CSV row;
    images without annotations are skipped. While parsing, the labels are checked for
    three kinds of problems, each of which is printed:

    * a frame whose number of annotations differs from the number of keypoints in ``header``,
    * a keypoint labelled twice in the same frame,
    * a keypoint that moved too far compared with the previous annotated frame.

    If any problem is found, 'stop' is printed and no CSV file is written, so the labels
    can be corrected in CVAT first. Otherwise the CSV is written to
    ``<save_path>/<video name without extension>.csv``.

    :param save_path: path of the folder where to save the csv file
    :param xml_file: the CVAT xml file containing the annotations. Image names are expected
        to look like "frame_123456". NOTE(review): the original author believed the export
        must use the images format rather than the video format -- TODO confirm.
    :param header: the header of the csv file: 'video', 'frame', then '<label>_x',
        '<label>_y' and '<label>_likelihood' for every keypoint label
    :param max_diff_x: a keypoint whose x moved by this many units or more since the previous
        annotated frame is reported as an outlier
    :param max_diff_y: same as ``max_diff_x`` but for the y coordinate
    :raises KeyError: if an annotation uses a label that has no columns in ``header``
    """
    root = ET.parse(xml_file).getroot()
    video_name = root.find('meta').find('source').text
    print(video_name)

    images = root.findall('image')
    print(len(images))

    # One annotation is expected per keypoint, i.e. per '<label>_x' column of the header
    expected_points = sum(1 for column in header if column.endswith('_x'))

    rows = []  # one dict per annotated frame, keyed by the header columns
    stop_video = False

    for image in images:
        points = list(image)
        if not points:  # frame without any annotation: no row for it
            continue

        frame_name = image.attrib['name']
        if len(points) != expected_points:
            # Too many or too few keypoints: the frame needs to be checked in CVAT
            print(video_name, "frame:", frame_name, len(points))
            stop_video = True

        row = dict.fromkeys(header)  # every column starts as None
        row['video'] = video_name
        row['frame'] = int(frame_name.split('_')[1])  # "frame_123456"
        previous = rows[-1] if rows else None

        for point in points:  # loop through the keypoints
            bodypart = point.attrib['label']
            x_col = bodypart + '_x'
            y_col = bodypart + '_y'
            xy = point.attrib['points'].split(',')  # [x, y]

            # Reset for every keypoint so that a point without a likelihood attribute
            # does not inherit the value of the previously parsed point.
            like = None
            for attr in point.findall('attribute'):
                if attr.attrib['name'] == 'likelihood':
                    like = attr.text

            if row[x_col] is not None:
                print(bodypart, 'double keypoint', video_name, "frame:", frame_name, row[x_col], xy[0])
                stop_video = True
                continue

            # Check that the keypoint is not too far from the one in the previous annotated
            # frame (a large jump usually means a wrong label).
            if previous is not None and previous[x_col] is not None:
                diff_x = np.abs(float(xy[0]) - float(previous[x_col]))
                diff_y = np.abs(float(xy[1]) - float(previous[y_col]))
                if diff_x >= max_diff_x:
                    print(bodypart, 'outlier', video_name, "frame:", frame_name, 'x', diff_x)
                    stop_video = True
                    continue
                if diff_y >= max_diff_y:
                    print(bodypart, 'outlier', video_name, "frame:", frame_name, 'y', diff_y)
                    stop_video = True
                    continue

            row[x_col] = xy[0]
            row[y_col] = xy[1]
            row[bodypart + '_likelihood'] = like

        rows.append(row)

    if stop_video:
        print('stop')
        return

    # Build the table column by column, in header order
    df = pd.DataFrame({column: [row[column] for row in rows] for column in header})
    csv_file = video_name.split('.')[0] + '.csv'
    csv_path = os.path.join(save_path, csv_file)
    print(csv_path)
    df.to_csv(csv_path, index=False)