# Convert CVAT image annotations to COCO format

In [None]:
!pip install python-xml2dict

Collecting python-xml2dict
  Downloading python-xml2dict-0.1.1.tar.gz (2.5 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: python-xml2dict
  Building wheel for python-xml2dict (setup.py) ... [?25ldone
[?25h  Created wheel for python-xml2dict: filename=python_xml2dict-0.1.1-py3-none-any.whl size=2236 sha256=10c93286918e2e794ca4a732a1b40f84c5848537f3b865761c171d897aee5627
  Stored in directory: /home/anhvth8/.cache/pip/wheels/fb/72/4a/f22da03f1c385b59cfc5d1baa5b31d1453e3e5b7ca1fdef5e7
Successfully built python-xml2dict
Installing collected packages: python-xml2dict
Successfully installed python-xml2dict-0.1.1


In [None]:
%load_ext autoreload
%autoreload 2

from avcv.all import *
from fastcore.all import *
import xml2dict





def get_unique_name(row):
    """Build a string key from a box's corner coordinates, label and task id.

    Args:
        row: Mapping-like object (dict or DataFrame row) providing the keys
            '@xtl', '@ytl', '@xbr', '@ybr', '@label' and '@task_id'.

    Returns:
        The six values joined with '_' in the order listed above.
    """
    columns = ['@xtl', '@ytl', '@xbr', '@ybr', '@label', '@task_id']
    # Coerce with str(): cells are not guaranteed to be strings (image rows
    # without a box carry NaN coordinates), and str.join rejects non-strings.
    return '_'.join(str(row[c]) for c in columns)

def load_track_xml2df(path):
    """Load the tracks of an annotation XML file into a flat DataFrame.

    Each kept box becomes one row holding the box's own attributes plus the
    '@id', '@label' and '@task_id' of the track it belongs to. Only boxes whose
    '@outside' attribute equals '0' are kept.

    Args:
        path: Path of the XML file to read.

    Returns:
        pandas.DataFrame with one row per kept box.

    Raises:
        TypeError: If a track's 'box' entry is neither a dict nor a list.
    """
    # Context manager so the file handle is closed once parsing is done.
    with open(path) as xml_file:
        d_track = xml2dict.parse(xml_file)['annotations']

    # A document with a single <track> is parsed as one dict rather than a list.
    tracks = d_track['track']
    if isinstance(tracks, dict):
        tracks = [tracks]

    data_bboxes = []
    for track in tracks:
        track_data = {k: track[k] for k in ['@id', '@label', '@task_id']}

        # Same single-vs-many normalisation for the boxes of one track.
        bboxes = track['box']
        if isinstance(bboxes, dict):
            bboxes = [bboxes]
        elif not isinstance(bboxes, list):
            raise TypeError(
                "track {!r}: expected 'box' to be a dict or a list, got {}".format(
                    track['@id'], type(bboxes).__name__))

        for bbox in bboxes:
            if bbox['@outside'] != '0':
                continue
            # Copy before merging so the parsed document is left untouched.
            bbox = bbox.copy()
            bbox.update(track_data)
            data_bboxes.append(bbox)

    return pd.DataFrame(data_bboxes)

def load_image_xml2df(path):
    """Load the per-image annotations of an XML file into a flat DataFrame.

    Every box becomes one row holding the box's own attributes plus the
    '@name', '@height', '@width' and '@task_id' of its image. An image without
    any 'box' entry still contributes one row carrying only those four image
    attributes, so its box columns end up as NaN in the resulting frame.

    Args:
        path: Path of the XML file to read.

    Returns:
        pandas.DataFrame with one row per box, plus one row per box-less image.

    Raises:
        TypeError: If an image's 'box' entry is neither a dict nor a list.
    """
    # Context manager so the file handle is closed once parsing is done.
    with open(path) as xml_file:
        d_image = xml2dict.parse(xml_file)['annotations']

    # A document with a single <image> is parsed as one dict rather than a list.
    images = d_image['image']
    if isinstance(images, dict):
        images = [images]

    data_bbox = []
    for image in images:
        image_data = {k: image[k] for k in ['@name', '@height', '@width', '@task_id']}

        if 'box' not in image:
            data_bbox.append(image_data)
            continue

        bboxes = image['box']
        if isinstance(bboxes, dict):
            bboxes = [bboxes]
        elif not isinstance(bboxes, list):
            raise TypeError(
                "image {!r}: expected 'box' to be a dict or a list, got {}".format(
                    image['@name'], type(bboxes).__name__))

        for bbox in bboxes:
            # Build a new dict (box keys first, image keys override) instead of
            # updating the parsed box in place.
            data_bbox.append({**bbox, **image_data})

    return pd.DataFrame(data_bbox)
def merge_track_image_annotations(df_track, df_image):
    """Attach each box's track id to the per-image annotation frame.

    Rows of both frames are keyed by the string produced by `get_unique_name`;
    the '@id' column of `df_track` is then aligned on that key and stored in a
    new 'track_id' column of the image frame.

    Side effect: a 'uname' column is added in place to both frames passed in.

    Args:
        df_track: Frame of track boxes; needs the columns read by
            `get_unique_name` plus '@id'.
        df_image: Frame of image boxes; needs the columns read by
            `get_unique_name`.

    Returns:
        A copy of `df_image` indexed and sorted by 'uname', with an extra
        'track_id' column.

    Raises:
        AssertionError: If the rows sharing one track id carry more than one
            distinct '@label'.
    """
    df_track['uname'] = df_track.apply(get_unique_name, axis=1)
    df_image['uname'] = df_image.apply(get_unique_name, axis=1)

    # From here on the local names refer to new, re-indexed frames.
    df_track = df_track.set_index('uname').sort_index()
    df_image = df_image.set_index('uname').sort_index()

    # Column assignment aligns the track ids on the shared 'uname' index.
    df_image['track_id'] = df_track['@id']

    # Sanity check: one track must keep a single label across all of its boxes.
    for track_id, boxes in df_image.groupby('track_id'):
        assert len(boxes['@label'].unique()) == 1, 'Invalid trackid {}, has more than one label per track???'.format(track_id)
    return df_image

# Location of the annotation XML export to convert; edit this one constant to
# point the notebook at a different file.
ANN_XML_PATH = '/tmp/anns.xml'
df_image = load_image_xml2df(ANN_XML_PATH)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# df_merge['is_action_frame'] = df_merge['@name'].apply(lambda x: x.startswith('Action_'))
# print(df_merge['is_action_frame'].mean())
# df_merge[df_merge['is_action_frame']]
def get_user(row):
    """Extract the user token from an 'Action_' image name.

    For names starting with 'Action_', the second '/'-separated path segment is
    taken and everything before its first '_' is returned. Any other name
    yields NaN.
    """
    name = row['@name']
    if not name.startswith('Action_'):
        return np.nan
    second_segment = name.split('/')[1]
    return second_segment.split('_')[0]
# Tag every row with the user parsed from its image name, then flag the rows
# whose user belongs to the validation group.
df_image['user'] = df_image.apply(get_user, axis=1)
val_users = ['hungng', 'chungtd12', 'thomp4', 'thuyhv5']
df_image['is_val'] = df_image['user'].isin(val_users)

In [None]:
def to_coco_format(df, cat_name2id=None):
    """Convert a flat annotation frame into a COCO-style dictionary.

    Args:
        df: DataFrame with an '@name' column (image file name) and, for rows
            that describe a box, '@xtl', '@ytl', '@xbr', '@ybr' and '@label'.
            Rows whose '@xtl' is NaN (images without a box) add an image entry
            but no annotation. If '@width' and '@height' are present they are
            copied onto the image entries.
        cat_name2id: Optional mapping from label name to category id. Pass the
            same mapping for every split so that ids agree between them. When
            omitted, ids are assigned by enumerating the sorted non-null labels
            found in `df`, so the result does not depend on row order.

    Returns:
        dict with the keys 'images', 'annotations' and 'categories'. Boxes are
        stored as [x, y, width, height].

    Raises:
        KeyError: If a row's label is missing from a caller-supplied
            `cat_name2id`.
    """
    box_columns = ['@xtl', '@ytl', '@xbr', '@ybr']

    image_name2id = {name: idx for idx, name in enumerate(df['@name'].unique())}

    if cat_name2id is None:
        # dropna(): rows of box-less images have a NaN label, which must not
        # become a category of its own.
        labels = df['@label'].dropna().unique() if '@label' in df.columns else []
        cat_name2id = {name: idx for idx, name in enumerate(sorted(labels))}

    out_dict = dict(images=[], annotations=[], categories=[])

    # A split made only of box-less images has no coordinate columns at all.
    if all(column in df.columns for column in box_columns):
        for _, row in df.iterrows():
            x, y, x2, y2 = [float(row[k]) for k in box_columns]
            if np.isnan(x):
                continue
            w, h = x2 - x, y2 - y
            ann = dict(
                id=len(out_dict['annotations']),
                bbox=[x, y, w, h],
                area=h * w,
                category_id=cat_name2id[row['@label']],
                image_id=image_name2id[row['@name']],
                # Expected on every annotation by COCO tooling; these are
                # plain single-object boxes.
                iscrowd=0,
            )
            out_dict['annotations'].append(ann)

    # Image size is taken from the first row seen for each file name.
    image_sizes = {}
    if '@width' in df.columns and '@height' in df.columns:
        first_rows = df.drop_duplicates('@name')
        for name, width, height in zip(first_rows['@name'], first_rows['@width'], first_rows['@height']):
            if pd.notna(width) and pd.notna(height):
                image_sizes[name] = (int(float(width)), int(float(height)))

    for name, image_id in image_name2id.items():
        image = dict(id=image_id, file_name=name)
        if name in image_sizes:
            image['width'], image['height'] = image_sizes[name]
        out_dict['images'].append(image)

    for name, category_id in cat_name2id.items():
        out_dict['categories'].append(dict(id=category_id, name=name))

    print('Num images', len(out_dict['images']))
    return out_dict
# Convert the validation rows and the remaining (training) rows separately;
# each call derives its own image and category ids from the rows it is given.
val_dict = to_coco_format(df_image.loc[df_image['is_val']])
train_dict = to_coco_format(df_image.loc[~df_image['is_val']])

Num images 5091
Num images 31939


In [None]:
# Write the expected on-disk path of every image id in `cc.img_ids` to
# /tmp/expect_img_list.txt (one path per line), while tallying how many of
# those paths exist (`count`) and how many do not (`missing_count`).
# NOTE(review): `cc` is not assigned in any earlier visible cell; the only
# assignment shown is commented out in a later cell — confirm where it comes
# from before relying on Restart & Run All.
missing_count = 0
count = 0
with open('/tmp/expect_img_list.txt', 'w') as f:
    for img in cc.img_ids:
        img_info = cc.gt.imgs[img]
        # Path is the dataset's image directory joined with the stored file name.
        fn = osp.join(cc.img_dir, img_info['file_name'])
        f.write(fn+'\n')
        if not osp.exists(fn) :#and not '.jpg' in fn:
            missing_count += 1
            
        else:
            count += 1

(7802, 24137)

In [None]:
# cc = CocoDataset(train_dict, '/data/cvat-raw-images/')

In [None]:
# cc.visualize(show=1);

In [None]:
# img