In [7]:
import pandas as pd
import numpy as np
from pathlib import Path
import xml.etree.ElementTree as ET
import os
from tqdm import tqdm

class Reader:
    def __init__(self, voc_root):
        """Pascal VOC format data reader

        Args:
            voc_root (str) - pascal voc format data root

        """
        self.voc_root = Path(voc_root)
        self.xml_root = self.voc_root #/ 'Annotations'
        self.txt_root = self.voc_root #/ 'ImageSets' / 'Main'
        self.img_root = self.voc_root #/ 'JPEGImages'
        self.df = pd.DataFrame(self.parse_xml(self.xml_root))
        self.df['img_path'] = self.df.stem.map(lambda x: self.img_root / (x + '.jpg'))
        train_array, val_array, train_val_array, test_array = self.parse_txt(self.txt_root)
        self.df['train'] = self.df.stem.isin(train_array)
        self.df['val'] = self.df.stem.isin(val_array)
        self.df['trainval'] = self.df.stem.isin(train_val_array)
        self.df['test'] = self.df.stem.isin(test_array)

    def parse_xml(self, xml_root):
        ret = []
        for i in tqdm(xml_root.glob('*.xml')):
            ret.extend(self.parse_single_xml(i))
        return ret

    def parse_single_xml(self, xml_path):
        ret = []

        tree = ET.parse(str(xml_path))
        root = tree.getroot()
        size = root.find('size')
        if size:
            width = int(size.find('width').text)
            height = int(size.find('height').text)
            depth = int(size.find('depth').text)
        else:
            width, height, depth = -1, -1, -1

        for obj in root.findall('object'):
            name = obj.find('name').text
            difficult = int(obj.find('difficult').text)
            bnd_box = obj.find('bndbox')
            bbox = [
                int(float(bnd_box.find('xmin').text)),
                int(float(bnd_box.find('ymin').text)),
                int(float(bnd_box.find('xmax').text)),
                int(float(bnd_box.find('ymax').text))
            ]
            h, w = bbox[3]-bbox[1], bbox[2]-bbox[0]
            ratio = h / w
            ret.append(dict(name=name, difficult=difficult,
                x1=bbox[0], y1=bbox[1], x2=bbox[2], y2=bbox[3],
                xml_path=xml_path, stem=xml_path.stem,
                width=width, height=height, depth=depth,
                h=h, w=w, ratio=ratio))
        return ret

    def parse_txt(self, txt_root):
        train_array=self.parse_single_txt(txt_root / 'train.txt')
        test_array=self.parse_single_txt(txt_root / 'test.txt')
        val_array=self.parse_single_txt(txt_root / 'val.txt')
        train_val_array=self.parse_single_txt(txt_root / 'trainval.txt')
        return train_array, val_array, train_val_array, test_array

    def parse_single_txt(self, txt_path):
        if not os.path.exists(txt_path):
            return []
        with open(txt_path) as f:
            return list(f.readlines())

    def show(self):
        import cv2
        grouped = self.df.groupby('stem')
        # length = self.df.
        length = self.df.stem.unique().size
        indexes = self.df.stem.unique()
        def nothing(x):
            index = indexes[x]
            sub_table = grouped.get_group(index)
            img_path = str(sub_table.iloc[0]['img_path'])
            img = cv2.imread(img_path)
            for index, row in sub_table.iterrows():
                cv2.rectangle(img, (row['x1'], row['y1']), (row['x2'], row['y2']),
                              (0, 255, 0))
            # img = cv2.resize(img, (800, 800))
            cv2.imshow('image_win', img)
            # cv2.waitKey()

        cv2.namedWindow('image_win')
        cv2.namedWindow('tracker_win')
        cv2.createTrackbar('tracker', 'tracker_win', 0, length - 1, nothing)

        if cv2.waitKey() == 'q':
            cv2.destroyAllWindows()


## Read XML in folder

In [17]:
reader = Reader("../debug_images/")
reader.df

47it [00:00, 17482.47it/s]


Unnamed: 0,name,difficult,x1,y1,x2,y2,xml_path,stem,width,height,depth,h,w,ratio,img_path,train,val,trainval,test
0,person,0,438,0,610,178,../debug_images/08d2f74608d979d4.xml,08d2f74608d979d4,1024,683,3,178,172,1.034884,../debug_images/08d2f74608d979d4.jpg,False,False,False,False
1,person,0,288,135,622,507,../debug_images/06767fca65292b4e.xml,06767fca65292b4e,1024,768,3,372,334,1.113772,../debug_images/06767fca65292b4e.jpg,False,False,False,False
2,face,0,645,351,817,535,../debug_images/06b81bcf2f8926ce.xml,06b81bcf2f8926ce,1024,891,3,184,172,1.069767,../debug_images/06b81bcf2f8926ce.jpg,False,False,False,False
3,person,0,377,383,503,542,../debug_images/09e22dd8a7a408c7.xml,09e22dd8a7a408c7,683,1024,3,159,126,1.261905,../debug_images/09e22dd8a7a408c7.jpg,False,False,False,False
4,person,0,719,198,895,452,../debug_images/090a4114c1aab740.xml,090a4114c1aab740,1024,682,3,254,176,1.443182,../debug_images/090a4114c1aab740.jpg,False,False,False,False
5,face,0,189,189,271,276,../debug_images/01cd02c52b29cdc9.xml,01cd02c52b29cdc9,1024,683,3,87,82,1.060976,../debug_images/01cd02c52b29cdc9.jpg,False,False,False,False
6,person,0,0,0,427,323,../debug_images/137ebaa8a573cce5.xml,137ebaa8a573cce5,1024,683,3,323,427,0.75644,../debug_images/137ebaa8a573cce5.jpg,False,False,False,False
7,person,0,235,363,349,450,../debug_images/0320bbc92e400191.xml,0320bbc92e400191,1024,683,3,87,114,0.763158,../debug_images/0320bbc92e400191.jpg,False,False,False,False
8,person,0,596,0,763,155,../debug_images/0c34a0541a29fa37.xml,0c34a0541a29fa37,1024,768,3,155,167,0.928144,../debug_images/0c34a0541a29fa37.jpg,False,False,False,False
9,person,0,363,0,727,135,../debug_images/10586f34aa8f4549.xml,10586f34aa8f4549,1024,819,3,135,364,0.370879,../debug_images/10586f34aa8f4549.jpg,False,False,False,False


## To fastdup format

In [16]:
annots_df = reader.df
annots_df = annots_df[['name', 'x1', 'y1', 'x2', 'y2', 'img_path']]
annots_df.rename(columns={'name':'label', 'img_path':'filename'}, inplace=True)

# Convert bounding boxes
annots_df['col_x'] = (annots_df['x1'] + annots_df['x2']) / 2
annots_df['row_y'] = (annots_df['y1'] + annots_df['y2']) / 2
annots_df['width'] = annots_df['x2'] - annots_df['x1']
annots_df['height'] = annots_df['y2'] - annots_df['y1']

annots_df = annots_df[['filename', 'col_x', 'row_y', 'width', 'height', 'label']]

annots_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annots_df.rename(columns={'name':'label', 'img_path':'filename'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annots_df['col_x'] = (annots_df['x1'] + annots_df['x2']) / 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annots_df['row_y'] = (annots_df['y1'] + annots_df['y2']) / 2
A value is trying to be set on a copy of a slice from a Data

Unnamed: 0,filename,col_x,row_y,width,height,label
0,../debug_images/08d2f74608d979d4.jpg,524.0,89.0,172,178,person
1,../debug_images/06767fca65292b4e.jpg,455.0,321.0,334,372,person
2,../debug_images/06b81bcf2f8926ce.jpg,731.0,443.0,172,184,face
3,../debug_images/09e22dd8a7a408c7.jpg,440.0,462.5,126,159,person
4,../debug_images/090a4114c1aab740.jpg,807.0,325.0,176,254,person
5,../debug_images/01cd02c52b29cdc9.jpg,230.0,232.5,82,87,face
6,../debug_images/137ebaa8a573cce5.jpg,213.5,161.5,427,323,person
7,../debug_images/0320bbc92e400191.jpg,292.0,406.5,114,87,person
8,../debug_images/0c34a0541a29fa37.jpg,679.5,77.5,167,155,person
9,../debug_images/10586f34aa8f4549.jpg,545.0,67.5,364,135,person
