In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import xml.etree.ElementTree as ET
import os
from tqdm import tqdm

class Reader:
    def __init__(self, voc_root):
        """Pascal VOC format data reader

        Args:
            voc_root (str) - pascal voc format data root

        """
        self.voc_root = Path(voc_root)
        self.xml_root = self.voc_root #/ 'Annotations'
        self.txt_root = self.voc_root #/ 'ImageSets' / 'Main'
        self.img_root = self.voc_root #/ 'JPEGImages'
        self.df = pd.DataFrame(self.parse_xml(self.xml_root))
        self.df['img_path'] = self.df.stem.map(lambda x: self.img_root / (x + '.jpg'))
        train_array, val_array, train_val_array, test_array = self.parse_txt(self.txt_root)
        self.df['train'] = self.df.stem.isin(train_array)
        self.df['val'] = self.df.stem.isin(val_array)
        self.df['trainval'] = self.df.stem.isin(train_val_array)
        self.df['test'] = self.df.stem.isin(test_array)

    def parse_xml(self, xml_root):
        ret = []
        for i in tqdm(xml_root.glob('*.xml')):
            ret.extend(self.parse_single_xml(i))
        return ret

    def parse_single_xml(self, xml_path):
        ret = []

        tree = ET.parse(str(xml_path))
        root = tree.getroot()
        size = root.find('size')
        if size:
            width = int(size.find('width').text)
            height = int(size.find('height').text)
            depth = int(size.find('depth').text)
        else:
            width, height, depth = -1, -1, -1

        for obj in root.findall('object'):
            name = obj.find('name').text
            difficult = int(obj.find('difficult').text)
            bnd_box = obj.find('bndbox')
            bbox = [
                int(float(bnd_box.find('xmin').text)),
                int(float(bnd_box.find('ymin').text)),
                int(float(bnd_box.find('xmax').text)),
                int(float(bnd_box.find('ymax').text))
            ]
            h, w = bbox[3]-bbox[1], bbox[2]-bbox[0]
            ratio = h / w
            ret.append(dict(name=name, difficult=difficult,
                x1=bbox[0], y1=bbox[1], x2=bbox[2], y2=bbox[3],
                xml_path=xml_path, stem=xml_path.stem,
                width=width, height=height, depth=depth,
                h=h, w=w, ratio=ratio))
        return ret

    def parse_txt(self, txt_root):
        train_array=self.parse_single_txt(txt_root / 'train.txt')
        test_array=self.parse_single_txt(txt_root / 'test.txt')
        val_array=self.parse_single_txt(txt_root / 'val.txt')
        train_val_array=self.parse_single_txt(txt_root / 'trainval.txt')
        return train_array, val_array, train_val_array, test_array

    def parse_single_txt(self, txt_path):
        if not os.path.exists(txt_path):
            return []
        with open(txt_path) as f:
            return list(f.readlines())

    def show(self):
        import cv2
        grouped = self.df.groupby('stem')
        # length = self.df.
        length = self.df.stem.unique().size
        indexes = self.df.stem.unique()
        def nothing(x):
            index = indexes[x]
            sub_table = grouped.get_group(index)
            img_path = str(sub_table.iloc[0]['img_path'])
            img = cv2.imread(img_path)
            for index, row in sub_table.iterrows():
                cv2.rectangle(img, (row['x1'], row['y1']), (row['x2'], row['y2']),
                              (0, 255, 0))
            # img = cv2.resize(img, (800, 800))
            cv2.imshow('image_win', img)
            # cv2.waitKey()

        cv2.namedWindow('image_win')
        cv2.namedWindow('tracker_win')
        cv2.createTrackbar('tracker', 'tracker_win', 0, length - 1, nothing)

        if cv2.waitKey() == 'q':
            cv2.destroyAllWindows()


## Read XML in folder

In [2]:
reader = Reader("../debug_images/")
reader.df

29it [00:00, 11218.85it/s]


Unnamed: 0,name,difficult,x1,y1,x2,y2,xml_path,stem,width,height,depth,h,w,ratio,img_path,train,val,trainval,test
0,face,0,645,351,818,536,../debug_images/06b81bcf2f8926ce.xml,06b81bcf2f8926ce,1024,891,3,185,173,1.069364,../debug_images/06b81bcf2f8926ce.jpg,False,False,False,False
1,person,0,13,215,845,869,../debug_images/06b81bcf2f8926ce.xml,06b81bcf2f8926ce,1024,891,3,654,832,0.786058,../debug_images/06b81bcf2f8926ce.jpg,False,False,False,False
2,person,0,377,383,504,543,../debug_images/09e22dd8a7a408c7.xml,09e22dd8a7a408c7,683,1024,3,160,127,1.259843,../debug_images/09e22dd8a7a408c7.jpg,False,False,False,False
3,person,0,580,374,679,628,../debug_images/09e22dd8a7a408c7.xml,09e22dd8a7a408c7,683,1024,3,254,99,2.565657,../debug_images/09e22dd8a7a408c7.jpg,False,False,False,False
4,face,0,385,403,430,467,../debug_images/09e22dd8a7a408c7.xml,09e22dd8a7a408c7,683,1024,3,64,45,1.422222,../debug_images/09e22dd8a7a408c7.jpg,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,person,0,433,118,487,200,../debug_images/0bf36835f38ef50b.xml,0bf36835f38ef50b,1024,575,3,82,54,1.518519,../debug_images/0bf36835f38ef50b.jpg,False,False,False,False
121,person,0,356,124,420,213,../debug_images/0bf36835f38ef50b.xml,0bf36835f38ef50b,1024,575,3,89,64,1.390625,../debug_images/0bf36835f38ef50b.jpg,False,False,False,False
122,person,0,622,325,751,458,../debug_images/00d458259fe8a694.xml,00d458259fe8a694,1024,683,3,133,129,1.031008,../debug_images/00d458259fe8a694.jpg,False,False,False,False
123,face,0,406,269,487,339,../debug_images/06eb8da7ead1ca12.xml,06eb8da7ead1ca12,1024,768,3,70,81,0.864198,../debug_images/06eb8da7ead1ca12.jpg,False,False,False,False


## To fastdup format

In [3]:
annots_df = reader.df
annots_df = annots_df[['name', 'x1', 'y1', 'x2', 'y2', 'img_path']]
annots_df.rename(columns={'name':'label', 'img_path':'filename'}, inplace=True)

# Convert bounding boxes
annots_df['col_x'] = (annots_df['x1'] + annots_df['x2']) / 2
annots_df['row_y'] = (annots_df['y1'] + annots_df['y2']) / 2
annots_df['width'] = annots_df['x2'] - annots_df['x1']
annots_df['height'] = annots_df['y2'] - annots_df['y1']

annots_df = annots_df[['filename', 'col_x', 'row_y', 'width', 'height', 'label']]
annots_df['filename'] = annots_df['filename'].astype(str)

annots_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annots_df.rename(columns={'name':'label', 'img_path':'filename'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annots_df['col_x'] = (annots_df['x1'] + annots_df['x2']) / 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annots_df['row_y'] = (annots_df['y1'] + annots_df['y2']) / 2
A value is trying to be set on a copy of a slice from a Data

Unnamed: 0,filename,col_x,row_y,width,height,label
0,../debug_images/06b81bcf2f8926ce.jpg,731.5,443.5,173,185,face
1,../debug_images/06b81bcf2f8926ce.jpg,429.0,542.0,832,654,person
2,../debug_images/09e22dd8a7a408c7.jpg,440.5,463.0,127,160,person
3,../debug_images/09e22dd8a7a408c7.jpg,629.5,501.0,99,254,person
4,../debug_images/09e22dd8a7a408c7.jpg,407.5,435.0,45,64,face
...,...,...,...,...,...,...
120,../debug_images/0bf36835f38ef50b.jpg,460.0,159.0,54,82,person
121,../debug_images/0bf36835f38ef50b.jpg,388.0,168.5,64,89,person
122,../debug_images/00d458259fe8a694.jpg,686.5,391.5,129,133,person
123,../debug_images/06eb8da7ead1ca12.jpg,446.5,304.0,81,70,face


In [4]:
annots_df.dtypes

filename     object
col_x       float64
row_y       float64
width         int64
height        int64
label        object
dtype: object

## Merge corrected annotations to previous annotations

In [5]:
df = pd.read_parquet("../parquets/clean_detection_results_debug_images.parquet")
df

Unnamed: 0,filename,score,col_x,row_y,width,height,label,outlier_distance,outlier
0,../debug_images/0000862a233105cc.jpg,0.832481,316.5,447.0,155,118,person,,False
1,../debug_images/002e555bd63acb71.jpg,0.853143,696.0,347.0,648,670,person,,False
2,../debug_images/002e555bd63acb71.jpg,0.791996,166.5,624.0,141,112,person,,False
3,../debug_images/002e555bd63acb71.jpg,0.759568,66.5,599.5,129,163,person,,False
4,../debug_images/002e555bd63acb71.jpg,0.757808,289.0,590.5,122,183,person,,False
...,...,...,...,...,...,...,...,...,...
1053,../debug_images/138c4698f334e5ff.jpg,0.380026,529.5,396.0,989,570,person,,False
1054,../debug_images/13a887093768b5ec.jpg,0.710493,495.5,440.5,245,209,person,,False
1055,../debug_images/13c67cdd79ab35f0.jpg,0.407185,450.5,271.5,275,157,person,,False
1056,../debug_images/13c67cdd79ab35f0.jpg,0.350427,601.5,354.5,279,149,person,,False


In [13]:
# Discard images with annotations issues to avoid duplicate annotations done in streamlit

filename_to_remove = annots_df['filename'].unique()

In [15]:
df = df[~df['filename'].isin(filename_to_remove)]
df

Unnamed: 0,filename,col_x,row_y,width,height,label
0,../debug_images/0000862a233105cc.jpg,316.5,447.0,155,118,person
1,../debug_images/002e555bd63acb71.jpg,696.0,347.0,648,670,person
2,../debug_images/002e555bd63acb71.jpg,166.5,624.0,141,112,person
3,../debug_images/002e555bd63acb71.jpg,66.5,599.5,129,163,person
4,../debug_images/002e555bd63acb71.jpg,289.0,590.5,122,183,person
...,...,...,...,...,...,...
1053,../debug_images/138c4698f334e5ff.jpg,529.5,396.0,989,570,person
1054,../debug_images/13a887093768b5ec.jpg,495.5,440.5,245,209,person
1055,../debug_images/13c67cdd79ab35f0.jpg,450.5,271.5,275,157,person
1056,../debug_images/13c67cdd79ab35f0.jpg,601.5,354.5,279,149,person


In [16]:
df_concat = pd.concat([df, annots_df], ignore_index=True)
df_concat

Unnamed: 0,filename,col_x,row_y,width,height,label
0,../debug_images/0000862a233105cc.jpg,316.5,447.0,155,118,person
1,../debug_images/002e555bd63acb71.jpg,696.0,347.0,648,670,person
2,../debug_images/002e555bd63acb71.jpg,166.5,624.0,141,112,person
3,../debug_images/002e555bd63acb71.jpg,66.5,599.5,129,163,person
4,../debug_images/002e555bd63acb71.jpg,289.0,590.5,122,183,person
...,...,...,...,...,...,...
1026,../debug_images/0bf36835f38ef50b.jpg,460.0,159.0,54,82,person
1027,../debug_images/0bf36835f38ef50b.jpg,388.0,168.5,64,89,person
1028,../debug_images/00d458259fe8a694.jpg,686.5,391.5,129,133,person
1029,../debug_images/06eb8da7ead1ca12.jpg,446.5,304.0,81,70,face


In [17]:
df_concat.to_parquet("../parquets/corrected_clean_detection_results_debug_images.parquet")