In [81]:
import ast
import copy
import glob
import os
import shutil
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, Comment, tostring

import cv2
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

%matplotlib inline

## Get data

In [72]:
# Dataset root. Set the BARCODE_DATA_DIR environment variable to run the
# notebook on another machine; the fallback is the original local path.
base_dir = os.environ.get(
    'BARCODE_DATA_DIR',
    '/Users/danya/Documents/project/python/computer_vision_rocket/data/hw-02/barcodes-annotated-gorai',
)

# Source images: every file with extension img_ext directly inside <base_dir>/images.
img_dir = os.path.join(base_dir, 'images')
img_ext = '.jpg'
# glob.escape keeps characters such as '[' in the directory path from being
# interpreted as glob wildcards; sorted() makes the processing order stable.
img_filenames = sorted(glob.glob(os.path.join(glob.escape(img_dir), f'*{img_ext}')))

# Directory the per-image XML annotations are written to, and the
# tab-separated table (filename, code, p1, p2) they are generated from.
xml_dir = os.path.join(base_dir, 'Annotations')
ann_df = pd.read_csv(os.path.join(base_dir, 'full_annotation.tsv'), sep='\t')
ann_df.head()

Unnamed: 0,filename,code,p1,p2
0,c234c76c-d72d-4d60-8066-e99f62361c1c--ru.a6a77...,4810153026194,"(542, 210)","(685, 489)"
1,0b56af7e-386c-410a-8f46-74350f755d77--ru.4c720...,8714100708408,"(474, 216)","(782, 625)"
2,77c7bd8d-bb64-4c4b-94c5-798a903eaa4d--ru.12f59...,4820240030508,"(657, 173)","(950, 762)"
3,5d60b81b-e31d-4530-bed1-27a8872d1f02--ru.53fed...,4607001770350,"(665, 66)","(841, 615)"
4,bb225480-a447-4232-85ab-ce22f1dec070--ru.09dd5...,4605035006964,"(592, 269)","(839, 717)"


## Convert TSV annotations to Pascal VOC XML

In [23]:
def _add_text_child(parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    child = SubElement(parent, tag)
    child.text = text
    return child


# Image-level template. The empty-string fields (filename, path, size/*) are
# filled in per image on a deep copy of this element.
obj_root_base = Element('annotation')

# Each name below is bound to the created element itself. (A chained
# `name = SubElement(...).text = value` would bind the name to the string.)
obj_folder = _add_text_child(obj_root_base, 'folder', os.path.basename(img_dir))
obj_img_name = _add_text_child(obj_root_base, 'filename', '')
obj_img_path = _add_text_child(obj_root_base, 'path', '')
obj_img_source = SubElement(obj_root_base, 'source')
obj_img_source_db = _add_text_child(obj_img_source, 'database', 'Unknown')
obj_img_size = SubElement(obj_root_base, 'size')
obj_img_size_w = _add_text_child(obj_img_size, 'width', '')
obj_img_size_h = _add_text_child(obj_img_size, 'height', '')
obj_img_size_d = _add_text_child(obj_img_size, 'depth', '')
obj_img_segmented = _add_text_child(obj_root_base, 'segmented', '0')

print(tostring(obj_root_base))

# Object-level template. The name and the four bndbox coordinates are
# filled in per object on a deep copy of this element.
obj_root = Element('object')
obj_name = _add_text_child(obj_root, 'name', 'label')
obj_pose = _add_text_child(obj_root, 'pose', 'Unspecified')
obj_truncated = _add_text_child(obj_root, 'truncated', '0')
obj_difficult = _add_text_child(obj_root, 'difficult', '0')

obj_bndbox = SubElement(obj_root, 'bndbox')
obj_bndbox_xmin = SubElement(obj_bndbox, 'xmin')
obj_bndbox_ymin = SubElement(obj_bndbox, 'ymin')
obj_bndbox_xmax = SubElement(obj_bndbox, 'xmax')
obj_bndbox_ymax = SubElement(obj_bndbox, 'ymax')

print(tostring(obj_root))

b'<annotation><folder>images</folder><filename /><path /><source><database>Unknown</database></source><size><width /><height /><depth /></size><segmented>0</segmented></annotation>'
b'<object><name>label</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><bndbox><xmin /><ymin /><xmax /><ymax /></bndbox></object>'


In [50]:
# Write one XML annotation file per annotated image into xml_dir.
# exist_ok=True makes a separate existence check unnecessary.
os.makedirs(xml_dir, exist_ok=True)

for img_filename in tqdm(img_filenames):
    img_basename = os.path.basename(img_filename)

    # Look the box up first so that images without an annotation row are
    # skipped before paying for image decoding. Only the "no row" case is
    # skipped; any other problem (e.g. a missing column) now surfaces.
    ann_rows = ann_df.loc[ann_df.filename == img_basename, ['p1', 'p2']]
    if ann_rows.empty:
        continue
    p1, p2 = ann_rows.values[0]
    # The points are stored as strings such as "(542, 210)". literal_eval
    # parses Python literals only, so file content is never executed.
    p1, p2 = ast.literal_eval(p1), ast.literal_eval(p2)

    img = cv2.imread(img_filename)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise OSError(f'Cannot read image: {img_filename}')
    h, w, c = img.shape[:3]

    # image-level fields
    ann_block = copy.deepcopy(obj_root_base)
    ann_block.find('filename').text = img_basename
    ann_block.find('path').text = img_filename
    ann_block.find('size/width').text = str(w)
    ann_block.find('size/height').text = str(h)
    ann_block.find('size/depth').text = str(c)

    # add objects info
    # NOTE(review): index 1 is written as x and index 0 as y, i.e. the points
    # are assumed to be stored as (row, col) - confirm against the TSV source.
    obj_block = copy.deepcopy(obj_root)
    obj_block.find('name').text = 'barcode'
    obj_block.find('bndbox/xmin').text = str(p1[1])
    obj_block.find('bndbox/ymin').text = str(p1[0])
    obj_block.find('bndbox/xmax').text = str(p2[1])
    obj_block.find('bndbox/ymax').text = str(p2[0])

    ann_block.append(obj_block)

    # save xml
    # Element has no write() method; serialization goes through ElementTree.
    xml_filename = os.path.join(xml_dir, img_basename.replace(img_ext, '.xml'))
    ET.ElementTree(ann_block).write(xml_filename)


100%|█████████████████████████████████████████| 537/537 [00:08<00:00, 62.76it/s]


## Train/val split

In [61]:
# Collect one row of size / brightness statistics per annotated object.
lst = []
for img_filename in tqdm(img_filenames):
    img_basename = os.path.basename(img_filename)

    # Build the annotation path from xml_dir and the file name, so that only
    # the file name is rewritten and never a directory component of the path.
    xml_path = os.path.join(xml_dir, img_basename.replace(img_ext, '.xml'))
    if not os.path.exists(xml_path):
        # The TSV -> XML step skips images without an annotation row,
        # so such images have no XML file and contribute no statistics.
        continue

    img = cv2.imread(img_filename)

    # ET.parse accepts a path and closes the file itself.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    for obj in root.iter('object'):
        x1 = int(obj.find('bndbox/xmin').text)
        y1 = int(obj.find('bndbox/ymin').text)
        x2 = int(obj.find('bndbox/xmax').text)
        y2 = int(obj.find('bndbox/ymax').text)
        cls = obj.find('name').text

        # mean over all pixels and channels inside the box
        obj_crop = img[y1:y2, x1:x2]
        bgr_mean = obj_crop.mean()

        wo = x2 - x1
        ho = y2 - y1

        # 'obj_square' is the box area in pixels; 'obj_square_share' is that
        # area as a fraction of the image area.
        lst.append([img_basename, cls, w, h, wo, ho, wo > ho, wo * ho, wo * ho / (w * h), bgr_mean])


df_ann_stat = pd.DataFrame(lst, columns=['filename', 'class', 'img_w', 'img_h', 'obj_w', 'obj_h',
                                         'obj_horiz', 'obj_square', 'obj_square_share', 'bgr_mean'])
df_ann_stat.head()

Unnamed: 0,filename,class,img_w,img_h,obj_w,obj_h,obj_horiz,obj_square,obj_square_share,bgr_mean
0,000a8eff-08fb-4907-8b34-7a13ca7e37ea--ru.8e3b8...,barcode,1000,1333,457,217,True,99169,0.074395,96.117409
1,000e5549-1ed8-4a1a-b303-fc1aed948864--ru.d4fc3...,barcode,1000,1333,601,234,True,140634,0.105502,41.782793
2,0014c021-b5ec-40e1-86d9-8eb1e06da178--ru.fedd3...,barcode,1000,2220,855,346,True,295830,0.133257,94.18033
3,015a42c2-bf4d-4c0c-a91f-f1c9b2f0daa5--ru.a7c1a...,barcode,1000,1333,417,303,True,126351,0.094787,124.467502
4,02798fff-1662-4a67-a8aa-c82722f2f1e5--ru.818a1...,barcode,1000,1333,345,120,True,41400,0.031058,94.083728


In [98]:
# Summary statistics for the numeric columns of df_ann_stat.
df_ann_stat.describe()

Unnamed: 0,img_w,img_h,obj_w,obj_h,obj_square,obj_square_share,bgr_mean
count,537.0,537.0,537.0,537.0,537.0,537.0,537.0
mean,1249.409683,1542.01676,555.886406,291.851024,234444.9,0.093657,130.938592
std,726.235676,742.089814,375.118519,213.143854,539330.8,0.068909,31.027171
min,609.0,480.0,130.0,68.0,8840.0,0.006632,41.06637
25%,1000.0,1333.0,366.0,175.0,64792.0,0.045675,110.991816
50%,1000.0,1333.0,470.0,239.0,111180.0,0.075907,131.615044
75%,1000.0,1333.0,601.0,333.0,194832.0,0.12244,150.956575
max,5760.0,5760.0,3124.0,2000.0,6134000.0,0.419709,217.494817


In [69]:
# Number of bins per stratification feature.
n = 3

# Bin the two features used for stratified splitting into n intervals with
# pd.cut and attach the bins as extra columns on a copy of df_ann_stat.
binned_columns = {
    f'{feature}_q{n}': pd.cut(df_ann_stat[feature], n)
    for feature in ('obj_square_share', 'bgr_mean')
}
df_ann_stat_str = df_ann_stat.assign(**binned_columns)

In [71]:
# Preview the first rows of the frame that carries the added bin columns.
df_ann_stat_str.head()

Unnamed: 0,filename,class,img_w,img_h,obj_w,obj_h,obj_horiz,obj_square,obj_square_share,bgr_mean,obj_square_share_q5,bgr_mean_q5,obj_square_share_q3,bgr_mean_q3
0,000a8eff-08fb-4907-8b34-7a13ca7e37ea--ru.8e3b8...,barcode,1000,1333,457,217,True,99169,0.074395,96.117409,"(0.00622, 0.0892]","(76.352, 111.638]","(0.00622, 0.144]","(40.89, 99.876]"
1,000e5549-1ed8-4a1a-b303-fc1aed948864--ru.d4fc3...,barcode,1000,1333,601,234,True,140634,0.105502,41.782793,"(0.0892, 0.172]","(40.89, 76.352]","(0.00622, 0.144]","(40.89, 99.876]"
2,0014c021-b5ec-40e1-86d9-8eb1e06da178--ru.fedd3...,barcode,1000,2220,855,346,True,295830,0.133257,94.18033,"(0.0892, 0.172]","(76.352, 111.638]","(0.00622, 0.144]","(40.89, 99.876]"
3,015a42c2-bf4d-4c0c-a91f-f1c9b2f0daa5--ru.a7c1a...,barcode,1000,1333,417,303,True,126351,0.094787,124.467502,"(0.0892, 0.172]","(111.638, 146.923]","(0.00622, 0.144]","(99.876, 158.685]"
4,02798fff-1662-4a67-a8aa-c82722f2f1e5--ru.818a1...,barcode,1000,1333,345,120,True,41400,0.031058,94.083728,"(0.00622, 0.0892]","(76.352, 111.638]","(0.00622, 0.144]","(40.89, 99.876]"


In [78]:
# Write the image-set list files under <base_dir>/ImageSets/Main.
split_dir = os.path.join(base_dir, 'ImageSets', 'Main')
# open(..., 'w') does not create missing parent directories.
os.makedirs(split_dir, exist_ok=True)

all_filenames = list(df_ann_stat_str['filename'])

# Full list of annotated images. Mode 'w' truncates an existing file, so no
# explicit removal is needed.
# NOTE(review): these entries keep the image extension, while the train_/test_
# lists below store names without it - confirm that this is intended.
trainval_txt = os.path.join(split_dir, 'trainval_202301.txt')
with open(trainval_txt, 'w') as f:
    f.writelines(f'{line}\n' for line in all_filenames)

for test_share in [10, 20, 30]:

    # Stratify on the binned object-area share and brightness so every split
    # keeps a similar mix of the bins; the fixed seed makes the split repeatable.
    train_imgs, val_imgs = train_test_split(all_filenames, test_size=test_share/100,
                                            random_state=42,
                                            stratify=df_ann_stat_str[[f'obj_square_share_q{n}', f'bgr_mean_q{n}']])
    print(len(train_imgs), len(val_imgs))

    train_txt = os.path.join(split_dir, f'train_{100 - test_share}_{test_share}.txt')
    val_txt = os.path.join(split_dir, f'test_{100 - test_share}_{test_share}.txt')

    # One image id (file name without img_ext) per line; each file is opened
    # once and written in a single pass.
    for split_txt, split_imgs in ((train_txt, train_imgs), (val_txt, val_imgs)):
        with open(split_txt, 'w') as f:
            f.writelines(filename.replace(img_ext, '') + '\n' for filename in split_imgs)

483 54
429 108
375 162


## Prepare dataset (XML to YOLO format)

In [88]:
# Output root for the YOLO-format copy of the dataset; the export loop
# creates <save_dir>/<image_set>/images and <save_dir>/<image_set>/labels.
save_dir = os.path.join(base_dir, 'yolo_dataset')

In [89]:
# Class map used by convert_label: class index -> class name.
yaml = dict(names={0: 'barcode'})
yaml

{'names': {0: 'barcode'}}

In [79]:
def convert_label(old_lb_path, new_lb_path, image_id, names=None):
    """Convert one XML annotation file into a YOLO-format label file.

    Every ``<object>`` whose ``<name>`` is in ``names`` and whose
    ``<difficult>`` flag is not 1 produces one output line
    ``"<class_id> <x_center> <y_center> <width> <height>"``, with the four
    box values divided by the image width/height read from ``<size>``.

    Args:
        old_lb_path: path of the XML annotation file to read.
        new_lb_path: path of the label text file to write (overwritten).
        image_id: not used by the conversion; kept so existing calls still work.
        names: optional sequence of class names; a name's position is its
            class id. Defaults to the values of the module-level
            ``yaml['names']`` mapping.
    """
    def convert_box(size, box):
        # size = (img_w, img_h); box = (xmin, xmax, ymin, ymax) in pixels.
        dw, dh = 1. / size[0], 1. / size[1]
        # NOTE(review): the "- 1" on the box centre presumably compensates for
        # 1-based pixel coordinates - confirm that applies to these annotations.
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    if names is None:
        names = list(yaml['names'].values())  # names list
    else:
        names = list(names)

    # ET.parse accepts a path and closes the file itself.
    root = ET.parse(old_lb_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the labels are flushed and the handle is
    # closed even if one of the objects fails to convert.
    with open(new_lb_path, 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls in names and int(obj.find('difficult').text) != 1:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = names.index(cls)  # class id
                out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')

In [77]:
# Image-set name -> list file (inside ImageSets/Main) holding its image ids.
data = dict(train='train_80_20.txt', val='test_80_20.txt')

In [90]:
# For every image set: create its images/ and labels/ folders under save_dir,
# copy the listed images there and write a YOLO label file next to each.
for image_set, list_name in data.items():

    print(image_set)

    new_images_dir = os.path.join(save_dir, image_set, 'images')
    new_labels_dir = os.path.join(save_dir, image_set, 'labels')

    for target_dir in (new_images_dir, new_labels_dir):
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    # read images ids (whitespace-separated entries of the list file)
    ids_path = os.path.join(base_dir, f'ImageSets/Main/{list_name}')
    with open(ids_path) as f:
        image_ids = f.read().split()
        print(len(image_ids))

    for image_id in tqdm(image_ids):

        image_name = f'{image_id}{img_ext}'

        # source and destination of the image
        old_image_path = os.path.join(base_dir, f'images/{image_name}')
        new_image_path = os.path.join(new_images_dir, image_name)

        # source XML annotation and destination YOLO label
        old_label_path = os.path.join(base_dir, f'Annotations/{image_id}.xml')
        new_label_path = os.path.join(new_labels_dir, f'{image_id}.txt')

        # copy image
        shutil.copy2(old_image_path, new_image_path)
        # convert labels to YOLO format
        convert_label(old_label_path, new_label_path, image_id)

train
429


100%|████████████████████████████████████████| 429/429 [00:00<00:00, 694.84it/s]


val
108


100%|████████████████████████████████████████| 108/108 [00:00<00:00, 762.10it/s]
