In [2]:
import pandas as pd
from tqdm import tqdm
import json
from glob import glob
from pathlib import Path

# Configuration: where the dataset lives and which sub-folders are read/written.
# Forward slashes are accepted by Windows as well as POSIX systems, so the
# notebook is not tied to one operating system.
DATASET = './dataset'                     # root folder of the dataset
EXPERIMENT_NAME = 'baseline'              # sub-folder holding this experiment's CSV files
ANNOTATION_OUTPUT = 'annotation'          # sub-folder receiving the Labelme JSON files
ANNOTATION_BACK_OUTPUT = 'annotation_back'  # sub-folder receiving CSVs rebuilt from the JSON files


In [3]:

# Identifier columns are read as text so values such as '0193927093' keep
# their leading zero instead of being parsed as integers.
_ID_COLUMNS_AS_TEXT = {'image_id': str, 'object_id': str}


def _read_annotation_csv(csv_name):
    """Load `<DATASET>/<EXPERIMENT_NAME>/<csv_name>.csv` with id columns as strings."""
    return pd.read_csv(f'{DATASET}/{EXPERIMENT_NAME}/{csv_name}.csv', dtype=_ID_COLUMNS_AS_TEXT)


# Full annotation tables, one per object class.
qr_codes = _read_annotation_csv('qr_codes_all')
fips = _read_annotation_csv('fips_all')

# Per-split QR-code tables; only their `image_id` column is used to tell
# which split an image belongs to.
train = _read_annotation_csv('qr_codes_train')
valid = _read_annotation_csv('qr_codes_valid')
test = _read_annotation_csv('qr_codes_test')


In [4]:
# CSV to JSONs
# Unique image ids that appear in the QR-code table; the conversion loop in
# this cell writes one Labelme JSON file per id.
# NOTE(review): this name shadows Python's built-in `input()` for the rest of
# the kernel session.
input = qr_codes['image_id'].unique()

def is_a_small_qrcode(xmin, ymin, xmax, ymax, width, height, target_size=480, area_threshold=500):
    """Tell whether a bounding box is "small" once the image is rescaled.

    The box corners are rescaled as if the image were resized to
    ``target_size`` x ``target_size`` (each axis scaled independently), and the
    area of the rescaled box is compared with ``area_threshold``.

    Args:
        xmin, ymin, xmax, ymax: Box corners in pixels of the original image.
        width, height: Size of the original image in pixels; must be non-zero.
        target_size: Side length of the square reference image (default 480).
        area_threshold: Rescaled area below which the box counts as small
            (default 500, strict comparison).

    Returns:
        True when the rescaled box area is strictly below ``area_threshold``.
    """
    xmin_scaled = xmin * target_size / width
    ymin_scaled = ymin * target_size / height
    xmax_scaled = xmax * target_size / width
    ymax_scaled = ymax * target_size / height
    # abs() keeps the area positive even if the corners arrive swapped.
    area_scaled = abs(xmax_scaled - xmin_scaled) * abs(ymax_scaled - ymin_scaled)
    return area_scaled < area_threshold

def _as_number(value):
    """Turn a CSV cell (numeric text or numpy scalar) into a plain int or float.

    Stands in for ``eval(str(value))``: numeric text gives the same result,
    but CSV content is never executed as code and zero-padded ids such as
    '007' are accepted. The plain Python number it returns is serialisable
    by ``json.dump``.
    """
    text = str(value).strip()
    try:
        return int(text)
    except ValueError:
        return float(text)


def _rectangle_shape(label, row):
    """Build the Labelme rectangle entry for one annotation row."""
    return {
        "label": label,
        "points": [
            [_as_number(row['xmin']), _as_number(row['ymin'])],
            [_as_number(row['xmax']), _as_number(row['ymax'])],
        ],
        "group_id": _as_number(row['object_id']),
        "shape_type": "rectangle",
        "flags": {},
    }


# Image ids of every split, built once instead of scanning the arrays per image.
# The order matters: an id listed in several splits is assigned to the first match.
split_image_ids = {
    'train': set(train['image_id']),
    'valid': set(valid['image_id']),
    'test': set(test['image_id']),
}

# Make sure the destination folder exists before the first file is written.
annotation_dir = Path(DATASET) / EXPERIMENT_NAME / ANNOTATION_OUTPUT
annotation_dir.mkdir(parents=True, exist_ok=True)

for entry in tqdm(input, desc='Converting Dataset Annotations to Labelme format'):
    entry_qr_codes = qr_codes[qr_codes['image_id'] == entry]
    entry_fips = fips[fips['image_id'] == entry]

    entry_set = next(
        (split for split, ids in split_image_ids.items() if entry in ids),
        'not_used',
    )

    # Image size is taken from the first QR-code row of the image.
    image_height = _as_number(entry_qr_codes.iloc[0]["image_height"])
    image_width = _as_number(entry_qr_codes.iloc[0]["image_width"])

    # Data to be written
    dictionary = {
        "version": "5.0.1",
        "flags": {
            entry_set: True,
            'inconsistency_missing_bb': False,
            'inconsistency_bb_misplaced': False,
            'inconsistency_other': False,
            'attribute_has_occlusion': False,
            'attribute_is_unfocused': False,
            'attribute_has_small_qrcode': False,
        },
        "imagePath": f'..\\images\\{entry}.jpg',
        "imageData": None,
        "imageHeight": image_height,
        "imageWidth": image_width,
    }

    shapes = []
    for _, row in entry_qr_codes.iterrows():
        # A single small QR code is enough to flag the whole image.
        if is_a_small_qrcode(row['xmin'], row['ymin'], row['xmax'], row['ymax'], image_width, image_height):
            dictionary['flags']['attribute_has_small_qrcode'] = True
        shapes.append(_rectangle_shape("qr_code", row))

    for _, row in entry_fips.iterrows():
        shapes.append(_rectangle_shape("fips", row))

    dictionary['shapes'] = shapes

    with open(annotation_dir / f'{entry}.json', "w") as outfile:
        json.dump(dictionary, outfile)

Converting Dataset Annotations to Labelme format: 100%|██████████| 767/767 [00:04<00:00, 155.58it/s]


**Comando para executar o Labelme sem salvar cópia da imagem na anotação**
```
Labelme.exe --nodata
```

### From JSONs to CSVs

In [28]:


csv_output_names = ['qr_codes_all', 'qr_codes_train', 'qr_codes_valid', 'qr_codes_test', 'fips_all', 'fips_train', 'fips_valid', 'fips_test']
header = ['image_id', 'image_height', 'image_width', 'object_id', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
split_names = ('train', 'valid', 'test')

# One row buffer per output CSV.
lists = {name: [] for name in csv_output_names}

# Path objects keep the pattern valid on every operating system; sorting makes
# the processing order independent of the file system.
annotation_dir = Path(DATASET) / EXPERIMENT_NAME / ANNOTATION_OUTPUT
json_filenames = sorted(glob(str(annotation_dir / '*.json')))
for filename in tqdm(json_filenames, desc='Reading source json annotations'):
    with open(filename) as file:
        data = json.load(file)

    # imagePath may use Windows separators ('..\\images\\<id>.jpg'); normalise
    # them so the stem is extracted correctly on any platform.
    image_id = Path(data['imagePath'].replace('\\', '/')).stem
    image_height = data['imageHeight']
    image_width = data['imageWidth']
    flags = data['flags']

    # The split is the first split flag that is set. Checking the value (not
    # just the key) also handles files where a split flag is present but False.
    # A dedicated name is used so the `train`/`valid`/`test` DataFrames loaded
    # earlier in the notebook are not overwritten.
    split = next((name for name in split_names if flags.get(name)), None)

    for shape in data['shapes']:
        # Rectangle corners are stored in drawing order, so a box dragged
        # "backwards" has its larger corner first; normalise to min/max.
        first_corner, second_corner = shape['points'][0], shape['points'][1]
        xmin, xmax = sorted((first_corner[0], second_corner[0]))
        ymin, ymax = sorted((first_corner[1], second_corner[1]))

        # Any label other than 'qr_code' is treated as a fip.
        if shape['label'] == 'qr_code':
            prefix, class_name = 'qr_codes', 'qr_code'
        else:
            prefix, class_name = 'fips', 'fip'

        current = [image_id, image_height, image_width, shape['group_id'], class_name, xmin, ymin, xmax, ymax]
        lists[f'{prefix}_all'].append(current)
        if split is not None:
            lists[f'{prefix}_{split}'].append(current)

# Make sure the destination folder exists before the first file is written.
output_dir = Path(DATASET) / EXPERIMENT_NAME / ANNOTATION_BACK_OUTPUT
output_dir.mkdir(parents=True, exist_ok=True)

for name in tqdm(csv_output_names, desc='Writing resulting CSV files.'):
    df = (
        pd.DataFrame(lists[name], columns=header)
        # Coordinates edited in Labelme may come back as floats; the CSVs store integers.
        .astype({"xmin": int, "ymin": int, "xmax": int, "ymax": int}, errors='raise')
        .sort_values(by=['object_id', 'xmin', 'ymin'])
    )
    out_path = output_dir / f'{name}.csv'
    try:
        df.to_csv(out_path, index=False, lineterminator='\n')
    except TypeError:
        # pandas < 1.5 only knows the keyword under its old name.
        df.to_csv(out_path, index=False, line_terminator='\n')


Reading source json annotations: 100%|██████████| 767/767 [00:00<00:00, 4309.04it/s]
Writing resulting CSV files.: 100%|██████████| 8/8 [00:00<00:00, 14.54it/s]


**Counting annotated flags**

In [26]:
split_names = ('train', 'valid', 'test')

# Per-section counters: 'count' is the number of images, every other key is
# the number of images on which that flag is set.
report = {section: {'count': 0} for section in ('all',) + split_names}

annotation_dir = Path(DATASET) / EXPERIMENT_NAME / ANNOTATION_OUTPUT
json_filenames = sorted(glob(str(annotation_dir / '*.json')))
for filename in tqdm(json_filenames, desc='Reading source json annotations'):
    # The context manager closes every file; the handles used to stay open.
    with open(filename) as file:
        data = json.load(file)

    flags = data['flags']
    # A dedicated name is used so the `train`/`valid`/`test` DataFrames loaded
    # earlier in the notebook are not overwritten.
    split = next((name for name in split_names if flags.get(name)), None)

    # Split markers are bookkeeping, not annotation flags: leave them out of the tally.
    for key in split_names:
        flags.pop(key, None)
    # NOTE(review): the conversion cell writes this flag as
    # 'attribute_has_small_qrcode', so this pop does not remove it and the flag
    # is still counted below - confirm whether it was meant to be excluded.
    flags.pop('small_qrcode', None)

    report['all']['count'] += 1
    if split is not None:
        report[split]['count'] += 1

    for flag, is_set in flags.items():
        if not is_set:
            continue
        # Create the counter in every section so all sections list the same flags.
        for counters in report.values():
            counters.setdefault(flag, 0)

        report['all'][flag] += 1
        if split is not None:
            report[split][flag] += 1

print(json.dumps(report, sort_keys=True, indent=4))

Reading source json annotations: 100%|██████████| 767/767 [00:00<00:00, 3723.34it/s]

{
    "all": {
        "attribute_has_occlusion": 23,
        "attribute_has_small_qrcode": 54,
        "attribute_is_unfocused": 24,
        "count": 767,
        "inconsistency_bb_misplaced": 26,
        "inconsistency_missing_bb": 9,
        "inconsistency_other": 2
    },
    "test": {
        "attribute_has_occlusion": 6,
        "attribute_has_small_qrcode": 5,
        "attribute_is_unfocused": 4,
        "count": 100,
        "inconsistency_bb_misplaced": 7,
        "inconsistency_missing_bb": 1,
        "inconsistency_other": 0
    },
    "train": {
        "attribute_has_occlusion": 13,
        "attribute_has_small_qrcode": 41,
        "attribute_is_unfocused": 16,
        "count": 567,
        "inconsistency_bb_misplaced": 17,
        "inconsistency_missing_bb": 6,
        "inconsistency_other": 2
    },
    "valid": {
        "attribute_has_occlusion": 4,
        "attribute_has_small_qrcode": 8,
        "attribute_is_unfocused": 4,
        "count": 100,
        "inconsistenc




In [27]:
# Query parameters: which section to search ('all', 'train', 'valid' or 'test')
# and which flag must be set on the image.
search_in = 'all'
what = 'inconsistency_bb_misplaced'

# Ids of the images that carry the flag `what` inside the section `search_in`.
result = []

annotation_dir = Path(DATASET) / EXPERIMENT_NAME / ANNOTATION_OUTPUT
json_filenames = sorted(glob(str(annotation_dir / '*.json')))
for filename in tqdm(json_filenames, desc='Reading source json annotations'):
    # The context manager closes every file; the handles used to stay open.
    with open(filename) as file:
        data = json.load(file)

    flags = data['flags']
    # .get() treats a missing flag as "not set" instead of raising KeyError.
    if not flags.get(what):
        continue

    # Flags are read directly, so the `train`/`valid`/`test` DataFrames loaded
    # earlier in the notebook are not overwritten. An unknown `search_in`
    # matches nothing.
    in_section = search_in == 'all' or (
        search_in in ('train', 'valid', 'test') and bool(flags.get(search_in))
    )
    if in_section:
        # imagePath may use Windows separators; normalise before taking the stem.
        result.append(Path(data['imagePath'].replace('\\', '/')).stem)


print(result)

Reading source json annotations: 100%|██████████| 767/767 [00:00<00:00, 4565.73it/s]

['0193927093', '1134131119', '1213992780', '1476633170', '2124747258', '2126492335', '2332977452', '2423843117', '2425729926', '2468137356', '2880662710', '2943752525', '2961178381', '2985146218', '2986368381', '4059282918', '4352836095', '4353596623', '4353977850', '4354342598', '4413934670', '4413992286', '4567642653', '4597824773', '4957719411', '5121473172']



