Nguồn tham khảo:
- [Visual Wake word detection - on cAInvas](https://medium.com/ai-techsystems/visual-wake-word-detection-on-cainvas-6ec3424b497e).
- [Mainly for training, partly for collect and relabel data](https://github.com/tensorflow/tflite-micro/blob/main/tensorflow/lite/micro/examples/person_detection/training_a_model.md).
- [Data source](https://www.kaggle.com/datasets/jeffaudi/coco-2014-dataset-for-yolov3?resource=download).

In [1]:
import pandas as pd
import json
import numpy as np
import os
from PIL import Image

ax = json.load(open('coco2014/annotations/instances_val2014.json')) # add link to json file from COCO official website
ax = ax['annotations']  # annotations hold category information

In [2]:
# Get all picture ids
ids = {a['image_id'] for a in ax}

print('Number of images: ', len(ids))

# We just take a subset of the images
# n = 0.01 * len(ids)
# ids = np.random.choice(list(ids), int(n), replace=False)
# print('New number of images: ', len(ids))

Number of images:  40137


In [3]:
clm = ['image_id', 'category_id', 'bbox', 'area', 'iscrowd']

rows = []
for i in range(len(ax)):
    id = ax[i]['image_id']
    if id in ids:
        iname = str(id).zfill(12)
        new_row = {'image_id': iname, 'category_id': ax[i]['category_id'], 'bbox': ax[i]['bbox'], 'area': ax[i]['area'], 'iscrowd': ax[i]['iscrowd']}
        rows.append(new_row)

df = pd.DataFrame(rows, columns=clm)

img_names = np.unique(df['image_id'])

In [4]:
def get_annotations(img_name):
    anns = df[df['image_id'] == img_name]
    return anns

def get_path(img_name):
    img_name = 'COCO_val2014_' + img_name + '.jpg'
    return os.path.join('coco2014/images/val2014/', img_name)

def get_size(img_name):
    img = Image.open(get_path(img_name))
    width, height = img.size
    return width, height

def check_human_annotation(img_name, threshold=0.005):
    '''
    Check if the image has human annotation. Human's area is larger than threshold.
    If there is a human:
        - If the human is in the left third of the image, return True, 'left'
        - If the human is in the right third of the image, return True, 'right'
        - If the human cannot be categorized as left or right, return True, 'none'
    If there is no human, return False, 'none'
    '''
    anns = get_annotations(img_name)
    width, height = get_size(img_name)
    human_anns = anns[(anns['category_id'] == 1) & (anns['area'] > threshold * width * height)]
    if len(human_anns) == 0:
        return False, 'none'

    main_human = human_anns['area'].argmax()

    if len(human_anns) > 1 and human_anns['area'].values[main_human] < 0.7 * human_anns['area'].sum():
        return True, 'none'

    bbox = human_anns['bbox'].values[0]
    left_x, right_x = bbox[0], bbox[0] + bbox[2]

    if right_x < 0.33 * width:
        return True, 'left'
    elif left_x > 0.66 * width:
        return True, 'right'
    else:
        return True, 'none'
        
# Test
def test(img_name):
    result = check_human_annotation(img_name)
    # Visualize the image
    # Also, if there is a human, visualize the bounding box
    from matplotlib import pyplot as plt

    def show_image(img_name):
        img = Image.open(get_path(img_name))
        if result[0]:
            anns = get_annotations(img_name)
            human_anns = anns[anns['category_id'] == 1]
            width, height = get_size(img_name)
            threshold = 0.005
            human_anns = human_anns[human_anns['area'] > threshold * width * height]
            # Biggest human
            main_human = human_anns['area'].argmax()
            for human in range(len(human_anns)):
                bbox = human_anns['bbox'].values[human]
                print(bbox)
                left_x = bbox[0]
                right_x = bbox[0] + bbox[2]
                top_y = bbox[1]
                bottom_y = bbox[1] + bbox[3]
                if human == main_human:
                    plt.plot([left_x, right_x], [top_y, top_y], color='g')
                    plt.plot([left_x, right_x], [bottom_y, bottom_y], color='g')
                    plt.plot([left_x, left_x], [top_y, bottom_y], color='g')
                    plt.plot([right_x, right_x], [top_y, bottom_y], color='g')
                else:
                    plt.plot([left_x, right_x], [top_y, top_y], color='r')
                    plt.plot([left_x, right_x], [bottom_y, bottom_y], color='r')
                    plt.plot([left_x, left_x], [top_y, bottom_y], color='r')
                    plt.plot([right_x, right_x], [top_y, bottom_y], color='r')
        plt.imshow(img)
        plt.show()

    show_image(img_name)

# test('000000000113')

In [5]:
# We will separate the images into 4 groups: Have person on the left, right, just have person (but can't determine left or right), and no person
left = []
right = []
no = []
both = [] # Or neither

for i in range(len(img_names)):
    img = img_names[i]
    img_path = get_path(img)

    # Check if image exists
    if os.path.isfile(img_path):
        # Check if image has person
        has_human, side = check_human_annotation(img)
        if has_human:
            if side == 'left':
                left.append(img)
            elif side == 'right':
                right.append(img)
            else:
                both.append(img)
        else:
            no.append(img)

print('Number of images with person on the left: ', len(left))
print('Number of images with person on the right: ', len(right))
print('Number of images with person but cannot determine left or right: ', len(both))
print('Number of images with no person: ', len(no))

Number of images with person on the left:  852
Number of images with person on the right:  856
Number of images with person but cannot determine left or right:  17399
Number of images with no person:  21030


In [6]:
# Make a new folder for the new dataset
os.mkdir('vwwd')
os.mkdir('vwwd/left')
os.mkdir('vwwd/right')
os.mkdir('vwwd/both')
os.mkdir('vwwd/no')

# Copy images to the new folder
import shutil
for img in left:
    shutil.copy(get_path(img), 'vwwd/left')

for img in right:
    shutil.copy(get_path(img), 'vwwd/right')

for img in both:
    shutil.copy(get_path(img), 'vwwd/both')

for img in no:
    shutil.copy(get_path(img), 'vwwd/no')

In [9]:
# Select 1000 files from the both and no folders
import random

def select_1k(folder):
    os.mkdir(folder + '_1k')
    files = os.listdir(folder)
    random.shuffle(files)
    files = files[:1000]
    for file in files:
        shutil.move(os.path.join(folder, file), os.path.join(folder + '_1k', file))

select_1k('vwwd/both')
select_1k('vwwd/no')