In [1]:
import os
import re
import json
from PIL import Image

from element_type import TYPE_DICT

# Reverse lookup of TYPE_DICT (value -> key). If two keys share a value,
# the key that comes later in TYPE_DICT's iteration order wins.
DICT_TYPE = {v: k for k, v in TYPE_DICT.items()}  # converted TYPE_DICT

def read_text_file(file_path):
    """Read a text file whose encoding is one of UTF-8, UTF-16 or CP949.

    Candidate encodings are tried from strictest to most permissive and the
    first one that decodes the whole file is used:

    1. ``utf-8``  - rejects almost all non-UTF-8 input, so it goes first.
    2. ``utf-16`` - tried before ``cp949`` only when the file starts with a
       UTF-16 byte-order mark.
    3. ``cp949``
    4. ``utf-16`` without a BOM - last resort, because a BOM-less UTF-16
       decode accepts nearly any even-length byte string and would otherwise
       turn valid CP949 text into garbage.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file content as ``str`` (read in text mode, so newlines
        are normalised the same way ``open(..., 'r')`` does).

    Raises:
        RuntimeError: If the file cannot be opened or none of the candidate
            encodings can decode it. The underlying error is attached as
            ``__cause__``.
    """
    utf16_boms = (b'\xff\xfe', b'\xfe\xff')
    try:
        with open(file_path, 'rb') as f:
            has_utf16_bom = f.read(2) in utf16_boms
    except Exception as exc:
        raise RuntimeError(f"Failed to read {file_path}") from exc

    if has_utf16_bom:
        encodings = ['utf-8', 'utf-16', 'cp949']
    else:
        encodings = ['utf-8', 'cp949', 'utf-16']

    last_error = None
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                # Stop at the first encoding that decodes cleanly.
                return f.read()
        except Exception as exc:
            # Remember why this candidate failed and try the next one.
            last_error = exc

    raise RuntimeError(f"Failed to read {file_path}") from last_error


## Convert coordinates (annotated -> YOLO)
#### Annotated coordinates: (top left x, top left y, bottom right x, bottom right y)

> #### -> YOLO coordinate values: Normalized (center X, center Y, width, height)

In [2]:
def xyxy_to_normalized_mxywh(xyxy_bbox, image_width, image_height, y_offset):
    """Convert a corner-style box into a normalized center/size box.

    Args:
        xyxy_bbox: Four values ``(x1, y1, x2, y2)``.
        image_width: Width used to normalize the x center and the box width.
        image_height: Height used to normalize the y center and the box height.
        y_offset: Amount subtracted from both y values before conversion.

    Returns:
        Tuple ``(center_x, center_y, width, height)``, each divided by the
        matching image dimension.

    Raises:
        Exception: If ``image_width`` or ``image_height`` is not positive.
    """
    if image_width <= 0 or image_height <= 0:
        raise Exception(f'Image info invalid : {image_width} {image_height}')

    left, top, right, bottom = xyxy_bbox
    # Shift the box vertically by the offset.
    top = top - y_offset
    bottom = bottom - y_offset

    box_w = right - left
    box_h = bottom - top
    center_x = left + box_w * 0.5
    center_y = top + box_h * 0.5

    return (
        center_x / image_width,
        center_y / image_height,
        box_w / image_width,
        box_h / image_height,
    )

In [3]:
# Class names ordered by sorted TYPE_DICT key; a name's position in this list
# is what gets written as its YOLO class index.
key_list = sorted(TYPE_DICT.keys())
class_list_for_yolo = [TYPE_DICT[key] for key in key_list]

### Run the notebook in step1 first

### Then check that `./unique_imgs.json` exists (the next cell loads it)

In [4]:
# JSON index of de-duplicated images; the cell refuses to run without it.
duplicated_image_removed_list = './unique_imgs.json'
assert os.path.isfile(duplicated_image_removed_list)
with open(duplicated_image_removed_list, 'r') as f:
    # Parse straight from the file handle.
    unique_imgs_data = json.load(f)

In [6]:
# Output layout: images/ and labels/ each hold a train2024 and a valid2024 split.
dataset_name = 'miniwob_dataset_test'
train_image_path = f'../../datasets/{dataset_name}/images/train2024'
valid_image_path = f'../../datasets/{dataset_name}/images/valid2024'
train_label_path = f'../../datasets/{dataset_name}/labels/train2024'
valid_label_path = f'../../datasets/{dataset_name}/labels/valid2024'

# Create every output directory up front; existing ones are left untouched.
for dataset_dir in (train_image_path, valid_image_path, train_label_path, valid_label_path):
    os.makedirs(dataset_dir, exist_ok=True)

In [7]:
def save_yolo_data(data_list, save_dir_path, save_name):
    """Write label lines to ``save_dir_path/save_name``, one entry per line.

    Args:
        data_list: Iterable of label strings; a newline is appended to each.
        save_dir_path: Directory that receives the file.
        save_name: File name inside ``save_dir_path``.
    """
    target = os.path.join(save_dir_path, save_name)
    with open(target, 'w') as out:
        out.writelines(row + '\n' for row in data_list)

In [8]:
# Per-class label counters, kept separately for the train and valid splits.
key_count_dict_train = {key: 0 for key in class_list_for_yolo}
key_count_dict_valid = {key: 0 for key in class_list_for_yolo}

# Seeds (digit strings parsed from the annotation path) routed to the valid split.
valid_seeds = ['1000']
# with Utterance offset set 0
y_offset = 0
# Crop box (left, upper, right, lower) applied to every screenshot.
# NOTE(review): 160x210 is presumably the MiniWoB screenshot size - confirm.
crop_box = (0, y_offset, 160, 210)
# "<task path>_<seed digits>__...": group 1 is the task path, group 2 the seed.
seed_pattern = re.compile(r'^(.+?)_([0-9]+)__')

# Number of elements skipped because their subtype is 'search'.
search_button_count = 0
for task_category_key, unique_image_list in unique_imgs_data.items():
    for image_fullpath in unique_image_list:
        # The annotation shares the image's path and differs only in extension.
        # splitext (instead of slicing 3 chars) also handles e.g. ".jpeg".
        annotation_fullpath = os.path.splitext(image_fullpath)[0] + ".json"

        # Crop inside the context manager so the source file handle is closed
        # for every image instead of staying open until garbage collection.
        with Image.open(image_fullpath) as original_image:
            target_image = original_image.crop(crop_box)

        element_list = json.loads(read_text_file(annotation_fullpath))

        # check seed for split train/valid
        match = seed_pattern.match(annotation_fullpath)
        if match is None:
            # Fail loudly: continuing would reuse the previous image's task
            # name and seed, putting this sample in the wrong split under a
            # wrong file name.
            raise ValueError(f"Cannot parse task name / seed from : {annotation_fullpath}")
        task_name, seeds_digits = match.group(1), match.group(2)

        if seeds_digits in valid_seeds:
            save_label_path = valid_label_path
            save_image_path = valid_image_path
            key_count_dict = key_count_dict_valid
        else:
            save_label_path = train_label_path
            save_image_path = train_image_path
            key_count_dict = key_count_dict_train

        save_image_name = f"{os.path.split(task_name)[-1]}_{seeds_digits}_{os.path.split(image_fullpath)[-1]}"

        image_width, image_height = target_image.size
        converted_datalist = []
        for element in element_list:
            if 'type' not in element:
                raise Exception(f"Element info invalid : {element}")
            if element.get('subtype') == 'search':
                # 'search' elements are counted but never written as labels.
                search_button_count += 1
                continue

            class_name = TYPE_DICT[element['type']]
            yolo_class_idx = class_list_for_yolo.index(class_name)
            key_count_dict[class_name] += 1

            mx, my, box_w, box_h = xyxy_to_normalized_mxywh(
                element['coords'], image_width, image_height, y_offset
            )
            converted_datalist.append(f"{yolo_class_idx} {mx} {my} {box_w} {box_h}")

        if len(converted_datalist) == 0:
            # Nothing to label: report the annotation and write no files for it.
            print(f"Check data : {annotation_fullpath}")
        else:
            save_label_name = f"{os.path.splitext(save_image_name)[0]}.txt"
            save_yolo_data(converted_datalist, save_label_path, save_label_name)
            target_image.save(os.path.join(save_image_path, save_image_name))
    



In [None]:
# Print the class count and the ordered class-name list.
# NOTE(review): presumably meant to be copied into the YOLO dataset config
# as its `nc` / `names` entries - confirm.
print(f"nc: {len(class_list_for_yolo)}")
print(f"names: {class_list_for_yolo}")