# Draft notebook: convert data from different datasets into one common format.

In [10]:
import os
import cv2
import numpy as np
from PIL import Image
import json
from tqdm import tqdm

# Source split (images plus COCO annotations) and destination for the
# converted image/mask pairs.
data_directory = 'data/roboflow/UI screenshots.v2i.coco/valid/'
# The annotation file lives in the same directory as the images it describes,
# so derive its path instead of repeating the directory literal.
annotation_file = os.path.join(data_directory, '_annotations.coco.json')
output_directory = 'data/roboflow_modified/valid/'

# JSON is UTF-8 by specification; do not depend on the platform default
# encoding, which would mis-decode non-ASCII file names on some systems.
with open(annotation_file, encoding='utf-8') as f:
    data = json.load(f)

# exist_ok=True removes the check-then-create race and makes re-running the
# cell harmless; missing parent directories are created as before.
os.makedirs(output_directory, exist_ok=True)

# Build one binary mask per annotated image: every pixel covered by at least
# one bounding box is set to 255, everything else stays 0.
#
# Result: masks[image_id] = {'mask': uint8 array (height, width),
#                            'file_name': name of the source image}

# Index the image records by id once instead of rescanning the whole list for
# every new image. setdefault keeps the first record seen for an id, which is
# what a first-match linear scan would return.
images_by_id = {}
for item in data['images']:
    images_by_id.setdefault(item['id'], item)

masks = {}

for annotation in tqdm(data['annotations']):
    img_id = annotation['image_id']
    if img_id not in masks:
        img_info = images_by_id.get(img_id)
        if not img_info:
            # The annotation refers to an image that is not listed; ignore it.
            continue
        masks[img_id] = {
            'mask': np.zeros((img_info['height'], img_info['width']), dtype=np.uint8),
            'file_name': img_info['file_name']
        }

    # bbox format is [x, y, width, height]; truncate each value to an integer
    # so it can be used as a pixel index.
    x, y, w, h = (int(v) for v in annotation['bbox'])

    # Clamp to 0: NumPy interprets a negative slice bound as an offset from
    # the far edge, so a box starting slightly outside the image would be
    # dropped or painted in the wrong place. Bounds beyond the image size
    # are already clipped by slicing itself.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = max(x + w, 0), max(y + h, 0)
    masks[img_id]['mask'][y0:y1, x0:x1] = 255

# Write every image that has a mask to the output directory under a uniform
# name: image_<n>.jpg for the picture and image_<n>.png for its mask.
#
# NOTE(review): the start index 340 presumably continues the numbering after
# files produced from another dataset — confirm before re-running.
counter = 340
skipped = []  # source paths that could not be read or written

for info in tqdm(masks.values()):
    image_path = os.path.join(data_directory, info['file_name'])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        skipped.append(image_path)
        continue

    new_image_filename = f"image_{counter}.jpg"
    new_mask_filename = f"image_{counter}.png"
    new_image_path = os.path.join(output_directory, new_image_filename)
    new_mask_path = os.path.join(output_directory, new_mask_filename)

    # cv2.imwrite reports failure through its return value; do not leave an
    # orphan mask or consume an index when the image itself was not written.
    if not cv2.imwrite(new_image_path, image):
        skipped.append(image_path)
        continue

    Image.fromarray(info['mask']).save(new_mask_path)

    counter += 1

if skipped:
    print(f"Skipped {len(skipped)} image(s) that could not be read or written:")
    for skipped_path in skipped:
        print("  ", skipped_path)

print("Processing complete. Images and masks saved in:", output_directory)


100%|██████████| 11303/11303 [00:00<00:00, 272936.31it/s]
100%|██████████| 339/339 [00:03<00:00, 111.93it/s]

Processing complete. Images and masks saved in: data/roboflow_modified/valid/



