In [3]:
import os
import numpy as np
from pycocotools.coco import COCO
from pycocotools import mask as maskUtils
import matplotlib.pyplot as plt
from PIL import Image
import yaml

annotation_file = 'Football/COCO_Football Pixel.json'
image_folder = 'Football/images'
image_list_path = 'images_list.yaml'

dataset_path = 'Football/dataset'

image_list = yaml.load(open(image_list_path, 'r'), Loader=yaml.FullLoader)['images']


coco = COCO(annotation_file)


cat_id_map = { cat_id: i for i, cat_id in enumerate(coco.getCatIds()) }




coco = COCO(annotation_file)

image_ids = coco.getImgIds()

def load_image(image_id, coco, image_folder):
    image_info = coco.loadImgs(image_id)[0]
    image_path = os.path.join(image_folder, image_info['file_name'])
    image = Image.open(image_path)
    return image, image_info

def generate_segmentation_mask(image_info, annotations, width, height, coco):
    mask = np.zeros((height, width), dtype=np.uint8)
    
    for annotation in annotations:
        category_id = cat_id_map[annotation['category_id']]
        rle = coco.annToRLE(annotation)
        binary_mask = maskUtils.decode(rle)
        mask[binary_mask == 1] = category_id
        
    return mask

batch_images = []
batch_masks = []

for image_id in image_ids:
    image, image_info = load_image(image_id, coco, image_folder)
    if image_info['file_name'] not in image_list:
        continue
    annotation_ids = coco.getAnnIds(imgIds=image_id)
    annotations = coco.loadAnns(annotation_ids)
    
    width, height = image_info['width'], image_info['height']
    segmentation_mask = generate_segmentation_mask(image_info, annotations, width, height, coco)
    
    batch_images.append(image)
    batch_masks.append(segmentation_mask)

print("Batch Masks Shape:", batch_masks[0].shape)
print("Number of Images:", len(batch_images))
print("Number of Masks:", len(batch_masks))


Loading annotations into memory...
Done (t=0.36s)
Creating index...
index created!
Loading annotations into memory...
Done (t=0.40s)
Creating index...
index created!
Batch Masks Shape: (1080, 1920)
Number of Images: 59
Number of Masks: 59


In [4]:
from datasets import Dataset

batch_masks = [Image.fromarray(mask, mode='L') for mask in batch_masks]
dataset = Dataset.from_dict({
    'image': batch_images,
    'mask': batch_masks
})
dataset.save_to_disk(dataset_path)

Saving the dataset (0/1 shards):   0%|          | 0/59 [00:00<?, ? examples/s]

In [None]:

def display_image_and_mask_with_filename(image, mask, filename, ax):
    ax.imshow(image)
    ax.imshow(mask, cmap='jet', alpha=0.5)
    ax.axis('off')
    ax.set_title(filename, fontsize=12)


num_images = 5
fig, axes = plt.subplots(num_images, 2, figsize=(12, num_images * 4))

for i in range(20):
    index = np.random.randint(len(batch_images))
    image = batch_images[index]
    mask = batch_masks[index]
    filename = coco.loadImgs(image_ids[index])[0]['file_name']
    
    display_image_and_mask_with_filename(image, mask, filename, axes[i, 0])
    axes[i, 0].set_title('Image with Mask')
    
    axes[i, 1].imshow(mask, cmap='jet')
    axes[i, 1].set_title('Mask Only')
    axes[i, 1].axis('off')
    axes[i, 1].set_title(filename, fontsize=12)

plt.tight_layout()
plt.show()
