In [1]:
import os
from pycocotools.coco import COCO
import shutil

In [5]:
# Folder that holds the images and the COCO export; every other path hangs off it
img_dir = '/Users/erictommathews/Documents/Eric/Msc Artificial Intelligence - Stirling/Semester 3 Spring/Deep Learning/ITNPAI1-SkyGrove-test/Dataset/Glasgow/split_data'

# COCO annotation file stored alongside the images
anno_file = img_dir + '/instances_default.json'

# Destination folders for the train and validation splits
train_dir = img_dir + '/train'
val_dir = img_dir + '/validation'

# Parse the annotation file (pycocotools builds its lookup index on construction)
coco = COCO(anno_file)

# IDs of every image listed in the annotation file
img_ids = coco.getImgIds()

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [6]:


# Make sure both split folders exist; exist_ok avoids an error when they are already there
for split_dir in (train_dir, val_dir):
    os.makedirs(split_dir, exist_ok=True)

In [7]:
# Find the images that have no annotations and delete their files from img_dir.
#
# coco.getImgIds() returns every image listed in the annotation file, annotated
# or not, so it cannot be used to tell the two groups apart (the difference
# below would always be empty). An image counts as annotated only when
# getAnnIds() returns at least one annotation for it.
img_ids = set(img_ids)
ann_img_ids = {img_id for img_id in img_ids if coco.getAnnIds(imgIds=[img_id])}
unann_img_ids = img_ids - ann_img_ids

# NOTE: this deletes files from the source image folder and cannot be undone.
for img_id in sorted(unann_img_ids):
    img_info = coco.loadImgs(ids=[img_id])[0]
    img_filename = img_info['file_name']
    img_path = os.path.join(img_dir, img_filename)
    if os.path.exists(img_path):
        os.remove(img_path)

In [8]:
# Split sizes: half of the images (rounded down) go to train, the remainder to validation
num_annotated = len(ann_img_ids)
train_count = int(num_annotated * 0.5)
val_count = num_annotated - train_count

In [9]:
# Shuffle annotated image IDs
import random

# Sort before shuffling so the starting order is always the same: the result
# then depends only on the seed, not on set iteration order, and re-running
# this cell reproduces the same order instead of shuffling an already
# shuffled list.
ann_img_ids = sorted(ann_img_ids)
random.seed(42)
random.shuffle(ann_img_ids)

In [10]:
# Copy images to train and validation directories and build one COCO-style
# annotation dict per split.
#
# Within each split, images are renumbered by their position in that split's
# "images" list. The image entry and all of its annotations receive the same
# new id so that annotation["image_id"] always refers to an existing
# image["id"]. Copies of the dicts are stored, so the entries held by `coco`
# keep their original ids and this cell can be re-run safely.
train_ann = {"images": [], "annotations": [], "categories": coco.dataset["categories"]}
val_ann = {"images": [], "annotations": [], "categories": coco.dataset["categories"]}
for i, img_id in enumerate(ann_img_ids):
    img_info = coco.loadImgs(ids=[img_id])[0]
    img_filename = img_info['file_name']
    img_path = os.path.join(img_dir, img_filename)
    if not os.path.exists(img_path):
        # Image listed in the annotation file but missing on disk: skip it
        continue

    # The first train_count shuffled IDs go to train, the rest to validation
    if i < train_count:
        split_dir, split_ann = train_dir, train_ann
    else:
        split_dir, split_ann = val_dir, val_ann

    shutil.copy(img_path, os.path.join(split_dir, img_filename))

    new_img_id = len(split_ann["images"])
    split_ann["images"].append({**img_info, "id": new_img_id})
    # .get avoids inserting empty entries into the imgToAnns index
    for ann in coco.imgToAnns.get(img_id, []):
        split_ann["annotations"].append({**ann, "image_id": new_img_id})

In [11]:
import json

# Write each split's annotation dict as annotations.json inside its own folder
for split_dir, split_ann in ((train_dir, train_ann), (val_dir, val_ann)):
    out_path = os.path.join(split_dir, 'annotations.json')
    with open(out_path, 'w') as out_file:
        json.dump(split_ann, out_file)
