In [1]:
from pycocotools.coco import COCO
import requests
import cv2

## Download COCO samples for `person` class

### Load annotations

In [2]:
# Parse the train2017 instance annotations and build the in-memory COCO index
annotations_file = r"E:\GitHub\porsche-students-contest\dataset\coco-related\annotations_trainval2017\annotations\instances_train2017.json"
coco = COCO(annotations_file)

loading annotations into memory...
Done (t=13.60s)
creating index...
index created!


### Get only ids from `person` class

In [3]:
# Look up the COCO category id(s) registered under the category name `person`
catIds = coco.getCatIds(catNms=['person'])

In [4]:
# Ids of the images selected by the `person` category filter
imgIds = coco.getImgIds(catIds=catIds)
# Image records for those ids; the download loop reads each record's
# 'coco_url', 'id', 'width', 'height' and 'file_name' fields
images = coco.loadImgs(imgIds)

### Take each image and its annotations and save them locally

In [7]:
# Download the first MAX_SAMPLES `person` images and, for each one, write a YOLO
# label file with one "<class> <x_centre> <y_centre> <width> <height>" line per
# bounding box (all four values normalised by the image size).
MAX_SAMPLES = 3682      # number of `person` images to fetch
YOLO_CLASS_ID = 2       # class index written for every box in the label files
REQUEST_TIMEOUT = 30    # seconds; stops a stalled connection from hanging the loop

for im in images[:MAX_SAMPLES]:
    # Fetch the image first, so a failed download leaves no orphan label file
    response = requests.get(im['coco_url'], timeout=REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors instead of saving an error page as an image
    response.raise_for_status()
    img_data = response.content

    # Image size (used for normalisation) and the file name without extension.
    # The stem is taken with split(".")[0], the same rule the train/test/val
    # split cells use to find the label that belongs to an image.
    img_width = im['width']
    img_height = im['height']
    file_name = im['file_name'].split(".")[0]

    # Load the `person` annotations of this specific image
    annId = coco.getAnnIds(imgIds=im['id'], catIds=catIds)
    anns = coco.loadAnns(annId)

    # Convert annotations from COCO (top-left x, y, width, height in pixels)
    # to YOLO (normalised centre x, centre y, width, height)
    label_lines = []
    for ann in anns:
        x, y, w, h = ann['bbox']

        # Midpoints of the box
        x_centre = (x + (x + w)) / 2
        y_centre = (y + (y + h)) / 2

        # Normalisation by the image size, limited to 6 decimal places
        label_lines.append(
            f"{YOLO_CLASS_ID} "
            f"{x_centre / img_width:.6f} {y_centre / img_height:.6f} "
            f"{w / img_width:.6f} {h / img_height:.6f}\n"
        )

    # Write the label file. Mode "w" (not "a") so that re-running this cell
    # replaces the labels instead of appending a duplicate of every box.
    with open(r"E:/GitHub/porsche-students-contest/dataset/coco-related/person/labels/" + file_name + ".txt", "w") as file_object:
        file_object.writelines(label_lines)

    # Write the image in the images folder
    with open(r'E:/GitHub/porsche-students-contest/dataset/coco-related/person/images/' + im['file_name'], 'wb') as handler:
        handler.write(img_data)

### Split `person` dataset into train-test-val

In [8]:
import random
import shutil
import os

In [10]:
# Source folders holding the downloaded `person` images and their YOLO labels
imgs_path, lbls_path = (
    r"E:/GitHub/porsche-students-contest/dataset/coco-related/person/images/",
    r"E:/GitHub/porsche-students-contest/dataset/coco-related/person/labels/",
)

# Destination folders of the test split
test_imgs_path, test_lbls_path = (
    r"E:/GitHub/porsche-students-contest/dataset/test/images/",
    r"E:/GitHub/porsche-students-contest/dataset/test/labels/",
)

# Destination folders of the validation split
val_imgs_path, val_lbls_path = (
    r"E:/GitHub/porsche-students-contest/dataset/validation/images/",
    r"E:/GitHub/porsche-students-contest/dataset/validation/labels/",
)

In [14]:
# Move a random sample of image/label pairs from the `person` folders into the
# test split.
TEST_SIZE = 398     # number of samples moved to the test split
SPLIT_SEED = 42     # fixed seed so the same files are selected on every run

# Read images. os.listdir returns entries in arbitrary order, so sort them:
# together with the fixed seed this makes the selection reproducible.
images = sorted(os.listdir(imgs_path))

if len(images) < TEST_SIZE:
    raise ValueError(
        f"Cannot draw {TEST_SIZE} test samples: only {len(images)} images in {imgs_path}"
    )

# Make sure the destination folders exist before moving anything into them
os.makedirs(test_imgs_path, exist_ok=True)
os.makedirs(test_lbls_path, exist_ok=True)

# Select random indexes for test dataset
random_test_idxs = random.Random(SPLIT_SEED).sample(range(len(images)), TEST_SIZE)

# Move images (and the label file with the same stem) from base to test
for idx in random_test_idxs:
    # Stem taken with split(".")[0], matching how the label files were named
    filename = images[idx].split(".")[0]
    shutil.move(os.path.join(imgs_path, images[idx]), os.path.join(test_imgs_path, images[idx]))
    shutil.move(os.path.join(lbls_path, filename + ".txt"), os.path.join(test_lbls_path, filename + ".txt"))

In [15]:
# Move a random sample of the remaining image/label pairs from the `person`
# folders into the validation split.
VAL_SIZE = 750      # number of samples moved to the validation split
SPLIT_SEED = 42     # fixed seed so the same files are selected on every run

# Read the remaining images. os.listdir returns entries in arbitrary order, so
# sort them: together with the fixed seed this makes the selection reproducible.
images = sorted(os.listdir(imgs_path))

if len(images) < VAL_SIZE:
    raise ValueError(
        f"Cannot draw {VAL_SIZE} validation samples: only {len(images)} images in {imgs_path}"
    )

# Make sure the destination folders exist before moving anything into them
os.makedirs(val_imgs_path, exist_ok=True)
os.makedirs(val_lbls_path, exist_ok=True)

# Select random indexes for validation dataset
random_val_idxs = random.Random(SPLIT_SEED).sample(range(len(images)), VAL_SIZE)

# Move images (and the label file with the same stem) from base to validation
for idx in random_val_idxs:
    # Stem taken with split(".")[0], matching how the label files were named
    filename = images[idx].split(".")[0]
    shutil.move(os.path.join(imgs_path, images[idx]), os.path.join(val_imgs_path, images[idx]))
    shutil.move(os.path.join(lbls_path, filename + ".txt"), os.path.join(val_lbls_path, filename + ".txt"))

## Change class for latest photos

In [2]:
import os

In [21]:
# Names of all entries in the training labels folder
train_labels_dir = r"E:/GitHub/porsche-students-contest/dataset/train/labels"
labels = os.listdir(train_labels_dir)

In [22]:
# Keep only the label files whose name contains "Strisce"
labels_new = list(filter(lambda name: "Strisce" in name, labels))

In [23]:
# Re-label the selected files: every annotation line whose first character is
# "0" gets that character replaced by "1"; all other lines are kept unchanged.
#
# Keeping the other lines matters: if they were dropped, running this cell a
# second time (when every line already starts with "1") would rewrite each file
# as empty and destroy the labels.
for label in labels_new:
    label_path = os.path.join(r"E:/GitHub/porsche-students-contest/dataset/train/labels", label)

    # Read the whole file before rewriting it
    with open(label_path) as file_in:
        lines = file_in.readlines()

    new_lines = [
        "1" + line[1:] if line.startswith("0") else line
        for line in lines
    ]

    # Mode 'w' truncates the existing file, so no separate os.remove is needed
    with open(label_path, 'w') as f:
        f.writelines(new_lines)