<div style="text-align:center;">
Python script that extracts images and their corresponding .xml annotations from the m2cai-tool-locations dataset into the 'extracted_data' directory, which is then uploaded to Roboflow to convert the annotations to YOLOv5 PyTorch format.
</div>


In [1]:
import os
import cv2
import shutil
import random
# Seed the RNG so the random.sample() frame selection below is reproducible.
random.seed(42)

In [2]:
# All dataset folders are resolved relative to the current working directory:
# source images, the output folder for the extracted subset, and the source
# annotations, in that order.
img_path, extract_path, annot_path = (
    os.path.join(os.getcwd(), folder_name)
    for folder_name in ('JPEGImages', 'extracted_data', 'Annotations')
)

In [3]:
# Create the output folder up front; exist_ok makes re-running this cell a no-op.
os.makedirs(extract_path, exist_ok=True)

In [6]:
# Bucket the non-flipped frames by their three-character video prefix.
v2, v3, v4 = [], [], []
buckets = {'v02': v2, 'v03': v3, 'v04': v4}

# Sorting the listing keeps each bucket in file-name order.
for image in sorted(os.listdir(img_path)):
    if 'flip' in image:
        continue  # files with 'flip' in the name are excluded from every bucket
    bucket = buckets.get(image[:3])
    if bucket is not None:
        bucket.append(image)

In [7]:
# Number of frames collected per video prefix (v02, v03, v04).
tuple(len(frames) for frames in (v2, v3, v4))

(266, 472, 192)

In [8]:
# Draw 250 frames without replacement from each of v02 and v03 (v02 first,
# then v03, so the seeded RNG is consumed in the same order as before).
v2, v3 = (random.sample(frames, 250) for frames in (v2, v3))

In [9]:
# Confirm both samples have the requested size.
tuple(map(len, (v2, v3)))

(250, 250)

In [10]:
# Combined selection: the sampled v02 frames followed by the sampled v03 frames.
selected_images = [*v2, *v3]

In [11]:
# Sanity check: total number of image files selected for extraction.
len(selected_images)

500

### Extract images and annotations from raw data

In [12]:
def extract_images(img_path, extract_path, img_list):
    """Copy the selected image files from ``img_path`` into ``extract_path``.

    Args:
        img_path: Directory containing the source images.
        extract_path: Destination directory; it is not created here.
        img_list: Iterable of file names (not full paths) to copy.

    Names in ``img_list`` that are not present in ``img_path`` are ignored.
    Prints a confirmation message when done and returns ``None``.
    """
    # A set gives constant-time membership tests instead of scanning the
    # whole list once per directory entry.
    wanted = set(img_list)
    for image in os.listdir(img_path):
        if image in wanted:
            shutil.copy(os.path.join(img_path, image), os.path.join(extract_path, image))
    print('Images Extracted...')
    
def extract_annot(annot_path, extract_path, annot_list):
    """Copy the selected annotation files from ``annot_path`` into ``extract_path``.

    Args:
        annot_path: Directory containing the source annotation files.
        extract_path: Destination directory; it is not created here.
        annot_list: Iterable of file names (not full paths) to copy.

    Names in ``annot_list`` that are not present in ``annot_path`` are ignored.
    Prints a confirmation message when done and returns ``None``.
    """
    # A set gives constant-time membership tests instead of scanning the
    # whole list once per directory entry.
    wanted = set(annot_list)
    for annot in os.listdir(annot_path):
        if annot in wanted:
            shutil.copy(os.path.join(annot_path, annot), os.path.join(extract_path, annot))
    print('Annotations Extracted...')

In [13]:
# Copy the sampled v02/v03 image files into the output folder.
extract_images(img_path, extract_path, selected_images)

Images Extracted...


In [14]:
# Derive each annotation file name from its image name by swapping only the
# final extension. str.replace('.jpg', '.xml') would also rewrite a '.jpg'
# occurring earlier in the name and would leave any other extension untouched.
selected_annot = [os.path.splitext(image)[0] + '.xml' for image in selected_images]

In [15]:
# Sanity check: one annotation name per selected image.
len(selected_annot)

500

In [16]:
# Copy the matching .xml annotation files into the same output folder.
extract_annot(annot_path, extract_path, selected_annot)

Annotations Extracted...


### Stitch images to generate a test video

In [17]:
# The first v04 frame determines the frame size of the output video.
if not v4:
    raise ValueError("No 'v04' frames found in " + img_path)
first_img = cv2.imread(os.path.join(img_path, v4[0]))
# cv2.imread returns None (it does not raise) when the file cannot be read,
# so fail here with a clear message instead of an AttributeError on .shape.
if first_img is None:
    raise IOError('Could not read image: ' + os.path.join(img_path, v4[0]))
# Slice instead of unpacking into `_`, which would shadow IPython's
# last-output variable in the notebook namespace.
height, width = first_img.shape[:2]

In [18]:
# Write 'input.mp4' with the 'mp4v' codec at 30 fps; VideoWriter takes the
# frame size as (width, height).
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
outWriter = cv2.VideoWriter('input.mp4', fourcc, 30.0, (width, height))
# VideoWriter does not raise when the codec or output file cannot be opened;
# without this check every later write() would be silently discarded.
if not outWriter.isOpened():
    raise RuntimeError("Could not open video writer for 'input.mp4'")

In [19]:
# Append every v04 frame to the video, in the order the names appear in v4.
try:
    for image_file in v4:
        image_path = os.path.join(img_path, image_file)
        frame = cv2.imread(image_path)
        # cv2.imread returns None for unreadable files; skip those frames
        # rather than handing None to the writer.
        if frame is None:
            print('Skipping unreadable frame: ' + image_path)
            continue
        outWriter.write(frame)
finally:
    # Always release the writer so the file is finalised even if a frame fails.
    outWriter.release()