# Dataset Creation from Train video
1. First check if the files are in Google Drive; if so, then just copy from there (maybe)
2. Generate JPG images from the MP4 file and move them to "original/images"
3. Generate TXT labels from train_gt_mot.txt and move them to "original/labels"
4. Separate into Train, Validation and Test datasets

TODO
- Consider resizing to 640x640 images (also see this post about possible problems with this)
    - https://github.com/ultralytics/ultralytics/issues/4510
- Remove original images after copying to train,valid,test folders
- Add number of frames to dataset root folder to experiment with lower number of frames
- Improve the format of the image names (maybe)

# Pip packages

In [4]:
!pip install tqdm
!pip install ultralytics

Collecting ultralytics
  Using cached ultralytics-8.0.209-py3-none-any.whl.metadata (31 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Using cached torch-2.1.0-cp38-cp38-manylinux1_x86_64.whl.metadata (25 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Using cached torchvision-0.16.0-cp38-cp38-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting pandas>=1.1.4 (from ultralytics)
  Using cached pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting seaborn>=0.11.0 (from ultralytics)
  Using cached seaborn-0.13.0-py3-none-any.whl.metadata (5.3 kB)
Collecting py-cpuinfo (from ultralytics)
  Using cached py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Collecting thop>=0.1.1 (from ultralytics)
  Using cached thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting tzdata>=2022.1 (from pandas>=1.1.4->ultralytics)
  Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)
Collecting filelock (from torch>=1.8.0->ultralytics)
  Using cached filelo

# Filenames definitions

In [5]:
import os 

# Root folder holding the raw challenge data and every generated dataset.
# Set the FTC_DATA_DIR environment variable to run on another machine;
# without it the original hard-coded location is used.
data = os.environ.get('FTC_DATA_DIR', '/home/mario/ftc')

# Frames and labels extracted from the training video, before splitting.
original = os.path.join(data,'original')
original_images = os.path.join(original,'images')
original_labels = os.path.join(original,'labels')

# Destination folders of the train / valid / test split, each with an
# images/ and a labels/ subfolder.
train = os.path.join(data,'train')
train_images=os.path.join(train,'images')
train_labels=os.path.join(train,'labels')
valid = os.path.join(data,'valid')
valid_images=os.path.join(valid,'images')
valid_labels=os.path.join(valid,'labels')
test = os.path.join(data,'test')
test_images=os.path.join(test,'images')
test_labels=os.path.join(test,'labels')

# Raw inputs: the videos and the ground-truth label file for the train video.
train_video=os.path.join(data,'FTC-2024-data/Train/train.mp4')
train_labels_file=os.path.join(data,'FTC-2024-data/Train/train_gt_mot.txt')
development_video=os.path.join(data,'FTC-2024-data/Development/development.mp4')
test_video=os.path.join(data,'FTC-2024-data/Test/test.mp4')


## Generate original images from mp4 file

In [7]:
import cv2
import os
from tqdm import tqdm

# Extract every frame of the training video into original_images as 1-based,
# zero-padded JPEGs (000001.jpg, 000002.jpg, ...).
# NOTE: copying pre-extracted images from Drive is still a TODO; this cell
# only extracts from the MP4.
existing_images = []
if os.path.isdir(original_images):
    existing_images = [name for name in os.listdir(original_images) if name.endswith(".jpg")]

if existing_images:
    # Frames are already on disk, so skip the slow extraction. total_frames must
    # still be defined here because the label and split cells read it; take it
    # from what is actually on disk so it matches the files they will look for.
    total_frames = len(existing_images)
    print("Images already present: " + str(total_frames))
else:
    os.makedirs(original_images, exist_ok=True)

    capture = cv2.VideoCapture(train_video)
    try:
        total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        # total_frames=100

        for f in tqdm(range(1,total_frames+1)):
            ok, frame = capture.read()
            if not ok:
                # The reported frame count can exceed the frames that can really
                # be decoded; stop instead of handing an empty frame to imwrite,
                # and shrink total_frames to the number of images written.
                total_frames = f - 1
                print("Video ended early; frames extracted: " + str(total_frames))
                break
            cv2.imwrite(os.path.join(original_images,str(f).zfill(6)+".jpg"),frame, [cv2.IMWRITE_JPEG_QUALITY, 100])
    finally:
        # Release the decoder even if reading or writing a frame fails.
        capture.release()

    print("Images outputed: " + str(len(os.listdir(original_images))))


100%|██████████| 10000/10000 [15:40<00:00, 10.63it/s]

Images outputed: 10000





# Labels

In [8]:
# Convert the ground-truth file into one YOLO-format label file per frame,
# written to original_labels as <frame, zero-padded to 6 digits>.txt.
# Each output line is "0 center_x center_y width height", with all four values
# divided by the video frame size.

if not os.path.exists(original_labels):
    video_width = 0
    video_height = 0
    video_frame_count = 0
    cap = cv2.VideoCapture(train_video)
    if cap.isOpened():
        video_width  = cap.get(cv2.CAP_PROP_FRAME_WIDTH)   # float `width`
        video_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float `height`
        video_frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()

    print("Video width: " + str(video_width))
    print("Video height: " + str(video_height))

    # Fail before creating the label folder: an empty folder left behind by a
    # failed run would make the existence guard above skip this cell next time.
    if not video_width or not video_height:
        raise IOError("Could not read frame size from video: " + train_video)

    # Last frame to convert. Prefer the count set by the image-extraction cell;
    # fall back to the video's own frame count when that cell has not been run
    # in this kernel session.
    try:
        frame_limit = total_frames
    except NameError:
        frame_limit = video_frame_count

    os.makedirs(original_labels, exist_ok=True)

    print("Start translate labels into YOLO format.")
    print("Label source: " + train_labels_file)
    print("Label destnation: " + original_labels)

    current_frame = -1
    current_file = None
    try:
        with open(train_labels_file) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue  # tolerate blank lines (e.g. a trailing newline)
                # NOTE(review): the 3rd column is unpacked as `top` and the 4th as
                # `left`. The standard MOTChallenge layout is
                # frame, id, bb_left, bb_top, bb_width, bb_height, ... - confirm the
                # column order against this dataset's label specification.
                frame, bid, top, left, width, height, _, _, _, _ = line.split(" ")
                frame, bid, left, top, width, height = int(frame), int(bid), float(left), float(top), int(width), int(height)
                center_x = left + width / 2
                center_y = top + height/ 2
                rel_center_x = center_x / video_width
                rel_center_y = center_y / video_height
                rel_width = width / video_width
                rel_height = height / video_height
                if frame != current_frame:
                    # A new frame starts: finish the previous frame's file first.
                    if current_file is not None:
                        current_file.close()
                        current_file = None
                    if frame > frame_limit:
                        break
                    # Open the file once per frame. Reopening it in 'w' mode for
                    # every annotation line would truncate the boxes already
                    # written for this frame.
                    current_file = open(os.path.join(original_labels, str(frame).zfill(6) + ".txt"), 'w')
                    current_frame = frame
                current_file.write("0 " + str(rel_center_x) + " " + str(rel_center_y) + " " + str(rel_width) + " " + str(rel_height) + "\n")
    finally:
        # Flush and close the last frame's file, also when a line fails to parse.
        if current_file is not None:
            current_file.close()

    print("Translation finished!")
    print("Last frame number: " + str(current_frame))
    print("Labels outputed: " + str(len(os.listdir(original_labels))))

Video width: 2456.0
Video height: 2058.0
Start translate labels into YOLO format.
Label source: /home/mario/ftc/FTC-2024-data/Train/train_gt_mot.txt
Label destnation: /home/mario/ftc/original/labels
Translation finished!
Last frame number: 10000
Labels outputed: 10000


# Train / Validation / Test split

In [9]:
import os
import shutil
import random


# Randomly split the extracted frames 75% / 15% / 10% into train / valid / test
# and copy each frame's image and label file into the matching folder.
if not os.path.exists(train):
    # Number of frames to split. Prefer the count set by the image-extraction
    # cell; fall back to the images on disk when that cell has not been run in
    # this kernel session.
    try:
        frame_count = total_frames
    except NameError:
        frame_count = len([name for name in os.listdir(original_images) if name.endswith(".jpg")])

    ids = list(range(1, frame_count + 1))
    # Fixed seed on a private generator: the split is reproducible across runs
    # and the global `random` state is left untouched.
    random.Random(42).shuffle(ids)

    train_offset = int(0.75*frame_count)
    valid_offset = int(train_offset+0.15*frame_count)
    test_offset  = int(valid_offset+0.10*frame_count)
    print("train_offset:",train_offset)
    print("valid_offset:",valid_offset)
    print("test_offset:",test_offset)
    print()
    train_ids = ids[:train_offset]
    valid_ids = ids[train_offset:valid_offset]
    test_ids  = ids[valid_offset:]

    print('len(train):',len(train_ids))
    print('len(valid_ids):',len(valid_ids))
    print('len(test_ids):',len(test_ids))
    print()

    # Each split root paired with the frame ids assigned to it.
    splits = [(train, train_ids), (valid, valid_ids), (test, test_ids)]

    # Create folders
    for split_root, _ in splits:
        for sf in ("images", "labels"):
            os.makedirs(os.path.join(split_root, sf), exist_ok=True)

    # Copy every frame's image and label into its split folder, keeping the
    # zero-padded frame number as the file name.
    for split_root, split_ids in splits:
        destination_images = os.path.join(split_root, "images")
        destination_labels = os.path.join(split_root, "labels")
        for frame_id in split_ids:
            stem = str(frame_id).zfill(6)
            shutil.copy(os.path.join(original_images, stem + ".jpg"),
                        os.path.join(destination_images, stem + ".jpg"))
            shutil.copy(os.path.join(original_labels, stem + ".txt"),
                        os.path.join(destination_labels, stem + ".txt"))

    # Validate sizes of folders
    print("Train images: ",len(os.listdir(train_images)))
    print("Train labels: ",len(os.listdir(train_labels)))
    print("Valid images: ",len(os.listdir(valid_images)))
    print("Valid labels: ",len(os.listdir(valid_labels)))
    print("Test images: ",len(os.listdir(test_images)))
    print("Test labels: ",len(os.listdir(test_labels)))
    

train_offset: 7500
valid_offset: 9000
test_offset: 10000

len(train): 7500
len(valid_ids): 1500
len(train): 1000

Train images:  7500
Train labels:  7500
Valid images:  1500
Valid labels:  1500
Test images:  1000
Test labels:  1000
