<a href="https://colab.research.google.com/github/andreaderuvo/rc_car_tracking/blob/main/datasets/generate_training_files_for_yolo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Local folder holding the annotated images (.jpg) and their label files (.txt).
INPUT_ANNOTATED_IMAGES_FOLDER = "annotated_images"
# Local folder where the generated training files are written.
OUTPUT_TRAINING_FILES_FOLDER = "training_files"
# Class names; one line per entry is written to the class-names file.
CLASSES = ["rc_car"]
# Fraction (0..1) of the images assigned to the training split; the rest is the test split.
DATA_TRAIN_PERCENTAGE = 0.8
# Path prefix written in front of image and list-file names in the generated files.
PREFIX_DATA_FOLDER_FOR_DARKNET = "data"
# Value written as the `backup` entry of the generated .data file.
PREFIX_WEIGHTS_FOLDER_FOR_DARKNET = "weights"

In [None]:
import os
import random
import math
import shutil

In [None]:
# Start from a clean slate: wipe both working folders, then recreate them empty.
# NOTE(review): this also removes anything already placed in
# INPUT_ANNOTATED_IMAGES_FOLDER before this cell runs.
working_folders = (INPUT_ANNOTATED_IMAGES_FOLDER, OUTPUT_TRAINING_FILES_FOLDER)

for folder in working_folders:
    # ignore_errors=True keeps a missing folder from raising.
    shutil.rmtree(folder, ignore_errors=True)

for folder in working_folders:
    os.makedirs(folder, exist_ok=True)

In [None]:
# Detect whether the notebook is running on Google Colab by probing for the
# `google.colab` package; IN_COLAB gates the Google Drive cells.
try:
    import google.colab  # noqa: F401 -- imported only to probe availability
except ImportError:
    # Only a failed import means "not on Colab". A bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
    IN_COLAB = False
else:
    IN_COLAB = True

In [None]:
# Import the annotated images from Google Drive (runs only on Colab): mount
# the drive, copy every entry of the Drive folder into
# INPUT_ANNOTATED_IMAGES_FOLDER, then flush and unmount.
DRIVE_MOUNT_FOLDER = 'drive'
DRIVE_ANNOTATED_IMAGES_FOLDER = 'MyDrive/rc_car_tracking/annotated_images'

if IN_COLAB:
    from google.colab import drive

    drive.mount(DRIVE_MOUNT_FOLDER, force_remount=True)
    try:
        annotated_images_folder = os.path.join(DRIVE_MOUNT_FOLDER, DRIVE_ANNOTATED_IMAGES_FOLDER)
        for entry in os.listdir(annotated_images_folder):
            shutil.copy(os.path.join(annotated_images_folder, entry), INPUT_ANNOTATED_IMAGES_FOLDER)
    finally:
        # Always release the mount, even if listing or copying fails part-way.
        drive.flush_and_unmount()

In [None]:
# Write the class-names file: every entry of CLASSES on its own line.
names_path = OUTPUT_TRAINING_FILES_FOLDER + '/class.names'
with open(names_path, mode='w', encoding='utf-8') as names_file:
    # print() supplies the newline that terminates the last class name.
    print('\n'.join(CLASSES), file=names_file)

In [None]:
# Drop every .jpg image that has no .txt annotation file with the same base name.
for image_name in os.listdir(INPUT_ANNOTATED_IMAGES_FOLDER):
    if not image_name.endswith('.jpg'):
        continue  # only .jpg images are checked
    base_name = os.path.splitext(image_name)[0]
    annotation_path = f'{INPUT_ANNOTATED_IMAGES_FOLDER}/{base_name}.txt'
    if os.path.exists(annotation_path):
        continue  # annotated image: keep it
    print(f"[Warning] It doesn't exist annotation file for image {image_name} and it will be removed!")
    os.remove(f'{INPUT_ANNOTATED_IMAGES_FOLDER}/{image_name}')

In [None]:
# Build a random train/test split of the images and write train.txt / test.txt.

# Collect every .jpg image found in the input folder.
files = [name for name in os.listdir(INPUT_ANNOTATED_IMAGES_FOLDER) if name.endswith('.jpg')]

# Shuffle in place so the split is random (unseeded: differs between runs).
random.shuffle(files)

# The first ceil(len * DATA_TRAIN_PERCENTAGE) images form the training set;
# everything after them is the test set.
train_count = math.ceil(len(files) * DATA_TRAIN_PERCENTAGE)
train_files = files[:train_count]
test_files = files[len(train_files):]

# Each list file holds one image path per line, prefixed with the data folder.
for list_name, image_names in (('train.txt', train_files), ('test.txt', test_files)):
    with open(f'{OUTPUT_TRAINING_FILES_FOLDER}/{list_name}', 'w', encoding='utf-8') as list_file:
        list_file.writelines(f'{PREFIX_DATA_FOLDER_FOR_DARKNET}/{name}\n' for name in image_names)

In [None]:
# Generate labelled_data.data: number of classes, the train/validation list
# files, the class-names file and the weights backup folder, one
# `key = value` entry per line (no trailing newline after the last entry).
data_lines = [
    f'classes = {len(CLASSES)}',
    f'train = {PREFIX_DATA_FOLDER_FOR_DARKNET}/train.txt',
    f'valid = {PREFIX_DATA_FOLDER_FOR_DARKNET}/test.txt',
    # Must match the name of the class-names file this notebook actually
    # writes (class.names); otherwise the entry points at a missing file.
    f'names = {PREFIX_DATA_FOLDER_FOR_DARKNET}/class.names',
    f'backup = {PREFIX_WEIGHTS_FOLDER_FOR_DARKNET}',
]
with open(f'{OUTPUT_TRAINING_FILES_FOLDER}/labelled_data.data', 'w', encoding='utf-8') as data:
    data.write('\n'.join(data_lines))

In [None]:
# Statistics: size of each split and the number of annotations found in the
# label files of the training images.
print(f'Number of train images: {len(train_files)}')
print(f'Number of test images: {len(test_files)}')

# NOTE(review): only the training images are counted below -- confirm that
# the test split is meant to be left out of this total.
annotations = 0

for image_name in train_files:
    txt_file = f'{INPUT_ANNOTATED_IMAGES_FOLDER}/{os.path.splitext(image_name)[0]}.txt'
    # Use a context manager so each label file is closed right after reading
    # instead of leaking one open handle per image.
    with open(txt_file, encoding='utf-8') as annotation_file:
        # One annotation per line; blank lines (e.g. a trailing empty line)
        # are not annotations and are skipped.
        annotations += sum(1 for line in annotation_file if line.strip())

print(f'Number of annotations: {annotations}')

In [None]:
# Upload the annotated images and the generated training files to Google
# Drive (runs only on Colab): mount the drive, copy every entry of both local
# folders into the same Drive folder, then flush and unmount.
DRIVE_MOUNT_FOLDER = 'drive'
DRIVE_UPLOAD_FOLDER = 'MyDrive/rc_car_tracking/yolo_dataset'

if IN_COLAB:
    from google.colab import drive

    drive.mount(DRIVE_MOUNT_FOLDER, force_remount=True)
    try:
        google_drive_folder = os.path.join(DRIVE_MOUNT_FOLDER, DRIVE_UPLOAD_FOLDER)
        os.makedirs(google_drive_folder, exist_ok=True)
        # Images/labels first, then the training files, as separate passes.
        for source_folder in (INPUT_ANNOTATED_IMAGES_FOLDER, OUTPUT_TRAINING_FILES_FOLDER):
            for entry in os.listdir(source_folder):
                shutil.copy(os.path.join(source_folder, entry), google_drive_folder)
    finally:
        # Always flush pending writes and release the mount, even if a copy fails.
        drive.flush_and_unmount()