<a href="https://colab.research.google.com/github/andreaderuvo/rc_car_tracking/blob/main/C3_generate_training_files_for_yolo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Project identity: both names are reused to build every path below ---
PROJECT = 'rc_car_tracking'
MODEL = 'yolo'

# --- Dataset parameters ---
# Class labels and the share of images assigned to the training split.
CLASSES = ['rc_car']
DATA_TRAIN_PERCENTAGE = 0.8

# --- Base locations ---
# Colab-side project path, Google Drive mount point and the project folder
# inside the mounted Drive.
COLAB_BASE_PROJECT = f'/content/{PROJECT}'
DRIVE_MOUNT_FOLDER = '/content/drive'
DRIVE_BASE_PROJECT = f'{DRIVE_MOUNT_FOLDER}/MyDrive/{PROJECT}'

# --- Working folders (all built on COLAB_BASE_PROJECT) ---
# Annotated images that are read, and the folder the generated files go to.
DRIVE_ANNOTATED_IMAGES_FOLDER = f'{COLAB_BASE_PROJECT}/{MODEL}_annotated_images'
DRIVE_TRAINING_FILES_FOLDER = f'{COLAB_BASE_PROJECT}/{MODEL}_training_files'

# --- Path prefixes written into the generated files ---
PREFIX_DATA_FOLDER_FOR_DARKNET = f'{COLAB_BASE_PROJECT}/{MODEL}_dataset'
PREFIX_WEIGHTS_FOLDER_FOR_DARKNET = f'{COLAB_BASE_PROJECT}/{MODEL}_trained_weights'

In [None]:
import os
import random
import math
import shutil

In [None]:
#mount Google Drive
from google.colab import drive
drive.mount(DRIVE_MOUNT_FOLDER, force_remount=True)
!ln -s $DRIVE_BASE_PROJECT $COLAB_BASE_PROJECT

In [None]:
# Recreate the training-files output folder from scratch: remove it together
# with anything inside it (errors during removal are ignored), then create it
# again as an empty folder.
shutil.rmtree(path=DRIVE_TRAINING_FILES_FOLDER, ignore_errors=True)
os.makedirs(name=DRIVE_TRAINING_FILES_FOLDER, exist_ok=True)

In [None]:
# Write classes.names: the entries of CLASSES separated by newlines, followed
# by one terminating newline.
names_path = f'{DRIVE_TRAINING_FILES_FOLDER}/classes.names'
with open(names_path, mode='w', encoding='utf-8') as names_file:
    names_file.write('\n'.join(CLASSES))
    names_file.write('\n')

In [None]:
# Delete every .jpg in the annotated-images folder that has no .txt file with
# the same base name next to it. The deletion is permanent.
for image_name in os.listdir(DRIVE_ANNOTATED_IMAGES_FOLDER):
  # Only .jpg entries are considered; everything else is left alone.
  if not image_name.endswith('.jpg'):
    continue
  annotation_path = f'{DRIVE_ANNOTATED_IMAGES_FOLDER}/{os.path.splitext(image_name)[0]}.txt'
  # An image with its annotation file present is kept.
  if os.path.exists(annotation_path):
    continue
  print(f'[Warning] It doesn\'t exist annotation file for image {image_name} and it will be removed!')
  os.remove(f'{DRIVE_ANNOTATED_IMAGES_FOLDER}/{image_name}')

In [None]:
# Build the train/test image lists and write them out as train.txt and test.txt.

# Candidate images: every .jpg entry of the annotated-images folder.
files = [name for name in os.listdir(DRIVE_ANNOTATED_IMAGES_FOLDER) if name.endswith('.jpg')]

# Shuffle in place so the split below is a random one (no seed is set, so the
# split differs from run to run).
random.shuffle(files)

# The first DATA_TRAIN_PERCENTAGE share of the shuffled list (rounded up) is
# the training split; whatever remains is the test split.
train_count = math.ceil(len(files) * DATA_TRAIN_PERCENTAGE)
train_files = files[:train_count]
test_files = files[len(train_files):]

# Each list file holds one image path per line, prefixed with
# PREFIX_DATA_FOLDER_FOR_DARKNET.
for list_name, image_names in (('train.txt', train_files), ('test.txt', test_files)):
  with open(f'{DRIVE_TRAINING_FILES_FOLDER}/{list_name}', 'w', encoding='utf-8') as list_file:
    list_file.writelines(f'{PREFIX_DATA_FOLDER_FOR_DARKNET}/{name}\n' for name in image_names)

In [None]:
# Generate labelled_data.data: the number of classes plus the locations of the
# train list, the validation (test) list and the class-names file, each
# prefixed with PREFIX_DATA_FOLDER_FOR_DARKNET.
# NOTE(review): the file is written to DRIVE_TRAINING_FILES_FOLDER while the
# paths inside it point at PREFIX_DATA_FOLDER_FOR_DARKNET; presumably a later
# step copies the generated files there - confirm.
# The encoding is given explicitly, like the other writers in this notebook,
# so the output does not depend on the platform default.
with open(f'{DRIVE_TRAINING_FILES_FOLDER}/labelled_data.data', 'w', encoding='utf-8') as data:
  data.write(f'classes = {len(CLASSES)}\n')
  data.write(f'train = {PREFIX_DATA_FOLDER_FOR_DARKNET}/train.txt\n')
  data.write(f'valid = {PREFIX_DATA_FOLDER_FOR_DARKNET}/test.txt\n')
  data.write(f'names = {PREFIX_DATA_FOLDER_FOR_DARKNET}/classes.names\n')
  # The 'backup' entry is left disabled, exactly as before.
  #data.write('backup = ' + PREFIX_WEIGHTS_FOLDER_FOR_DARKNET)

In [None]:
# Statistics: size of each split and the number of annotations found in the
# label files of the training split (test images are not counted).
print(f'Number of train images: {len(train_files)}')
print(f'Number of test images: {len(test_files)}')

annotations = 0

for file in train_files:
  txt_file = f'{DRIVE_ANNOTATED_IMAGES_FOLDER}/{os.path.splitext(file)[0]}.txt'
  # Open through a context manager so each label file is closed as soon as it
  # has been counted instead of leaking one open handle per image.
  with open(txt_file, encoding='utf-8') as label_file:
    # One annotation per line; blank lines (e.g. a trailing empty line) are
    # skipped so they do not inflate the count.
    annotations += sum(1 for line in label_file if line.strip())

print(f'Number of annotations: {annotations}')