In [1]:
import os 
from sklearn.model_selection import KFold
import shutil
from tqdm import tqdm
import labelme2yolo
import subprocess

In [2]:
tot_folds = 5
kf = KFold(n_splits=tot_folds, shuffle=False)

In [3]:
data_dir = os.getcwd() + '/training_data_3/'
file_list = [file for file in os.listdir(data_dir) if file.endswith('.jpg')]
train_jpg_files = []
test_jpg_files = []
for fold_idx, (train_indices, test_indices) in enumerate(kf.split(file_list)):
    train_jpg_files.append([file_list[i] for i in train_indices])
    test_jpg_files.append([file_list[i] for i in test_indices])

In [4]:
sum([len(x) for x in test_jpg_files])

91

In [5]:
train_json_files = [[filename.replace(".jpg", ".json") for filename in sublist] for sublist in train_jpg_files]
test_json_files = [[filename.replace(".jpg", ".json") for filename in sublist] for sublist in test_jpg_files]

In [6]:
def copy_select_files(source_dir, dest_dir, file_list):
    for filename in tqdm(file_list):
        if os.path.isfile(os.path.join(source_dir, filename)):
            source_path = os.path.join(source_dir, filename)
            destination_path = os.path.join(dest_dir, filename)
            shutil.copy2(source_path, destination_path)

In [7]:
def remove_files(directory_path):
    files = os.listdir(directory_path)
    for file_name in files:
        file_path = os.path.join(directory_path, file_name)
        if os.path.isfile(file_path):
            os.remove(file_path)    

In [8]:
def copy_all_files(source_dir, dest_dir):
    for filename in tqdm(os.listdir(source_dir)):
        if os.path.isfile(os.path.join(source_dir, filename)):
            source_path = os.path.join(source_dir, filename)
            destination_path = os.path.join(dest_dir, filename)
            shutil.copy2(source_path, destination_path)

In [9]:
#going over all the folds
source_dir = os.getcwd() + '/training_data_3'
for split in range(1):
    
    #create the training directory for that particular fold
    
    dest_dir = os.getcwd() + '/training_data_3_split_' + str(split+1) + '/'
    os.makedirs(dest_dir, exist_ok=True)
    
    #generate the training and test data for that fold for labelme2yolo
     
    copy_select_files(source_dir, dest_dir, test_jpg_files[split])
    copy_select_files(source_dir, dest_dir, test_json_files[split])
    subprocess.run('labelme2yolo --json_dir ./training_data_3_split_' + str(split+1) + '/ --val_size 0', shell=True)
    os.rename(dest_dir + 'YOLODataset', dest_dir + 'tmp')
    remove_files(dest_dir)
    
    copy_select_files(source_dir, dest_dir, train_jpg_files[split])
    copy_select_files(source_dir, dest_dir, train_json_files[split])
    subprocess.run('labelme2yolo --json_dir ./training_data_3_split_' + str(split+1) + '/ --val_size 0', shell=True)
    copy_select_files(dest_dir + 'tmp/images/train/', dest_dir + 'YOLODataset/images/val/', os.listdir(dest_dir + 'tmp/images/train/'))
    copy_select_files(dest_dir + 'tmp/labels/train/', dest_dir + 'YOLODataset/labels/val/', os.listdir(dest_dir + 'tmp/labels/train/'))
    shutil.rmtree(dest_dir + 'tmp')
    
    #now copy images for YOLO training
    source_train = dest_dir + 'YOLODataset/images/train'
    source_val = dest_dir + 'YOLODataset/images/val'

    train_dir = dest_dir + 'YOLODataset/datasets/train'
    val_dir = dest_dir + 'YOLODataset/datasets/val'
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    copy_all_files(source_train, train_dir)
    copy_all_files(source_val, val_dir)
    
    #now copy labels for YOLO training
    source_train = dest_dir + 'YOLODataset/labels/train'
    source_val = dest_dir + 'YOLODataset/labels/val'
    copy_all_files(source_train, train_dir)
    copy_all_files(source_val, val_dir)
    
    #changing yaml directory links
    with open(dest_dir + 'YOLODataset/dataset.yaml', 'r') as file:
        lines = file.readlines()
    lines[0] = 'train: ./datasets/train' + '\n'
    lines[1] = 'val: ./datasets/val' + '\n'
    with open(dest_dir + 'YOLODataset/dataset.yaml', 'w') as file:
        file.writelines(lines)

100%|██████████| 19/19 [00:00<00:00, 2153.42it/s]
100%|██████████| 19/19 [00:00<00:00, 3843.72it/s]
INFO:labelme2yolo:Converting train set ...
100%|██████████| 19/19 [00:03<00:00,  5.95it/s]
INFO:labelme2yolo:Converting val set ...
0it [00:00, ?it/s]
INFO:labelme2yolo:Converting test set ...
0it [00:00, ?it/s]
100%|██████████| 72/72 [00:00<00:00, 4567.30it/s]
100%|██████████| 72/72 [00:00<00:00, 3972.71it/s]
INFO:labelme2yolo:Converting train set ...
100%|██████████| 72/72 [00:11<00:00,  6.16it/s]
INFO:labelme2yolo:Converting val set ...
0it [00:00, ?it/s]
INFO:labelme2yolo:Converting test set ...
0it [00:00, ?it/s]
100%|██████████| 19/19 [00:00<00:00, 1415.51it/s]
100%|██████████| 19/19 [00:00<00:00, 4446.84it/s]
100%|██████████| 72/72 [00:00<00:00, 1539.82it/s]
100%|██████████| 19/19 [00:00<00:00, 1590.24it/s]
100%|██████████| 72/72 [00:00<00:00, 5673.62it/s]
100%|██████████| 19/19 [00:00<00:00, 5151.71it/s]


In [10]:
test_jpg_files[0]

['Post_Event_0053.jpg',
 'Post_Event_0020.jpg',
 'Post_Event_0038.jpg',
 'Post_Event_0066.jpg',
 'Post_Event_0081.jpg',
 'Post_Event_0000.jpg',
 'Post_Event_0074.jpg',
 'Post_Event_0057.jpg',
 'Post_Event_0058.jpg',
 'Post_Event_0029.jpg',
 'Post_Event_0042.jpg',
 'Post_Event_0028.jpg',
 'Post_Event_0077.jpg',
 'Post_Event_0072.jpg',
 'Post_Event_0082.jpg',
 'Post_Event_0051.jpg',
 'Post_Event_0044.jpg',
 'Post_Event_0005.jpg',
 'Post_Event_0016.jpg']