In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import os
import os.path as osp
from shutil import copyfile
from tqdm import tqdm
from natsort import natsorted

In [2]:
def split_dataset(prefix, test_size=0.1, random_state=42):
    """Split the image/label files of one dataset subset into train and test.

    Files are read from ``dataset/images/{prefix}`` and
    ``dataset/labels/{prefix}`` (relative to the current working directory),
    naturally sorted, and paired by position before being handed to
    ``train_test_split``.

    Args:
        prefix: Name of the sub-directory under ``dataset/images`` and
            ``dataset/labels``.
        test_size: Forwarded to ``train_test_split``; defaults to 0.1.
        random_state: Forwarded to ``train_test_split``; defaults to 42 so
            repeated runs produce the same split.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` where the ``X_*`` lists hold
        image paths and the ``y_*`` lists hold the label paths at the same
        positions.

    Raises:
        ValueError: If the image and label directories do not contain the
            same number of regular files.
    """
    import warnings

    image_prefix = f'dataset/images/{prefix}'
    label_prefix = f'dataset/labels/{prefix}'

    # Keep regular files only: os.listdir also returns sub-directories
    # (e.g. a notebook checkpoint folder), which cannot be copied as files
    # and would shift the positional pairing below.
    image_names = [
        name for name in os.listdir(image_prefix)
        if osp.isfile(osp.join(image_prefix, name))
    ]
    label_names = [
        name for name in os.listdir(label_prefix)
        if osp.isfile(osp.join(label_prefix, name))
    ]

    # Fail fast with an actionable message instead of a generic
    # "inconsistent numbers of samples" error from train_test_split.
    if len(image_names) != len(label_names):
        raise ValueError(
            f'{image_prefix} has {len(image_names)} files but '
            f'{label_prefix} has {len(label_names)}; '
            'images and labels cannot be paired one-to-one'
        )

    image_names = natsorted(image_names)
    label_names = natsorted(label_names)

    # Pairing is purely positional, so a missing file on one side that is
    # compensated by a stray file on the other would silently misalign every
    # following pair. Only warn (rather than raise) because the naming
    # convention of the dataset is not known here -- TODO confirm that image
    # and label files are expected to share their base name.
    for image_name, label_name in zip(image_names, label_names):
        if osp.splitext(image_name)[0] != osp.splitext(label_name)[0]:
            warnings.warn(
                f'image {image_name!r} is paired with label {label_name!r} '
                f'for prefix {prefix!r}; check that images and labels line up',
                stacklevel=2,
            )
            break

    image_list = [osp.join(image_prefix, name) for name in image_names]
    label_list = [osp.join(label_prefix, name) for name in label_names]

    X_train, X_test, y_train, y_test = train_test_split(
        image_list, label_list, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

In [3]:
# Create the train/test output tree; exist_ok=True keeps this cell re-runnable.
for output_dir in (
    'splited/train/images',
    'splited/train/labels',
    'splited/test/images',
    'splited/test/labels',
):
    os.makedirs(output_dir, exist_ok=True)

In [4]:
# Split the 'flame' subset, then copy every image and the label at the same
# position into the matching train/test output directory.
# NOTE: files are written under their base name into directories shared with
# the 'smoke' cell, and copyfile replaces an existing destination, so equal
# file names across subsets would overwrite each other.
X_train, X_test, y_train, y_test = split_dataset('flame')

train_pairs = list(zip(X_train, y_train))
for image_src, label_src in tqdm(train_pairs, desc='split train dataset'):
    image_dst = osp.join('splited/train/images', osp.basename(image_src))
    label_dst = osp.join('splited/train/labels', osp.basename(label_src))
    copyfile(image_src, image_dst)
    copyfile(label_src, label_dst)

test_pairs = list(zip(X_test, y_test))
for image_src, label_src in tqdm(test_pairs, desc='split test dataset'):
    image_dst = osp.join('splited/test/images', osp.basename(image_src))
    label_dst = osp.join('splited/test/labels', osp.basename(label_src))
    copyfile(image_src, image_dst)
    copyfile(label_src, label_dst)

split train dataset: 100%|██████████| 4705/4705 [00:03<00:00, 1279.32it/s]
split test dataset: 100%|██████████| 523/523 [00:00<00:00, 1348.32it/s]


In [5]:
# Split the 'smoke' subset, then copy every image and the label at the same
# position into the matching train/test output directory.
# NOTE: files are written under their base name into directories shared with
# the 'flame' cell, and copyfile replaces an existing destination, so equal
# file names across subsets would overwrite each other.
X_train, X_test, y_train, y_test = split_dataset('smoke')

train_pairs = list(zip(X_train, y_train))
for image_src, label_src in tqdm(train_pairs, desc='split train dataset'):
    image_dst = osp.join('splited/train/images', osp.basename(image_src))
    label_dst = osp.join('splited/train/labels', osp.basename(label_src))
    copyfile(image_src, image_dst)
    copyfile(label_src, label_dst)

test_pairs = list(zip(X_test, y_test))
for image_src, label_src in tqdm(test_pairs, desc='split test dataset'):
    image_dst = osp.join('splited/test/images', osp.basename(image_src))
    label_dst = osp.join('splited/test/labels', osp.basename(label_src))
    copyfile(image_src, image_dst)
    copyfile(label_src, label_dst)

split train dataset: 100%|██████████| 9147/9147 [00:50<00:00, 180.13it/s]
split test dataset: 100%|██████████| 1017/1017 [00:05<00:00, 180.44it/s]
