In [1]:
import os
import shutil
import random

In [8]:
# Seed Python's global random generator so that the random.shuffle call inside
# move_dataset produces the same permutation for the same input list on a
# fresh kernel run.
random.seed(42)

def move_files(files, source_folder, target_folder, extension):
    """
    Move the ``extension`` counterpart of each ``.jpg`` name between folders.

    For every entry in ``files`` a trailing ``.jpg`` suffix is swapped for
    ``extension``; the resulting file is then moved from ``source_folder`` to
    ``target_folder`` under the same name. Names that do not end in ``.jpg``
    are moved unchanged.

    Args:
        files: Iterable of file names (not full paths), normally ending in
            ``.jpg``.
        source_folder: Directory that currently holds the files.
        target_folder: Directory the files are moved into.
        extension: Extension, including the leading dot (e.g. ``'.txt'``),
            of the file to move for each name.

    Raises:
        OSError: Propagated from ``shutil.move``, e.g. ``FileNotFoundError``
            when the expected file is missing from ``source_folder``.
    """
    jpg_suffix = '.jpg'
    for file in files:
        # Swap only the trailing '.jpg'. str.replace would also rewrite a
        # '.jpg' in the middle of the name (e.g. 'b.jpg.v2.jpg') and then
        # look for a file that does not exist.
        if file.endswith(jpg_suffix):
            file = file[:-len(jpg_suffix)] + extension
        shutil.move(os.path.join(source_folder, file),
                    os.path.join(target_folder, file))
   

def move_dataset(source_path, target_path, train_ratio=0.8):
    """
    Split a flat folder of images and labels into train/val folders and move them.

    Every ``.jpg`` file directly inside ``source_path`` is assigned at random
    to a train or validation split. Each image, and the ``.txt`` file with
    the same base name, is then moved into::

        <target_path>/datasets/images/{train,val}
        <target_path>/datasets/labels/{train,val}

    The target directories are created if they do not exist.

    Args:
        source_path: Directory holding the ``.jpg`` files and their ``.txt``
            counterparts side by side.
        target_path: Directory under which the ``datasets`` tree is created.
        train_ratio: Fraction of the images placed in the train split; the
            remainder goes to val. Defaults to 0.8.

    Raises:
        OSError: Propagated from ``os.listdir`` or ``move_files``, e.g. when
            ``source_path`` does not exist or a ``.txt`` file is missing.
    """
    dataset_root = os.path.join(target_path, 'datasets')
    image_train_dir = os.path.join(dataset_root, 'images', 'train')
    image_val_dir = os.path.join(dataset_root, 'images', 'val')
    label_train_dir = os.path.join(dataset_root, 'labels', 'train')
    label_val_dir = os.path.join(dataset_root, 'labels', 'val')

    # Ensure the target directories exist
    for directory in (image_train_dir, image_val_dir, label_train_dir, label_val_dir):
        os.makedirs(directory, exist_ok=True)

    # Only consider jpg files. os.listdir returns entries in arbitrary order,
    # so sort first: otherwise a seeded shuffle still yields a different
    # split from one machine or filesystem to the next.
    jpg_files = sorted(file for file in os.listdir(source_path) if file.endswith('.jpg'))
    train_size = int(len(jpg_files) * train_ratio)

    # Shuffle with the global random generator, then cut into the two splits
    random.shuffle(jpg_files)
    train_files = jpg_files[:train_size]
    val_files = jpg_files[train_size:]

    # Move images to appropriate folders
    move_files(train_files, source_path, image_train_dir, '.jpg')
    move_files(val_files, source_path, image_val_dir, '.jpg')

    # Move labels to appropriate folders
    move_files(train_files, source_path, label_train_dir, '.txt')
    move_files(val_files, source_path, label_val_dir, '.txt')


# Parcels

In [10]:
# Forward slashes are used (as in the other cells) so the paths resolve on
# every platform; a literal backslash is only a separator on Windows.
# NOTE: the variable name `soure_parcel_path` (sic) is kept unchanged in case
# other cells reference it.
soure_parcel_path = 'separate_datasets_inv4/parcels'
target_parcel_path = 'separate_datasets_inv11/parcels'
move_dataset(soure_parcel_path, target_parcel_path)

# Second parcel source folder, moved into the same target tree.
soure_parcel_path = 'separate_datasets_inv4/parcels_2'
move_dataset(soure_parcel_path, target_parcel_path)

# Doors

In [11]:
# Both processed door folders are split and moved into one target tree.
target_door_path = 'separate_datasets_inv11/doors'
for source_door_path in (
    'separate_datasets_inv4/doors_processed',
    'separate_datasets_inv4/doors2_processed',
):
    move_dataset(source_door_path, target_door_path)

# People

In [9]:
# Single person source folder, split and moved into its own target tree.
target_people_path = 'separate_datasets_inv11/person'
source_people_path = 'separate_datasets_inv4/person'
move_dataset(source_path=source_people_path, target_path=target_people_path)

# SVHN Numbers

In [12]:
# numbers_svhn source folder, split and moved into its own target tree.
source_numbers_path, target_numbers_path = (
    'separate_datasets_inv4/numbers_svhn',
    'separate_datasets_inv11/numbers_svhn',
)
move_dataset(source_numbers_path, target_numbers_path)