## Importing Required Libraries

In [3]:
import os
import glob
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import shutil

## Data Preparation Functions

In [2]:
def create_directories(parent_dir):
    """
    Ensure the train/test folder layout exists under ``parent_dir``.

    For each of the ``train`` and ``test`` splits, an ``images`` and a ``labels``
    sub-folder is created. Folders that already exist are left untouched.

    Parameters:
        parent_dir (str): Directory under which the layout is created.

    Returns:
        None
    """
    for split in ("train", "test"):
        for kind in ("images", "labels"):
            # exist_ok=True makes repeated calls harmless
            os.makedirs(f'{parent_dir}/{split}/{kind}', exist_ok=True)

def get_image_names_and_split(source_img_folder, test_size=0.14, random_state=2):
    """
    Collect image IDs from a folder and split them into training and testing sets.

    An image ID is the file name stripped of its directory and extension.

    Parameters:
        source_img_folder (str): Path to the folder holding the images, with or
            without a trailing path separator.
        test_size (optional): Forwarded to ``train_test_split`` (default is 0.14).
        random_state (optional): Forwarded to ``train_test_split`` (default is 2).

    Returns:
        tuple: ``(img_train, img_test)``, the two sets of image IDs.
    """
    # Join the wildcard onto the folder instead of concatenating strings, so a path
    # without a trailing separator does not become a prefix pattern ("images*")
    # that matches sibling entries instead of the folder's contents.
    pattern = os.path.join(source_img_folder, "*")

    # glob returns entries in arbitrary order; sorting makes the split depend only
    # on the folder contents and random_state.
    images_list = sorted(glob.glob(pattern))
    images_id = [os.path.splitext(os.path.basename(img_path))[0] for img_path in images_list]

    img_train, img_test = train_test_split(images_id, test_size=test_size, random_state=random_state)
    return img_train, img_test


def copy_images_and_labels(source_img_folder, source_label_folder, img_data, des_label_folder, des_img_folder, ext=".JPG"):
    """
    Copy image and label files from source folders to destination folders.

    For every ID in ``img_data`` the file ``<ID><ext>`` is copied from the image
    folder and ``<ID>.txt`` is copied from the label folder.

    Parameters:
        source_img_folder (str): Path to the source image folder.
        source_label_folder (str): Path to the source label folder.
        img_data (list): List of image IDs (file names without extension) to copy.
        des_label_folder (str): Path to the destination label folder.
        des_img_folder (str): Path to the destination image folder. When empty,
            nothing is copied (neither images nor labels).
        ext (str, optional): File extension for image files (default is ".JPG").

    Returns:
        None

    Raises:
        Any error raised by ``shutil.copy``, e.g. when a source file is missing.
    """
    # The destination check does not depend on the item, so do it once up front.
    if not des_img_folder:
        return

    for img_id in tqdm(img_data):
        image_source = os.path.join(source_img_folder, img_id + ext)
        shutil.copy(image_source, des_img_folder)

        label_source = os.path.join(source_label_folder, img_id + ".txt")
        shutil.copy(label_source, des_label_folder)

## Main Execution

In [None]:
def main():
    """
    Build the train/test dataset layout under ``../Dataset``.

    Creates the destination folders, splits the image IDs found in
    ``Annotated_Dataset/images/`` into training and testing sets, and copies
    each image together with its label file into the matching split folder.
    """
    parent_dir = "../Dataset"

    print("Creating directories...")
    create_directories(parent_dir)
    print("Directories created.")

    # parent_dir has no trailing slash, so a separator is needed before the
    # sub-folder name. The trailing slash on the image folder is kept because
    # the ID lookup builds its glob pattern from this path.
    source_img_folder = f'{parent_dir}/Annotated_Dataset/images/'
    source_label_folder = f"{parent_dir}/Annotated_Dataset/labels/"
    test_size = 0.14
    img_train, img_test = get_image_names_and_split(source_img_folder, test_size=test_size, random_state=2)

    # Destination arguments are positional: label folder first, then image folder.
    print("Copying images and labels for the training set...")
    copy_images_and_labels(source_img_folder, source_label_folder, img_train, f'{parent_dir}/train/labels', f'{parent_dir}/train/images/', ext=".JPG")
    print("Training set copied.")

    print("Copying images and labels for the test set...")
    copy_images_and_labels(source_img_folder, source_label_folder, img_test, f'{parent_dir}/test/labels', f'{parent_dir}/test/images/', ext=".JPG")
    print("Test set copied.")


if __name__ == "__main__":
    main()

## Counting Files in Test Directories







In [None]:
# Count the entries in the test split folders via the shell.
# NOTE(review): in the visible code `parent_dir` is only assigned inside main();
# unless it is also defined at notebook scope, `{parent_dir}` will likely not be
# expanded here — confirm before relying on these counts.
!echo "Number of files in test/images:" && ls {parent_dir}/test/images/ | wc -l
!echo "Number of files in test/labels:" && ls {parent_dir}/test/labels/ | wc -l

## Counting Files in Train Directories


In [None]:
# Count the entries in the train split folders via the shell.
# NOTE(review): in the visible code `parent_dir` is only assigned inside main();
# unless it is also defined at notebook scope, `{parent_dir}` will likely not be
# expanded here — confirm before relying on these counts.
!echo "Number of files in train/images:" && ls {parent_dir}/train/images/ | wc -l
!echo "Number of files in train/labels:" && ls {parent_dir}/train/labels/ | wc -l