Script to split data into training and testing datasets.

## Load Google Drive

In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, and the dataset paths used later presumably live under this
# mount point -- confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

## Import necessary libraries

In [None]:
import os
import shutil
import random
from tqdm import tqdm

## Define the dataset splitting function

In [None]:
def splitter(PARENT_FOLDER: str, TRAIN_DIR: str, TEST_DIR: str, TRAIN_SIZE: float) -> None:
    """Copy a class-per-folder dataset into separate train and test trees.

    Every sub-directory of ``PARENT_FOLDER`` is treated as one class. The
    files of each class are shuffled, the first ``TRAIN_SIZE`` fraction is
    copied to ``TRAIN_DIR/<class>`` and the remainder to ``TEST_DIR/<class>``.
    The source files are left untouched.

    Args:
        PARENT_FOLDER: Directory containing one sub-directory per class.
        TRAIN_DIR: Destination root for the training split (created if missing).
        TEST_DIR: Destination root for the testing split (created if missing).
        TRAIN_SIZE: Fraction of each class, between 0 and 1 inclusive, that
            goes to the training split. The per-class count is truncated
            towards zero.

    Raises:
        ValueError: If ``TRAIN_SIZE`` is outside the range [0, 1].

    Note:
        Existing files in the destination trees are not removed. Running the
        function twice into the same destination reshuffles the data, so the
        same file can end up in both the train and the test tree.
    """
    if not 0.0 <= TRAIN_SIZE <= 1.0:
        raise ValueError(f"TRAIN_SIZE must be between 0 and 1, got {TRAIN_SIZE!r}")

    # The output trees may live inside PARENT_FOLDER; they must never be
    # mistaken for classes (this would otherwise happen on a second run).
    output_dirs = {os.path.abspath(TRAIN_DIR), os.path.abspath(TEST_DIR)}

    # Only directories are classes: stray files such as .DS_Store or a README
    # would otherwise crash the per-class listing below. Sorting makes the
    # traversal order independent of the file system.
    classes = sorted(
        name
        for name in os.listdir(PARENT_FOLDER)
        if os.path.isdir(os.path.join(PARENT_FOLDER, name))
        and os.path.abspath(os.path.join(PARENT_FOLDER, name)) not in output_dirs
    )

    os.makedirs(TRAIN_DIR, exist_ok=True)
    os.makedirs(TEST_DIR, exist_ok=True)

    for cls in tqdm(classes):
        class_dir = os.path.join(PARENT_FOLDER, cls)
        train_class_dir = os.path.join(TRAIN_DIR, cls)
        test_class_dir = os.path.join(TEST_DIR, cls)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # Keep regular files only (shutil.copy cannot copy a directory) and
        # sort before shuffling so that a caller who seeds ``random`` gets the
        # same split on every run; os.listdir order is arbitrary.
        images = sorted(
            name
            for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        random.shuffle(images)

        split_idx = int(TRAIN_SIZE * len(images))
        assignments = (
            (train_class_dir, images[:split_idx]),
            (test_class_dir, images[split_idx:]),
        )
        for target_dir, subset in assignments:
            for img in subset:
                shutil.copy(os.path.join(class_dir, img), os.path.join(target_dir, img))

## Splitting ASL Dataset

In [None]:
# ASL split configuration -- replace the placeholder paths before running.
PARENT_FOLDER = "/path/to/asl/dataset"
DESTINATION_FOLDER = "/path/to/dir/with/train/and/test/sets"
TRAIN_SIZE = 0.85  # 85% of the dataset goes to training, the rest to testing

# The train and test trees sit side by side under the destination folder.
TRAIN_DIR, TEST_DIR = (
    os.path.join(DESTINATION_FOLDER, subset) for subset in ("train", "test")
)
splitter(
    PARENT_FOLDER=PARENT_FOLDER,
    TRAIN_DIR=TRAIN_DIR,
    TEST_DIR=TEST_DIR,
    TRAIN_SIZE=TRAIN_SIZE,
)

## Splitting ISL Dataset

In [None]:
# ISL split configuration -- replace the placeholder paths before running.
# NOTE(review): the destination placeholder is identical to the one in the
# ASL cell; presumably each dataset should get its own destination folder so
# the two splits do not land in the same train/test trees -- confirm.
PARENT_FOLDER = "/path/to/isl/dataset"
DESTINATION_FOLDER = "/path/to/dir/with/train/and/test/sets"
TRAIN_SIZE = 0.85  # 85% of the dataset goes to training, the rest to testing

# The train and test trees sit side by side under the destination folder.
TRAIN_DIR, TEST_DIR = (
    os.path.join(DESTINATION_FOLDER, subset) for subset in ("train", "test")
)
splitter(
    PARENT_FOLDER=PARENT_FOLDER,
    TRAIN_DIR=TRAIN_DIR,
    TEST_DIR=TEST_DIR,
    TRAIN_SIZE=TRAIN_SIZE,
)