In [None]:
from sklearn.model_selection import train_test_split
import os
import shutil
import numpy as np

## Splitting Images and Labels for Training, Validation, and Testing

Before using the code below, make sure your directory structure is organized as follows:

- Root Directory
  - Train_Test_Split.ipynb
  - Data
    - images
      - Original images
    - labels
      - Original labels
    - train
      - images (Empty Folder)
      - labels (Empty Folder)
    - valid
      - images (Empty Folder)
      - labels (Empty Folder)
    - test
      - images (Empty Folder)
      - labels (Empty Folder)


In [None]:
# Build two index-aligned lists: imgs[i] is the image whose label is lbls[i].
# os.listdir returns entries in arbitrary order, so two independent listings are
# not guaranteed to line up. Both are therefore sorted by file stem (the name
# without its final extension) before the paths are built; otherwise the split
# below would send an image and its label to different folders.
def _stem(file_name):
    """Return file_name without its final extension."""
    return os.path.splitext(file_name)[0]

# Get the path of each label
lbl_files = sorted(os.listdir("Data/labels"), key=_stem)
lbls = ["Data/labels/" + lbl_file for lbl_file in lbl_files]

# Get the path of each image
img_files = sorted(os.listdir("Data/images"), key=_stem)
imgs = ["Data/images/" + img_file for img_file in img_files]

# Fail early, before anything is split or copied, if the folders do not contain
# exactly one label per image with the same stem.
img_stems = [_stem(img_file) for img_file in img_files]
lbl_stems = [_stem(lbl_file) for lbl_file in lbl_files]
if img_stems != lbl_stems:
    unmatched = sorted(set(img_stems) ^ set(lbl_stems))
    raise ValueError(
        f"Images and labels do not pair up by file stem "
        f"({len(img_files)} images, {len(lbl_files)} labels); "
        f"first unmatched stems: {unmatched[:5]}"
    )

In [None]:
# Hold-out set sizes as absolute file counts, rounded up to a whole sample.
n_samples = len(imgs)

# Test set: 10% of all samples
test_size = int(np.ceil(n_samples * 0.1))

# Validation set: 20% of all samples
valid_size = int(np.ceil(n_samples * 0.2))

In [None]:
# Stage 1: hold out `test_size` samples (10% of the full dataset) for testing.
X_rest, X_test, y_rest, y_test = train_test_split(
    imgs,
    lbls,
    test_size=test_size,
    random_state=42,
)

# Stage 2: from the remainder, hold out `valid_size` samples for validation.
# `valid_size` is an absolute count (20% of the FULL dataset), not a fraction
# of the remaining training data.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_rest,
    y_rest,
    test_size=valid_size,
    random_state=42,
)

# Resulting share of the full dataset:
#   Train: 70%
#   Valid: 20%
#   Test:  10%

In [None]:
# Report how many images and labels landed in each split; the two counts of a
# split should always be equal.
summary_lines = [
    f"Train Images: {len(X_train)}",
    f"Train Labels: {len(y_train)}\n",
    f"Valid Images: {len(X_valid)}",
    f"Valid Labels: {len(y_valid)}\n",
    f"Test Images: {len(X_test)}",
    f"Test Labels: {len(y_test)}",
]
print("\n".join(summary_lines))

Train Images: 7406
Train Labels: 7406

Valid Images: 2116
Valid Labels: 2116

Test Images: 1058
Test Labels: 1058


In [None]:
# Spot-check one training pair: the image path and the label path stored at the
# same index should refer to the same sample.
sample_img, sample_lbl = X_train[15], y_train[15]

print(f"{sample_img}\n")
print(sample_lbl)

Data/images/Brightness_Contrast_ck0tzinm26vvi0848hrzkprh2_jpeg.rf.8c764735a9990557e147153fcd4506ce.jpg

Data/labels/4-multi_objects_120_jpg.rf.7e9d5a6857b216c433e8bb6b94a3c76b.txt


In [None]:
# Copy every split's images and labels into Data/<split>/images and
# Data/<split>/labels. The original files in Data/images and Data/labels are
# left in place.
split_files = {
    "train": (X_train, y_train),
    "valid": (X_valid, y_valid),
    "test": (X_test, y_test),
}

for split_name, (split_imgs, split_lbls) in split_files.items():
    for kind, paths in (("images", split_imgs), ("labels", split_lbls)):
        dest_dir = f"Data/{split_name}/{kind}"
        # shutil.copy treats a non-existent destination as a file name, so a
        # missing folder would make every copy overwrite one single file.
        # Creating the folder first guarantees files are copied INTO it.
        os.makedirs(dest_dir, exist_ok=True)
        for path in paths:
            shutil.copy(path, dest_dir)