In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from tqdm import tqdm

from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, KFold, train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import pickle, shutil, random

In [41]:
# Base directory that the dataset sub-directories below get joined onto.
BASE_PATH = Path('/project/volume/data/out/')

# One relative sub-directory per dataset; combine as `BASE_PATH / <NAME>`.
EMOREACT, FER, KDEF, NIMH = (
    Path(dataset_dir)
    for dataset_dir in ('EmoReact', 'FER-2013', 'KDEF-AKDEF', 'NIMH-CHEFS')
)

In [43]:
# Dataset directory processed by the rest of the notebook.
CURRENT_PATH = BASE_PATH / FER

# Folder names that split_and_move() creates inside CURRENT_PATH. They are
# excluded from LABELS so that re-running this cell after the split has
# happened does not treat "train"/"val"/"test" as class labels and hand
# already-split data back to split_and_move().
SPLIT_DIRS = ("train", "val", "test")
LABELS = [
    f.name
    for f in CURRENT_PATH.iterdir()
    if f.is_dir() and f.name not in SPLIT_DIRS
]

train_prop = 0.6
test_prop = 0.2
valid_prop = 0.2

# n_test is computed as "whatever is left", so test_prop is only reported,
# never used. Make sure the reported percentages are actually consistent.
assert abs(train_prop + valid_prop + test_prop - 1.0) < 1e-9, \
    "train_prop + valid_prop + test_prop must sum to 1"

# Recursive count, so it is the same before and after the files are moved
# into the train/val/test sub-folders.
number_of_images = sum(1 for _ in CURRENT_PATH.rglob('*.jpg'))

# int(x + 0.5) rounds half up; the test set absorbs the rounding remainder.
n_train = int((number_of_images * train_prop) + 0.5)
n_valid = int((number_of_images * valid_prop) + 0.5)
n_test = number_of_images - n_train - n_valid

print(f"[INFO] Total number of images ... {number_of_images}")
print(f"[INFO] Number of images used in training ... {n_train} ({train_prop * 100}%)")
print(f"[INFO] Number of images used in validation ...{n_valid} ({valid_prop * 100}%)")
print(f"[INFO] Number of images used in testing ... {n_test} ({test_prop * 100}%)")

[INFO] Total number of images ... 35887
[INFO] Number of images used in training ... 21532 (60.0%)
[INFO] Number of images used in validation ...7177 (20.0%)
[INFO] Number of images used in testing ... 7178 (20.0%)


In [44]:
def split_and_move(seed=None):
    """Split every label folder of CURRENT_PATH into train / val / test folders.

    For each name in the module-level ``LABELS`` list, all ``*.jpg`` files
    found (recursively) under ``CURRENT_PATH / label`` are shuffled and MOVED
    into ``CURRENT_PATH / "train" / label``, ``CURRENT_PATH / "val" / label``
    and ``CURRENT_PATH / "test" / label``. The split sizes are taken from the
    module-level ``train_prop`` and ``valid_prop``; the test set receives the
    remaining files. The emptied label folder is removed afterwards.

    Parameters
    ----------
    seed : int or None, optional
        When given, a private ``random.Random(seed)`` is used and the file
        list is sorted first, so the same seed yields the same split.
        When ``None`` (default) the global ``random`` state is used, exactly
        as before.

    Notes
    -----
    This function is destructive: it changes the directory layout on disk.
    Labels named "train", "val" or "test" are skipped so that calling it
    again on an already-split dataset does not move the split folders into
    themselves and then delete them.
    """
    split_dirs = ("train", "val", "test")
    labels = [label for label in LABELS if label not in split_dirs]

    # Fall back to the global generator when no seed is requested so that an
    # earlier `random.seed(...)` call by the user is still honoured.
    rng = random if seed is None else random.Random(seed)

    print(f"[INFO] Splitting files in train - test - val sets for each of {len(labels)} labels ...")
    for label in tqdm(labels):

        # create paths for each label
        folder_path = CURRENT_PATH / label
        train_destination = CURRENT_PATH / "train" / label
        val_destination = CURRENT_PATH / "val" / label
        test_destination = CURRENT_PATH / "test" / label

        # create the directories eg. "train / happy"
        for destination in (train_destination, val_destination, test_destination):
            destination.mkdir(parents=True, exist_ok=True)

        # get all the jpgs in the label folder; sorted so that the shuffle
        # below starts from a filesystem-independent order
        files = sorted(folder_path.rglob('*.jpg'))
        rng.shuffle(files)

        # round half up for both counts (the original used +0.2 for the
        # validation count, which under-allocated validation files compared
        # to the rounding used for the training count)
        train_n = int((len(files) * train_prop) + 0.5)
        val_n = int((len(files) * valid_prop) + 0.5)

        for file_idx, file in enumerate(files):
            if file_idx < train_n:
                destination = train_destination
            elif file_idx < train_n + val_n:
                destination = val_destination
            else:
                destination = test_destination
            shutil.move(str(file), str(destination))

        if folder_path.exists() and folder_path.is_dir():
            # Only *.jpg files were moved. If anything else is still in the
            # folder, keep it instead of silently deleting unsplit data.
            leftovers = [p for p in folder_path.rglob('*') if p.is_file()]
            if leftovers:
                tqdm.write(
                    f"[WARN] {len(leftovers)} non-jpg file(s) left in {folder_path}; folder not removed"
                )
            else:
                shutil.rmtree(folder_path)

    print(f"[INFO] DONE ...")


In [45]:
# WARNING: changes the dataset on disk. Files are moved (not copied) from the
# label folders into train/val/test sub-folders and the original label
# folders are cleaned up afterwards.
split_and_move()

[INFO] Splitting files in train - test - val sets for each of 7 labels ...


100%|███████████████████████████████████████████████████████████████| 7/7 [01:19<00:00, 11.39s/it]

[INFO] DONE ...



