In [1]:
from sklearn.model_selection import StratifiedKFold
import numpy as np
import cv2
import os

In [2]:
imagepaths = []
categories_set = set()
for root, dirs, files in os.walk("leapGestRecog", topdown=False): 
    for name in files:
        path = os.path.join(root, name)
        #if name.startswith('c'):
        #    continue
        if path.upper().endswith("PNG"): # We want only the images
            imagepaths.append(path)
            categories_set.add(os.path.split(root)[1])
categories_list = list(sorted(categories_set))

categories = dict(zip(categories_list, range(len(categories_list))))
print(categories)

print(len(imagepaths))

{'01_palm': 0, '02_l': 1, '03_fist': 2, '04_fist_moved': 3, '05_thumb': 4, '06_index': 5, '07_ok': 6, '08_palm_moved': 7, '09_c': 8, '10_down': 9}
20000


In [3]:
X = []
y = []
for path in imagepaths:
    img = cv2.imread(path) # Reads image and returns np.array
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Converts into the corret colorspace (GRAY)
    img = cv2.resize(img, (320, 120)) # Reduce image size so training can be faster
    X.append(img)

    # Processing label in image path
    category = os.path.split(os.path.split(path)[0])[1]
    label = categories[category]
    y.append(label)

# Turn X and y into np.array to speed up train_test_split
X = np.array(X, dtype="uint8")
X = X.reshape(len(imagepaths), 320, 120, 1) # Needed to reshape so CNN knows it's different images
y = np.array(y)
X.shape, y.shape

((20000, 320, 120, 1), (20000,))

In [4]:
np.save("raw_X_Leap.npy", X)
np.save("raw_y_Leap.npy", y)

In [5]:
seed = 1234

In [6]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

In [8]:
for i, (train, test) in enumerate(kfold.split(X, y), 1):
    np.save(f"Leap_K_Fold/train_Leap_{i:02d}.npy", train)
    np.save(f"Leap_K_Fold/test_Leap_{i:02d}.npy", test)