In [5]:
from sklearn.model_selection import StratifiedKFold
import numpy as np
import cv2
import os

In [6]:
def collect_image_paths(dataset_dir):
    """Collect the image paths under ``dataset_dir`` and their category names.

    A file is kept when its full path ends with "PNG" and its file name does
    not start with "c". The category of an image is the name of the directory
    that directly contains it.

    The paths are returned sorted: ``os.walk`` yields entries in arbitrary,
    filesystem-dependent order, and the seeded fold split performed later in
    this notebook is only repeatable if the samples always arrive in the same
    order.

    Args:
        dataset_dir: Root directory to scan recursively.

    Returns:
        A ``(paths, category_names)`` tuple: the sorted list of image paths and
        the set of directory names that contain at least one kept image.
    """
    paths = []
    category_names = set()
    for root, _dirs, files in os.walk(dataset_dir, topdown=False):
        for name in files:
            if name.startswith('c'):
                continue
            path = os.path.join(root, name)
            if path.endswith("PNG"):  # We want only the images
                paths.append(path)
                category_names.add(os.path.split(root)[1])
    paths.sort()
    return paths, category_names


imagepaths, categories_set = collect_image_paths("Folds_Dataset_Final")
categories_list = list(sorted(categories_set))

# Map every category name to an integer label, assigned in sorted name order.
categories = dict(zip(categories_list, range(len(categories_list))))
print(categories)

print(len(imagepaths))

{'1': 0, '2': 1, '4': 2, '5': 3, '7': 4, '9': 5, 'A': 6, 'Adulto': 7, 'America': 8, 'Aviao': 9, 'B': 10, 'C': 11, 'Casa': 12, 'D': 13, 'E': 14, 'F': 15, 'G': 16, 'Gasolina': 17, 'I': 18, 'Identidade': 19, 'Junto': 20, 'L': 21, 'Lei': 22, 'M': 23, 'N': 24, 'O': 25, 'P': 26, 'Palavra': 27, 'Pedra': 28, 'Pequeno': 29, 'Q': 30, 'R': 31, 'S': 32, 'T': 33, 'U': 34, 'V': 35, 'Verbo': 36, 'W': 37, 'X': 38, 'Y': 39}
4800


In [8]:
# Load every image as a grayscale array and derive its integer label from the
# name of the directory that contains it.
X = []
y = []
for path in imagepaths:
    img = cv2.imread(path)  # BGR np.ndarray, or None when the file cannot be read
    if img is None:
        # Fail with the offending path instead of an opaque error inside cvtColor.
        raise IOError(f"Could not read image: {path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to a single gray channel
    X.append(img)

    # The label is looked up from the image's parent directory name.
    category = os.path.basename(os.path.dirname(path))
    y.append(categories[category])

# Stack into arrays; this requires all images to share the same height and width.
X = np.array(X, dtype="uint8")
# Add a trailing channel axis -> (n_images, height, width, 1), the layout a CNN
# expects, without hard-coding the image size.
X = X[..., np.newaxis]
y = np.array(y)
X.shape, y.shape

((4800, 50, 50, 1), (4800,))

In [9]:
# Persist the complete, unsplit images and labels as .npy files.
np.save(file="raw_X_Lasic.npy", arr=X)
np.save(file="raw_y_Lasic.npy", arr=y)

In [10]:
# Fixed random seed, passed as `random_state` to the fold splitter so the
# shuffled fold assignment is repeatable.
seed = 1234

In [11]:
# 10-split stratified K-fold splitter; shuffling is enabled and seeded with `seed`.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

In [15]:
# Write one pair of files per fold, each holding only that fold's held-out
# samples, so the ten files together partition the dataset. `kfold.split`
# yields index arrays, which must be used to slice X and y; saving X and y
# unsliced would put the entire dataset into every fold file.
os.makedirs("lasic_data", exist_ok=True)  # np.save does not create directories
for i, (train, test) in enumerate(kfold.split(X, y), 1):
    np.save(f"lasic_data/X_{i:02d}_Lasic.npy", X[test])
    np.save(f"lasic_data/y_{i:02d}_Lasic.npy", y[test])