In [1]:
from sklearn.model_selection import StratifiedKFold
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt

In [2]:
imagepaths = []
categories_set = set()
for root, dirs, files in os.walk("IN_2i", topdown=False): 
    for name in files:
        path = os.path.join(root, name)
        if path.endswith("jpg"): # We want only the images
            imagepaths.append(path)
            categories_set.add(os.path.split(root)[1].split('_')[-1])
categories_list = list(sorted(categories_set))

categories = dict(zip(categories_list, range(len(categories_list))))
print(categories)

print(len(imagepaths))

{'0': 0, '1': 1, '10': 2, '11': 3, '12': 4, '13': 5, '14': 6, '15': 7, '16': 8, '17': 9, '18': 10, '19': 11, '2': 12, '20': 13, '21': 14, '22': 15, '23': 16, '24': 17, '25': 18, '26': 19, '27': 20, '28': 21, '29': 22, '3': 23, '30': 24, '31': 25, '32': 26, '33': 27, '34': 28, '35': 29, '36': 30, '37': 31, '38': 32, '39': 33, '4': 34, '40': 35, '41': 36, '42': 37, '43': 38, '44': 39, '45': 40, '46': 41, '47': 42, '48': 43, '49': 44, '5': 45, '50': 46, '51': 47, '52': 48, '53': 49, '54': 50, '55': 51, '56': 52, '57': 53, '58': 54, '59': 55, '6': 56, '60': 57, '7': 58, '8': 59, '9': 60}
1220


In [4]:
max_size = np.array([188, 141])

In [6]:
X = []
y = []
max_h, max_w = max_size
img_size = tuple(max_size * 2)
for path in imagepaths:
    img = cv2.imread(path) # Reads image and returns np.array
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Converts into the corret colorspace (GRAY)
    img_h, img_w = img.shape
    h, w = (max_h - img_h) // 2, (max_w - img_w) // 2
    img = np.hstack((np.zeros((img_h, w)), img, np.zeros((img_h, max_w - img_w - w))))
    img = np.vstack((np.zeros((h, max_w)), img, np.zeros((max_h - img_h - h, max_w))))
    img = cv2.resize(img, img_size) # Reduce image size so training can be faster
    X.append(img)

    # Processing label in image path
    category = os.path.split(os.path.split(path)[0])[1].split('_')[-1]
    label = categories[category]
    y.append(label)

# Turn X and y into np.array to speed up train_test_split
X = np.array(X, dtype="uint8")
X = X.reshape(len(imagepaths), *img_size[::-1], 1) # Needed to reshape so CNN knows it's different images
y = np.array(y)
X.shape, y.shape

((1220, 282, 376, 1), (1220,))

In [7]:
np.save("raw_X_UFPR.npy", X)
np.save("raw_y_UFPR.npy", y)

In [31]:
seed = 1234

In [32]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

In [33]:
for i, (train, test) in enumerate(kfold.split(X, y), 1):
    np.save(f"UFPR_K_Fold/train_UFPR_{i:02d}.npy", train)
    np.save(f"UFPR_K_Fold/test_UFPR_{i:02d}.npy", test)

In [34]:
train.shape

(1098,)

In [35]:
test.shape

(122,)

In [36]:
X.shape

(1220, 376, 282, 1)

In [37]:
train.size + test.size

1220

In [38]:
X[train].shape

(1098, 376, 282, 1)

In [39]:
len(np.unique(y))

61