In [1]:
import numpy as np
import tensorflow as tf
from keras.utils import to_categorical

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adam

import cv2 as cv
from time import sleep
import os

import copy
import itertools
import mediapipe as mp




In [2]:
# Seed intended for reproducibility. NOTE(review): no visible cell passes it to
# numpy / TensorFlow / Keras, so runs are currently unseeded — confirm and wire in.
RANDOM_SEED = 42
# Number of target classes; sizes the final softmax layer of the model.
NUM_CLASSES = 32

In [22]:
# Dataset roots. NOTE(review): these are absolute, machine-specific paths; the
# notebook cannot run on another machine without editing them.
# Training root ("latters" is presumably "letters" — the string has to match the
# folder name on disk, so verify before renaming).
train_data = "D:/binary/latters"
# Validation root; only referenced by a commented-out load_data call.
test_data = "D:/new-archive/datasets/valid/"



In [4]:
# Containers for image arrays (X_*) and their integer class labels (y_*).
# The training pair is re-created by the loading cell further down; the test
# pair is not populated by any visible cell.
X_train, X_test = [], []
y_train, y_test = [], []

In [5]:
# Shared MediaPipe Hands detector; get_brect() calls hands.process() on it.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=True,  # treat each input as an independent still image
    max_num_hands=2,  # report at most two hands per image
    min_detection_confidence=0.3,  # permissive detection threshold
    # NOTE(review): MediaPipe documents this option as ignored when
    # static_image_mode=True — confirm and drop it if so.
    min_tracking_confidence=0.5,
)

In [6]:
def get_normalized_brect(brect, img):
    """Convert a normalised centre-format box into clamped pixel corners.

    Args:
        brect: ``(x_center, y_center, width, height)``, each value expressed
            as a fraction of the image size.
        img: Three-dimensional image array whose first two dimensions are
            ``(height, width)``.

    Returns:
        ``[left, top, right, bottom]`` as integer pixel coordinates. Every
        coordinate is clamped to the valid index range of ``img``, so a box
        lying partly or wholly outside the image still yields usable indices.
    """
    img_h, img_w, _ = img.shape
    x_center, y_center, box_w, box_h = brect

    def _to_pixel(fraction, size):
        # int() truncates toward zero. Clamp on BOTH sides: previously only the
        # low side of left/top and the high side of right/bottom were limited.
        return max(0, min(int(fraction * size), size - 1))

    left = _to_pixel(x_center - box_w / 2, img_w)
    right = _to_pixel(x_center + box_w / 2, img_w)
    top = _to_pixel(y_center - box_h / 2, img_h)
    bottom = _to_pixel(y_center + box_h / 2, img_h)

    return [left, top, right, bottom]

In [7]:
def calc_bounding_rect(image, landmarks):
    """Compute the pixel bounding box that encloses a set of landmarks.

    Args:
        image: Image array; only ``shape[0]`` (height) and ``shape[1]``
            (width) are read.
        landmarks: Object exposing a ``landmark`` iterable whose items have
            ``x`` and ``y`` attributes. They are multiplied by the image width
            and height, i.e. treated as normalised coordinates.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` where the max corner is the rectangle
        origin plus the width/height reported by ``cv.boundingRect``.
    """
    image_width, image_height = image.shape[1], image.shape[0]

    # Collect all points in one pass instead of calling np.append per landmark,
    # which reallocates and copies the whole array on every iteration.
    points = [
        (
            min(int(landmark.x * image_width), image_width - 1),
            min(int(landmark.y * image_height), image_height - 1),
        )
        for landmark in landmarks.landmark
    ]
    # reshape keeps the (N, 2) layout even when there are no landmarks.
    landmark_array = np.array(points, dtype=int).reshape(-1, 2)

    x, y, w, h = cv.boundingRect(landmark_array)

    return [x, y, x + w, y + h]


def calc_landmark_list(image, landmarks):
    """Return every landmark as an ``[x, y]`` pixel pair.

    Args:
        image: Image array; only ``shape[0]`` (height) and ``shape[1]``
            (width) are read.
        landmarks: Object exposing a ``landmark`` iterable whose items have
            ``x`` and ``y`` attributes, scaled here by the image size.

    Returns:
        A list of ``[x, y]`` integer pairs, one per landmark, each capped at
        the last valid column / row index.
    """
    height, width = image.shape[0], image.shape[1]
    last_col, last_row = width - 1, height - 1

    # Only x and y are used; the z component of a landmark is ignored.
    return [
        [min(int(point.x * width), last_col), min(int(point.y * height), last_row)]
        for point in landmarks.landmark
    ]

In [8]:
def get_brect(image):
    """Detect hands in ``image`` and return one bounding box per hand.

    Args:
        image: Image array, passed unchanged to ``hands.process``.
            NOTE(review): MediaPipe expects RGB input while ``cv.imread``
            yields BGR — confirm that callers convert first.

    Returns:
        A list of ``[x_min, y_min, x_max, y_max]`` boxes as produced by
        ``calc_bounding_rect``; empty when no hand is detected.
    """
    results = hands.process(image)

    # multi_hand_landmarks is None (not an empty list) when nothing is found.
    if results.multi_hand_landmarks is None:
        return []

    # calc_bounding_rect only reads the image shape, so no defensive copy of
    # the image is required, and handedness is not needed for the boxes.
    return [
        calc_bounding_rect(image, hand_landmarks)
        for hand_landmarks in results.multi_hand_landmarks
    ]

In [16]:
def convert_to_hsv(img, brects):
    """Build an inverted skin mask limited to the given bounding boxes.

    Despite its name the function does not return an HSV image: it thresholds
    the image in both HSV and YCrCb space, combines the two masks and returns
    the inverted result.

    Args:
        img: BGR image array (the colour conversions use ``COLOR_BGR2*``).
        brects: Iterable of normalised ``(x_center, y_center, width, height)``
            boxes, in the format accepted by ``get_normalized_brect``. May be
            empty, in which case nothing is copied from ``img``.

    Returns:
        The bitwise-inverted combined skin mask produced by ``cv.inRange`` and
        the subsequent clean-up steps.
    """
    # White canvas onto which only the boxed regions are copied. It is created
    # once, before the loop, so that every box is kept (not just the last one)
    # and so that an empty ``brects`` no longer leaves it undefined.
    canvas = np.full(img.shape, 255, dtype=np.uint8)
    for rect in brects:
        # get_normalized_brect returns corners, not (x, y, w, h).
        left, top, right, bottom = get_normalized_brect(rect, img)
        # Corners are clamped to the last valid index, so treat them as inclusive.
        canvas[top:bottom + 1, left:right + 1] = img[top:bottom + 1, left:right + 1]

    small_kernel = np.ones((3, 3), np.uint8)

    # Skin colour range in HSV space, followed by an opening to remove specks.
    img_hsv = cv.cvtColor(canvas, cv.COLOR_BGR2HSV)
    hsv_mask = cv.inRange(img_hsv, (0, 15, 0), (17, 170, 255))
    hsv_mask = cv.morphologyEx(hsv_mask, cv.MORPH_OPEN, small_kernel)

    # Skin colour range in YCrCb space, cleaned up the same way.
    img_ycrcb = cv.cvtColor(canvas, cv.COLOR_BGR2YCrCb)
    ycrcb_mask = cv.inRange(img_ycrcb, (0, 135, 85), (255, 180, 135))
    ycrcb_mask = cv.morphologyEx(ycrcb_mask, cv.MORPH_OPEN, small_kernel)

    # Keep only pixels accepted by both colour spaces, then smooth the result.
    skin_mask = cv.bitwise_and(ycrcb_mask, hsv_mask)
    skin_mask = cv.medianBlur(skin_mask, 3)
    skin_mask = cv.morphologyEx(skin_mask, cv.MORPH_OPEN, np.ones((4, 4), np.uint8))

    return cv.bitwise_not(skin_mask)

In [10]:
def convert_to_float(x):
    """Convert every element of a sequence of sequences to ``float``.

    Args:
        x: Iterable of iterables whose items are accepted by ``float()``.

    Returns:
        A new list of lists with the same layout and every item as a float.
    """
    return [list(map(float, row)) for row in x]

In [12]:
def load_data(data_dir):
    """Load a dataset laid out as ``images/`` plus ``labels/`` under ``data_dir``.

    For every image the matching label file is read, each line's fields after
    the first are parsed as a bounding box, the image is run through
    ``convert_to_hsv`` and the result is written to ``data_dir/processed/``
    and read back before being collected.

    Args:
        data_dir: Dataset root containing ``images/`` and ``labels/``.

    Returns:
        ``(X_data, y_data)`` as numpy arrays: the processed images and the
        integer class parsed from the part of each file name before the
        first ``_``.
    """
    images_dir = f"{data_dir}/images/"
    labels_dir = f"{data_dir}/labels/"
    processed_dir = os.path.join(data_dir, "processed")
    # cv.imwrite fails silently when the target directory does not exist.
    os.makedirs(processed_dir, exist_ok=True)

    # os.listdir order is arbitrary; sort both listings so images and labels
    # are paired consistently. NOTE(review): this still assumes both folders
    # hold the same stems and nothing else — confirm.
    image_names = sorted(os.listdir(images_dir))
    label_names = sorted(os.listdir(labels_dir))

    X_data = []
    y_data = []

    for name, label_name in zip(image_names, label_names):
        img = cv.imread(os.path.join(images_dir, name))
        if img is None:
            # Unreadable or non-image file: skip it instead of crashing in resize.
            continue
        img = cv.resize(img, (312, 288))  # cv.resize takes (width, height)

        # Context manager closes the label file; blank lines are ignored.
        with open(os.path.join(labels_dir, label_name), "r") as label_file:
            rows = [line.split() for line in label_file]
        # The first field of each row is dropped; the rest form the box.
        brect_cords = convert_to_float([row[1:] for row in rows if row])

        image_class = int(name.split('_')[0])

        # The mask is written to disk and read back with cv.imread's default
        # flags, which is what the original pipeline stored in X_data.
        processed_path = os.path.join(processed_dir, name)
        cv.imwrite(processed_path, convert_to_hsv(img, brect_cords))
        X_data.append(cv.imread(processed_path))
        y_data.append(image_class)

    return np.array(X_data), np.array(y_data)

In [33]:
# Load the folder-per-class training set: each sub-directory of train_data is
# one class and every image inside it becomes one sample.
# Sorted so the folder -> class-index mapping is identical on every run
# (os.listdir returns entries in arbitrary order); stray files are ignored.
letters = sorted(
    entry for entry in os.listdir(train_data)
    if os.path.isdir(os.path.join(train_data, entry))
)

X_train, y_train = [], []

for class_label, letter in enumerate(letters):
    letter_dir = os.path.join(train_data, letter)
    for image in sorted(os.listdir(letter_dir)):
        img = cv.imread(os.path.join(letter_dir, image))
        if img is None:
            # cv.imread returns None for unreadable / non-image files, which
            # would otherwise crash cv.resize.
            continue
        # cv.resize takes (width, height), so the array shape is (150, 235, 3).
        img = cv.resize(img, (235, 150))
        X_train.append(img)
        y_train.append(class_label)

In [34]:
# Stack the per-image arrays into one array and the labels into a 1-D array.
# NOTE(review): with the 64000 images of shape (150, 235, 3) reported below,
# this is roughly 6.8 GB if the images are uint8 — confirm it fits in memory.
X_train = np.array(X_train)
y_train = np.array(y_train)

In [17]:
# This cell belongs to the images/ + labels/ pipeline and was last executed
# (In [17]) before the folder-per-class loader (In [33]). Run in file order it
# would overwrite X_train with 288x312 images that do not match the model's
# (150, 235, 3) input, so it is opt-in rather than unconditional.
USE_BBOX_PIPELINE = False
if USE_BBOX_PIPELINE:
    X_train, y_train = load_data(train_data)
    # X_test, y_test = load_data(test_data)

In [35]:
# Sanity check: the trailing dimensions should equal the model's input shape.
print(X_train.shape)

(64000, 150, 235, 3)


In [None]:
# Shape of the first pixel row of the first image.
# NOTE(review): the recorded output "(312,)" does not match the
# (64000, 150, 235, 3) array printed by the previous cell, so it appears to come
# from an earlier run — re-execute or delete this cell.
print(X_train[0][0].shape)

(312,)


In [39]:
# Images are fed as 150 rows x 235 columns x 3 channels.
input_shape = (150, 235, 3)

# Three convolutional stages (1, 2 and 2 conv layers), each closed by 2x2 max
# pooling, followed by a small dense head with dropout and a softmax over
# NUM_CLASSES.
# NOTE(review): no rescaling layer is present, so pixel values reach the first
# convolution unnormalised unless that is done elsewhere — confirm.
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 2
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 3
    Conv2D(96, (3, 3), activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier head
    Flatten(),
    Dense(150, activation='relu'),
    Dropout(0.25),
    Dense(NUM_CLASSES, activation='softmax'),
])

In [40]:
# Integer labels + softmax output: sparse categorical cross-entropy on
# probabilities (from_logits=False).
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [41]:
# Train on the whole training set; no validation data is passed to fit().
model.fit(
    X_train,
    y_train,
    epochs=15,
    batch_size=32,
    verbose=1,
)

Epoch 1/15

KeyboardInterrupt: 