# Imports

In [11]:
%matplotlib inline

import time
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
!pip install imageio_ffmpeg
import imageio
from io import BytesIO

# Global variables

In [12]:
# Target image size (in pixels) handed to the labelled-data loader.
WIDTH, HEIGHT = 120, 120

# Platform tag handed to the loaders ("windows" selects "\\" as path separator,
# anything else selects "/").
OS = "linux"

# Fraction of the samples held out for validation by train_test_split.
valid_size = 0.2

# Gesture class names.
label_name = [
    "palm_horizontal",
    "L",
    "fist_horizontal",
    "fist_vertical",
    "thumb_up",
    "index",
    "ok",
    "palm_vertical",
    "C",
    "thumb_down",
]

# Loading data

In [13]:
def load_labelling_data(width, height, oper_sys, root_dir="../"):
    """Load the labelled gesture JPEGs found anywhere under ``root_dir``.

    A file is used when its name ends with ``jpg`` and starts with a letter.
    Each image is read with OpenCV, converted to grayscale and resized to
    ``(width, height)``. The label is the part of the file name before the
    first underscore (e.g. ``fist_01.jpg`` -> ``"fist"``).

    Args:
        width: Target image width in pixels.
        height: Target image height in pixels.
        oper_sys: Kept for backward compatibility. It is no longer consulted
            because the file name is taken directly from ``os.walk`` instead of
            being re-derived by splitting the path on a separator.
        root_dir: Root of the directory tree to scan (default ``"../"``).

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n, height, width, 1)`` and
        ``y`` is an array with the ``n`` label strings.
    """
    X = []
    y = []
    for root, _, files in tqdm(os.walk(root_dir, topdown=False)):
        for name in files:
            # ``name`` is already the last path component, so the filters can
            # look at it directly regardless of the platform's separator.
            if not name.endswith("jpg") or not name[0].isalpha():
                continue
            img = cv2.imread(os.path.join(root, name))
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None; skip it instead of crashing in cvtColor.
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, (width, height))
            X.append(img)
            y.append(name.split("_")[0])
    X = np.array(X)
    y = np.array(y)
    return X.reshape(X.shape[0], height, width, 1), y

def load_test_data(width, height, oper_sys, root_dir="../"):
    """Load the unlabelled test JPEGs found anywhere under ``root_dir``.

    A file is used when its name ends with ``jpg`` and its first character is
    neither a letter nor a dot (i.e. not a labelled image and not a hidden
    file). Each image is read with OpenCV, converted to grayscale and resized
    to ``(width, height)``.

    Args:
        width: Target image width in pixels.
        height: Target image height in pixels.
        oper_sys: Kept for backward compatibility. It is no longer consulted
            because the file name is taken directly from ``os.walk`` instead of
            being re-derived by splitting the path on a separator.
        root_dir: Root of the directory tree to scan (default ``"../"``).

    Returns:
        Array of shape ``(n, height, width, 1)``.
    """
    X = []
    for root, _, files in tqdm(os.walk(root_dir, topdown=False)):
        for name in files:
            if not name.endswith("jpg"):
                continue
            first_char = name[0]
            if first_char.isalpha() or first_char == ".":
                continue
            img = cv2.imread(os.path.join(root, name))
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None; skip it instead of crashing in cvtColor.
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, (width, height))
            X.append(img)
    X = np.array(X)
    return X.reshape(X.shape[0], height, width, 1)

def load_dataset_data(width, height, oper_sys):
    """Load the PNG dataset found under the current working directory.

    Every path ending in ``png`` is read with OpenCV, converted to grayscale
    and resized to ``(width, height)``. The label comes from the fifth
    component of the path as produced by ``os.walk(".")`` (index 4 after
    splitting on the separator): the integer before its first underscore,
    minus one. The loader therefore depends on the directory depth at which
    the category folders sit.

    Args:
        width: Target image width in pixels.
        height: Target image height in pixels.
        oper_sys: ``"windows"`` to split paths on ``"\\"``; any other value
            splits on ``"/"``.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n, height, width, 1)`` and
        ``y`` is an array with the ``n`` integer labels.

    Raises:
        IndexError: If a PNG path has fewer than five components.
        ValueError: If the category prefix is not an integer.
    """
    X = []
    y = []
    split_ = "\\" if oper_sys == "windows" else "/"
    for root, _, files in tqdm(os.walk(".", topdown=False)):
        for name in files:
            path = os.path.join(root, name)
            if not path.endswith("png"):
                continue
            # Loading labels
            category = path.split(split_)[4]
            label = int(category.split("_")[0]) - 1
            # Loading images
            img = cv2.imread(path)
            if img is None:
                # cv2.imread returns None for unreadable files; skip the sample
                # entirely so X and y never get out of step.
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, (width, height))
            X.append(img)
            y.append(label)
    X = np.array(X)
    y = np.array(y)
    return X.reshape(X.shape[0], height, width, 1), y

def get_background_images(video_path='../input/hand-gesture/data_background/test.mkv', frame_step=14):
    """Extract grayscale background frames from a video file.

    The file is read into memory, decoded through imageio's ffmpeg reader and
    every ``frame_step``-th frame (starting with frame 0) is kept.

    Args:
        video_path: Path of the background video (defaults to the original
            hard-coded location).
        frame_step: Keep one frame out of this many; must be a positive
            integer (default 14).

    Returns:
        Array of the kept frames, each converted to grayscale with a trailing
        channel axis of length 1.
    """
    with open(video_path, 'rb') as file:
        content = file.read()
    vid = imageio.get_reader(BytesIO(content), 'ffmpeg')
    images = []
    try:
        for num, image in enumerate(vid.iter_data()):
            if num % frame_step == 0:
                # imageio's ffmpeg reader yields RGB frames (unlike cv2.imread,
                # which yields BGR), so the grayscale conversion must be RGB2GRAY.
                img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
                images.append(img)
    finally:
        # Release the ffmpeg subprocess/reader even if decoding fails.
        vid.close()
    return np.array(images)

In [14]:
# Load the labelled hand images (with their labels) and the background frames.
X_labelling, y_labelling = load_labelling_data(width=WIDTH, height=HEIGHT, oper_sys=OS)
background_images = get_background_images()

In [15]:
# Report the shape of each loaded array, one per line.
for loaded_array in (X_labelling, y_labelling, background_images):
    print(loaded_array.shape)

# Data augmentation

In [16]:
# Show the first hand image and the first background frame, each in its own figure.
for preview in (X_labelling[0], background_images[0]):
    plt.imshow(preview)
    plt.show()

In [17]:
def add_background_to_data(background_images, hand_images, labels, scales=(1, 2, 3), output_size=128):
    """Paste every hand image onto every background at several scales.

    For each hand image, each background frame and each scale factor, the hand
    image is resized to a square of ``int(hand_height * scale)`` pixels, pasted
    at a uniformly random position that keeps it fully inside the background,
    and the composite is resized to ``(output_size, output_size)``.

    A scaled hand image that is larger than the background frame cannot be
    placed and is skipped (the original code raised ``ValueError`` from
    ``np.random.randint`` in that case, and also when the fit was exact).

    Args:
        background_images: Iterable of background frames shaped ``(H, W, 1)``.
        hand_images: Iterable of hand images shaped ``(h, w, 1)``.
        labels: Sequence with one label per hand image.
        scales: Scale factors applied to the hand image (default ``(1, 2, 3)``).
        output_size: Side length of the square output images (default 128).

    Returns:
        Tuple ``(images, labels)``: ``images`` has shape
        ``(n, output_size, output_size, 1)`` and ``labels`` repeats the source
        label once per generated image.
    """
    final_images = []
    new_labels = []
    for idx, hand_img in enumerate(tqdm(hand_images)):
        # The scaled copies do not depend on the background, so build them once
        # per hand image instead of once per (hand, background) pair.
        scaled_hands = []
        for r in scales:
            resize = int(hand_img.shape[0] * r)
            # cv2.resize drops the singleton channel axis; add it back.
            scaled_hands.append(cv2.resize(hand_img, (resize, resize))[:, :, np.newaxis])
        for background_img in background_images:
            bg_h, bg_w = background_img.shape[:2]
            for hand_img_resized in scaled_hands:
                hand_h, hand_w = hand_img_resized.shape[:2]
                if hand_h > bg_h or hand_w > bg_w:
                    continue
                # +1 makes the largest valid offset reachable and lets an exact
                # fit (difference 0) through; randint's upper bound is exclusive.
                y_offset = np.random.randint(bg_h - hand_h + 1)
                x_offset = np.random.randint(bg_w - hand_w + 1)
                tmp = background_img.copy()
                tmp[y_offset:y_offset + hand_h, x_offset:x_offset + hand_w] = hand_img_resized
                tmp = cv2.resize(tmp, (output_size, output_size))[:, :, np.newaxis]
                final_images.append(tmp)
                new_labels.append(labels[idx])
    return np.array(final_images), np.array(new_labels)

In [18]:
# Build the augmented data set by pasting the hand images onto the background frames.
X, y = add_background_to_data(
    background_images=background_images,
    hand_images=X_labelling,
    labels=y_labelling,
)
for augmented_array in (X, y):
    print(augmented_array.shape)

In [19]:
# Display the augmented sample at index 1000 (requires at least 1001 samples in X).
plt.imshow(X[1000])

In [20]:
# Encode the string labels as integer class ids: label_names[i] is the name of class i.
# The encoding goes into a new variable instead of overwriting ``y``; otherwise
# re-running this cell would call np.unique on already-encoded integers and
# replace label_names with integers.
label_names, y_encoded = np.unique(y, return_inverse=True)
# Hold out ``valid_size`` of the samples for validation (fixed seed for a repeatable split).
X_train, X_valid, y_train, y_valid = train_test_split(X, y_encoded, test_size=valid_size, random_state=42)

In [21]:
# Shape of the training split.
X_train.shape

# Building model

In [22]:
def build_model(input_shape=None, num_classes=None):
    """Build the (uncompiled) convolutional gesture classifier.

    Architecture: three Conv2D + MaxPooling2D stages (32, 64 and 64 filters),
    dropout of 0.2, then a 128-unit dense layer and a softmax output layer.

    Args:
        input_shape: Shape of one input sample. Defaults to the shape of the
            first sample of the notebook-level ``X_train``.
        num_classes: Number of output units. Defaults to the length of the
            notebook-level ``label_name`` list.

    Returns:
        A ``tf.keras`` Sequential model.
    """
    # Fall back to the notebook globals only when the caller did not say
    # otherwise, so existing ``build_model()`` calls behave as before.
    if input_shape is None:
        input_shape = X_train[0].shape
    if num_classes is None:
        num_classes = len(label_name)

    layers = tf.keras.layers
    return tf.keras.models.Sequential([
        layers.Conv2D(32, (5, 5), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(rate=0.2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])

# Training model

In [23]:
# Set to False to load the previously saved model instead of training a new one.
train_ = True

EPOCHS = 50
BATCH_SIZE = 128
MODEL_PATH = 'tmp_model.h5'

# Defined on both paths so later cells can check for it: it stays None when the
# model is loaded from disk, because no training run (and no History) exists then.
history = None

if train_:
    model = build_model()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=5,
                        validation_data=(X_valid, y_valid))
    model.save(MODEL_PATH)
else:
    model = tf.keras.models.load_model(MODEL_PATH, custom_objects=None, compile=True, options=None)

model.summary()
# Score the model on the held-out validation split.
valid_loss, valid_acc = model.evaluate(X_valid, y_valid)
print('Valid accuracy: {:2.2f}%'.format(valid_acc*100))

# Training curves and inference

In [24]:
# Summarize the training run: one figure per metric, train vs. validation curve.
# ``history`` only exists when the model was trained in this session (not when
# it was loaded from disk), so look it up defensively instead of raising NameError.
training_history = globals().get("history")
if training_history is None:
    print("No training history available: the model was not trained in this session.")
else:
    for metric in ("accuracy", "loss"):
        plt.plot(training_history.history[metric])
        plt.plot(training_history.history['val_' + metric])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'valid'], loc='upper left')
        plt.show()

In [25]:
# Load the unlabelled test images at 128x128.
X_test = load_test_data(width=128, height=128, oper_sys="linux")

In [26]:
# Display the first test image.
plt.imshow(X_test[0])

In [27]:
# The predicted class of each test image is the index of its highest score.
class_scores = model.predict(X_test)
preds = np.argmax(class_scores, axis=1)
# Echo the class-name lookup table as the cell output.
label_names

In [28]:
# Show every test image followed by the name of its predicted class.
for test_image, predicted_class in zip(X_test, preds):
    print("=======================================")
    plt.imshow(test_image)
    plt.show()
    print(label_names[predicted_class])