# import dependencies

In [None]:
from os import listdir
from os import makedirs

from cv2 import imshow
from cv2 import putText
from cv2 import waitKey
from cv2 import rectangle
from cv2 import VideoCapture
from cv2 import destroyAllWindows
from cv2 import FONT_HERSHEY_SIMPLEX

from numpy import save
from numpy import load
from numpy import array
from numpy import argmax
from numpy import concatenate
from numpy import expand_dims

from sklearn.model_selection import train_test_split

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

from tensorflowjs.converters import save_keras_model

import mediapipe
Holistic = mediapipe.solutions.holistic.Holistic
POSE_CONNECTIONS = mediapipe.solutions.holistic.POSE_CONNECTIONS
HAND_CONNECTIONS = mediapipe.solutions.holistic.HAND_CONNECTIONS
draw_landmarks = mediapipe.solutions.drawing_utils.draw_landmarks

# define variables

In [None]:
# Class index -> sign label. The labels double as dataset folder names and as
# the caption drawn on the video frame.
# NOTE(review): index 0 ("-") presumably stands for "no sign" - confirm.
SIGNS = dict(enumerate([
    "-",
    "hola",
    "como estas",
    "bien",
    "mal",
    "con permiso",
    "gracias",
    "de nada",
    "por favor",
    "perdon",
    "adios",
    "cuidate",
    "nos vemos",
    "te quiero",
]))

In [None]:
# Number of frames recorded per sign when building the dataset.
SAMPLES = 5_000

# define functions

In [None]:
def show_image(image, text=None, landmarks=None):
    """Show a frame in the "Video Capture" window, optionally annotated.

    The annotations are drawn directly onto ``image``.

    Args:
        image: Frame to display; its ``shape`` is read as (height, width, ...).
        text: Optional caption. It is upper-cased and written in white on a
            filled black strip, 40 pixels tall, along the bottom of the frame.
        landmarks: Optional tracker result exposing ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each one is
            drawn with its matching connection set.
    """
    if text:
        height, width = image.shape[0], image.shape[1]
        black, white = (0, 0, 0), (255, 255, 255)

        # Thickness -1 asks OpenCV for a filled rectangle (caption background).
        rectangle(image, (width, height), (0, height - 40), black, -1)
        putText(image, text.upper(), (10, height - 10), FONT_HERSHEY_SIMPLEX, 1, white, 3)

    if landmarks:
        overlays = (
            (landmarks.pose_landmarks, POSE_CONNECTIONS),
            (landmarks.left_hand_landmarks, HAND_CONNECTIONS),
            (landmarks.right_hand_landmarks, HAND_CONNECTIONS),
        )
        for landmark_list, connections in overlays:
            draw_landmarks(image, landmark_list, connections)

    imshow("Video Capture", image)

In [None]:
def get_keypoints(landmarks):
    """Flatten a tracker result into one 1-D feature vector.

    The vector is laid out as pose, left hand, right hand:

    * pose: (x, y, visibility) per landmark,
    * each hand: (x, y, z) per landmark.

    A part that was not detected is replaced by a block of -1.0 values sized
    for 33 pose landmarks or 21 hand landmarks, so a fully undetected frame
    yields 225 values.

    Args:
        landmarks: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable.

    Returns:
        The concatenated numpy array of keypoint values.
    """
    pose_result = landmarks.pose_landmarks
    if pose_result:
        pose = array([[p.x, p.y, p.visibility] for p in pose_result.landmark]).flatten()
    else:
        pose = array([-1.0] * (3 * 33))

    hands = []
    for hand_result in (landmarks.left_hand_landmarks, landmarks.right_hand_landmarks):
        if hand_result:
            hands.append(array([[p.x, p.y, p.z] for p in hand_result.landmark]).flatten())
        else:
            hands.append(array([-1.0] * (3 * 21)))

    return concatenate([pose, *hands])

# test tracker

In [None]:
# Live preview of the tracker: shows camera frames with the detected landmarks
# and a fixed "test" caption until "q" is pressed or the camera stops
# delivering frames.
# NOTE(review): OpenCV frames are BGR while MediaPipe documents RGB input -
# confirm whether a colour conversion is wanted before tracker.process().
capture = VideoCapture(0)

try:
    with Holistic() as tracker:
        while capture.isOpened():

            success, image = capture.read()

            # A failed read returns no frame; stop instead of handing None to
            # the tracker.
            if not success:
                break

            landmarks = tracker.process(image)

            prediction = "test"

            show_image(image, prediction, landmarks)

            if waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Release the camera and close the window even if the loop raised.
    capture.release()
    destroyAllWindows()

# create dataset

In [None]:
# Create one folder per sign label. exist_ok=True lets this cell be re-run
# without failing on folders that already exist.
for sign in SIGNS.values():
    makedirs(f"dataset/{sign}", exist_ok=True)

In [None]:
# Record SAMPLES keypoint vectors for every folder found in "dataset".
#
# For each sign a preview frame captioned "<sign> -" is shown for up to ten
# seconds so the signer can get ready; nothing is saved during that pause.
# Afterwards every frame is tracked, saved as dataset/<sign>/<sample>.npy and
# shown with the running sample number. Pressing "q" (during the pause or
# while recording) stops the whole session.
capture = VideoCapture(0)
stop = False

try:
    with Holistic() as tracker:
        for sign in listdir("dataset"):

            # Preparation pause: show the upcoming sign without recording it.
            success, image = capture.read()
            if not success:
                break

            show_image(image, f"{sign} -", tracker.process(image))

            # waitKey returns -1 on timeout, which never matches "q".
            if waitKey(10000) & 0xFF == ord("q"):
                break

            for sample in range(SAMPLES):

                success, image = capture.read()

                # A failed read returns no frame; abort instead of crashing in
                # the tracker.
                if not success:
                    stop = True
                    break

                landmarks = tracker.process(image)

                save(f"dataset/{sign}/{sample}", get_keypoints(landmarks))

                show_image(image, f"{sign} {sample}", landmarks)

                if waitKey(1) & 0xFF == ord("q"):
                    stop = True
                    break

            # Propagate an inner-loop abort to the loop over signs.
            if stop:
                break
finally:
    # Release the camera and close the window even if recording raised.
    capture.release()
    destroyAllWindows()

In [None]:
# Load every saved keypoint vector and build the training / test split.
to_index = {label: index for index, label in SIGNS.items()}
signs, indexes = [], []

for sign in listdir("dataset"):

    # Ignore entries that are not sign folders (e.g. files left by the OS or
    # the notebook server); they have no class index.
    if sign not in to_index:
        continue

    for sample in listdir(f"dataset/{sign}"):

        signs.append(load(f"dataset/{sign}/{sample}"))
        indexes.append(to_index[sign])

x = array(signs)

# Fix the one-hot width to the full label set so the output size does not
# shrink when the highest-numbered signs have no samples yet.
y = to_categorical(indexes, num_classes=max(SIGNS) + 1)

x_train, x_test, y_train, y_test = train_test_split(x, y)

# create translator

In [None]:
# Network input width (values per sample) and output width (one-hot columns).
ip, op = x.shape[1], y.shape[1]

In [None]:
# Fully connected classifier: keypoint vector in, one probability per sign out.
# Hidden layers are named layer_1 ... layer_7 and all use ReLU.
hidden_units = (256, 512, 256, 128, 64, 32, 16)

translator = Sequential(name="translator", layers=[
    # The shape is given as a tuple; a bare int is rejected by newer Keras
    # releases.
    Input(name="layer_0", shape=(ip,)),
    *[
        Dense(name=f"layer_{number}", units=units, activation="relu")
        for number, units in enumerate(hidden_units, start=1)
    ],
    Dense(name="layer_8", units=op, activation="softmax"),
])

# metrics is passed as a list, the documented form; a bare string is rejected
# by newer Keras releases.
translator.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["categorical_accuracy"],
)

In [None]:
# Train for 100 epochs, reporting validation metrics on the held-out split;
# verbose=2 prints one line per epoch.
translator.fit(
    x_train,
    y_train,
    epochs=100,
    verbose=2,
    validation_data=(x_test, y_test),
)

In [None]:
# Persist the trained model twice: as an HDF5 file for Python and as a
# TensorFlow.js artifact directory.
python_model_path = "models/python/model.h5"
javascript_model_dir = "models/javascript"

translator.save(python_model_path)
save_keras_model(translator, javascript_model_dir)

# import translator

In [None]:
# Reload the saved HDF5 model so the cells below can run without retraining.
translator = load_model("models/python/model.h5")

# test translator

In [None]:
# Live translation: track each camera frame, classify its keypoint vector and
# show the predicted sign as the caption until "q" is pressed or the camera
# stops delivering frames.
capture = VideoCapture(0)

try:
    with Holistic() as tracker:
        while capture.isOpened():

            success, image = capture.read()

            # A failed read returns no frame; stop instead of handing None to
            # the tracker.
            if not success:
                break

            landmarks = tracker.process(image)

            # The model expects a batch, so add a leading axis of size one.
            features = expand_dims(a=get_keypoints(landmarks), axis=0)

            # verbose=0 keeps predict() from printing a progress bar per frame.
            prediction = SIGNS[argmax(translator.predict(features, verbose=0))]

            show_image(image, prediction, landmarks)

            if waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Release the camera and close the window even if the loop raised.
    capture.release()
    destroyAllWindows()