# 1. Import and Install Dependencies

In [None]:
# %pip install tensorflow tensorflow-gpu opencv-python mediapipe scikit-learn matplotlib seaborn scipy

In [None]:
import cv2 as cv
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [None]:
# Short aliases into MediaPipe's `solutions` namespace:
#   mp_holistic - the Holistic solution module
#   mp_drawing  - the landmark drawing utilities
mp_holistic, mp_drawing = mp.solutions.holistic, mp.solutions.drawing_utils

In [None]:
def mediapipe_detection(image, model):
    """Run `model.process` on a BGR frame and return the frame with the results.

    The frame is converted BGR -> RGB before being handed to the model, is
    marked read-only for the duration of the `process` call, and is converted
    back to BGR before being returned.

    Args:
        image: frame accepted by `cv.cvtColor(..., cv.COLOR_BGR2RGB)`.
        model: object exposing a `process(image)` method.

    Returns:
        Tuple `(bgr_image, results)` where `results` is whatever
        `model.process` returned.
    """
    rgb_frame = cv.cvtColor(image, cv.COLOR_BGR2RGB)

    # Read-only while the model looks at it, writeable again afterwards.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv.cvtColor(rgb_frame, cv.COLOR_RGB2BGR), results

In [None]:
def draw_styled_landmarks(image, results):
    """Overlay face, pose and hand landmarks from `results` onto `image`.

    Drawing is delegated to `mp_drawing.draw_landmarks`, called once per
    landmark group in the order face, pose, right hand, left hand. Nothing is
    returned; the overlay is applied to `image` by those calls.

    Args:
        image: frame passed straight through to `mp_drawing.draw_landmarks`.
        results: object exposing `face_landmarks`, `pose_landmarks`,
            `right_hand_landmarks` and `left_hand_landmarks`.
            NOTE(review): missing groups are passed on unguarded (as before);
            presumably `draw_landmarks` tolerates them - verify.
    """
    # One row per landmark group:
    # (landmarks, connections, landmark DrawingSpec kwargs, connection DrawingSpec kwargs)
    layers = (
        # Face. The connection colour used to be (80, 256, 121); 256 is outside
        # the 0-255 range of an 8-bit channel, so it is clamped to 255 here.
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         dict(color=(80, 110, 10), thickness=1, circle_radius=1),
         dict(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         dict(color=(80, 22, 10), thickness=3, circle_radius=3),
         dict(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         dict(color=(121, 22, 76), thickness=2, circle_radius=3),
         dict(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         dict(color=(245, 117, 66), thickness=2, circle_radius=3),
         dict(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, landmark_style, connection_style in layers:
        mp_drawing.draw_landmarks(
            image, landmarks, connections,
            mp_drawing.DrawingSpec(**landmark_style),
            mp_drawing.DrawingSpec(**connection_style),
        )

In [None]:
def rescaleFrame(frame, scale = 0.5) :
    """Return `frame` resized by `scale` along both axes.

    The target width and height are the truncated products of the current
    size and `scale`; resizing uses `cv.INTER_AREA`.
    """
    rows, cols = frame.shape[0], frame.shape[1]
    target_size = (int(cols * scale), int(rows * scale))  # (width, height)
    return cv.resize(frame, target_size, interpolation=cv.INTER_AREA)

# 3. Extract Keypoint Values

In [None]:
# Storing all the landmarks in array using list comprehension

def extract_keypoints(results):
    """Flatten all landmark groups of `results` into one 1-D feature vector.

    Groups are concatenated in the order pose, face, left hand, right hand.
    Pose landmarks contribute (x, y, z, visibility); the other groups
    contribute (x, y, z). A group that is missing (falsy) is replaced by zeros
    sized for 33 pose, 468 face or 21 hand landmarks respectively.

    Args:
        results: object exposing `pose_landmarks`, `face_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`, each either
            falsy or an object with a `.landmark` iterable.

    Returns:
        1-D NumPy array holding the concatenated values.
    """
    def _flatten(group, fields, n_landmarks):
        # Zero placeholder keeps the vector layout fixed when a group is absent.
        if not group:
            return np.zeros(n_landmarks * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flatten(results.pose_landmarks, xyz + ('visibility',), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

# 4. Collect Keypoint Values for Training and Testing

In [None]:
DIR = 'Dataset'
SAVE_PATH = 'NP_Array_Data'

# Only the first MAX_FRAMES frames of every video are processed.
MAX_FRAMES = 50

# Expected layout: DIR/<sign>/<video file>. Keypoints are written to
# SAVE_PATH/<sign>/<video file name>/<n>.npy
for file in os.listdir(DIR):
    sign_dir = os.path.join(DIR, file)
    if not os.path.isdir(sign_dir):
        # Stray files next to the sign folders would make os.listdir fail below.
        continue

    # Created even when the sign folder holds no videos.
    os.makedirs(os.path.join(SAVE_PATH, file), exist_ok=True)

    for video in os.listdir(sign_dir):
        out_dir = os.path.join(SAVE_PATH, file, video)
        os.makedirs(out_dir, exist_ok=True)
        print(video)

        cap = cv.VideoCapture(os.path.join(sign_dir, video))
        try:
            # Set mediapipe model (a fresh Holistic instance per video)
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

                # i is the 1-based index of the frame being processed. Stopping
                # at MAX_FRAMES avoids decoding the rest of the video for nothing.
                for i in range(1, MAX_FRAMES + 1):

                    # Read feed; a failed read means end of video or unreadable file
                    isTrue, frame = cap.read()
                    if not isTrue:
                        break

                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # Draw landmarks
                    draw_styled_landmarks(image, results)

                    # Display
                    cv.imshow('Live Feed', rescaleFrame(image))

                    # Keep even-numbered frames that are not multiples of 5:
                    # at most 20 of the 50 frames, saved as <i // 2>.npy
                    if i % 2 == 0 and i % 5 != 0:
                        keypoints = extract_keypoints(results)
                        npy_path = os.path.join(out_dir, str(i // 2))
                        try:
                            np.save(npy_path, keypoints)
                        except OSError as err:
                            # Best effort as before, but no longer silent.
                            print(f'Could not save {npy_path}: {err}')

                    # 'q' abandons the current video only; the next one still runs
                    if cv.waitKey(1) & 0xFF == ord('q'):
                        break
        finally:
            # Release the capture and close the window even if a step above raised.
            cap.release()
            cv.destroyAllWindows()

# 5. Preprocess Data and Create Labels and Features

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
SIGNS_PATH = 'NP_Array_Data/'

# Every entry found directly under SIGNS_PATH becomes one sign label.
signs = [entry for entry in os.listdir(SIGNS_PATH)]

# Persist the labels (np.save writes 'action.npy') so the same order can be reloaded.
action = np.array(signs)
np.save('action', action)

In [None]:
# Reload the label array from 'action.npy' in the working directory and display it.
action = np.load('action.npy')
action

In [None]:
# Map each sign label to its position in `action` (label -> integer class id).
label_map = dict(zip(action, range(len(action))))
label_map

In [None]:
DIR = 'NP_Array_Data'


def _frame_order(filename):
    """Sort key: numeric stems ('2.npy' before '11.npy') first, other names after."""
    stem = os.path.splitext(filename)[0]
    if stem.isdigit():
        return (0, int(stem), '')
    return (1, 0, stem)


sequences = []   # one list of per-frame keypoint arrays per video
labels = []      # integer class id of each entry in `sequences`

for actions in os.listdir(DIR):
    for videos in os.listdir(os.path.join(DIR, actions)):
        video_dir = os.path.join(DIR, actions, videos)

        # os.listdir gives no ordering guarantee, and a plain name sort yields
        # 1, 11, 12, ..., 2, 21, ... Sorting on the numeric stem restores the
        # frame order in which the keypoints were saved.
        frame_files = sorted(os.listdir(video_dir), key=_frame_order)

        window = [np.load(os.path.join(video_dir, frames)) for frames in frame_files]
        sequences.append(window)
        labels.append(label_map[actions])

In [None]:
# Save next to the notebook (working directory) instead of a machine-specific
# absolute path, so the files are the ones read back by
# np.load('sequences.npy') and np.load('labels.npy').
Path = '.'

# Every video must contribute the same number of frames, otherwise the
# sequences cannot be stacked into one array.
window_lengths = {len(window) for window in sequences}
if len(window_lengths) > 1:
    raise ValueError(f'Videos have differing frame counts: {sorted(window_lengths)}')

npy_path = os.path.join(Path, 'sequences')
np.save(npy_path, np.array(sequences))
npy_path = os.path.join(Path, 'labels')
np.save(npy_path, np.array(labels))

In [None]:
# Load the saved feature sequences from 'sequences.npy' in the working directory.
# presumably shaped (videos, frames, features): x.shape[1] and x.shape[2] are
# used as the LSTM input shape further down - TODO confirm
x = np.load('sequences.npy')
print(x)

In [None]:
# One-hot encode the integer class labels loaded from 'labels.npy';
# the encoded matrix is cast to int and displayed.
labels = np.load('labels.npy')
y = to_categorical(labels).astype(int)
y

In [None]:
# Splitting our training and testing data (5% held out for testing).
# A fixed seed makes the split, and therefore every metric computed on the
# test set, reproducible across kernel restarts.
RANDOM_STATE = 42

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.05, random_state = RANDOM_STATE)

In [None]:
# Shapes in the order: train features, train labels, test features, test labels.
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

# 6. Build and Train LSTM Neural Network

In [None]:
from tensorflow.keras.models import Sequential
from keras.layers import Dense, LSTM
from tensorflow.keras.callbacks import TensorBoard

In [None]:
# TensorBoard event files are written to the 'Logs' folder.
log_dir = 'Logs'
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
# Drop a model left over from an earlier run, if any. A bare `del model` raises
# NameError on a fresh kernel, which breaks Restart & Run All.
globals().pop('model', None)

In [None]:
# Three stacked LSTM layers (64 -> 128 -> 64 units) followed by two dense
# layers and a softmax over one output unit per entry in `action`.
n_timesteps, n_features = x.shape[1], x.shape[2]

model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(n_timesteps, n_features)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),  # last LSTM emits one vector per sequence
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(action.shape[0], activation='softmax'),
])

In [None]:
# Categorical cross-entropy loss with the Adam optimizer; categorical accuracy
# is tracked as the metric (labels are one-hot encoded).
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
# NOTE(review): epochs = 0 requests zero training epochs, so this call is not
# expected to change the weights - presumably intentional, since weights are
# loaded from 'Saved_Model_Weights/demo_model.h5' below. Raise `epochs` to
# actually train. The test split is also passed as the validation data.
model.fit(x_train, y_train, epochs = 0, validation_data=(x_test, y_test), callbacks=[tb_callback])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Load weights from disk (path is relative to the working directory).
# NOTE(review): presumably the file must come from a model with this exact architecture - confirm.
model.load_weights('Saved_Model_Weights/demo_model.h5')

# 7. Make Predictions

In [None]:
# Predict on the full dataset `x` (training and test samples alike).
y_pred = model.predict(x)

In [None]:
# Print the true and the predicted sign for every sequence. Rows of `y` and
# `y_pred` are paired directly, which replaces the index loop and its dead
# `i = 1` initialisation.
for truth_row, predicted_row in zip(y, y_pred):
    Actual = action[np.argmax(truth_row)]
    Prediction = action[np.argmax(predicted_row)]
    print('Actual :', Actual)
    print('Prediction :', Prediction)

# 8. Saving and loading Weights

In [None]:
# model.save_weights('Saved_Model_Weights/demo_model.h5')

In [None]:
# model.load_weights('Saved_Model_Weights/demo_model.h5')

# 9. Evaluation using Confusion Matrix and Accuracy

In [None]:
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix, accuracy_score

In [None]:
# Predict on the held-out test split only; this overwrites the earlier
# `y_pred` computed on the full dataset.
y_pred = model.predict(x_test)

In [None]:
# Convert one-hot rows / probability rows to integer class ids.
y_true = np.argmax(y_test, axis=1).tolist()

# `y_pred` is overwritten with its own argmax, so a second run of this cell
# would see a 1-D list and fail on axis=1. The dimension guard makes the cell
# safe to re-run.
if np.ndim(y_pred) == 2:
    y_pred = np.argmax(y_pred, axis=1).tolist()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

## Get Class Labels
class_names = action

# Passing every class id keeps the matrix square and aligned with class_names
# even when the small test split does not contain every class.
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))

# Plot the confusion matrix as an annotated heatmap with named ticks
fig, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(cm, annot=True, ax=ax, fmt='g',
            xticklabels=class_names, yticklabels=class_names);

# labels, title and ticks
ax.set_xlabel('Predicted', fontsize=20)
ax.xaxis.set_label_position('bottom')
ax.xaxis.tick_bottom()
ax.tick_params(axis='x', labelrotation=90)

ax.set_ylabel('True', fontsize=20)
ax.tick_params(axis='y', labelrotation=0)

ax.set_title('Refined Confusion Matrix', fontsize=20)

# bbox_inches='tight' keeps the rotated tick labels inside the saved image
fig.savefig('Confusion_Matrix_heatmap.png', bbox_inches='tight')
plt.show()

In [None]:
# Per-class confusion matrices computed from the integer class ids.
multilabel_confusion_matrix(y_true, y_pred)

In [None]:
# Overall accuracy on the test split, shown as a percentage.
accuracy_percent = 100 * accuracy_score(y_true, y_pred)
print(f'Accuracy : {accuracy_percent} %')

# 10. Test in Real Time

In [None]:
def make_1080p(capture=None):
    """Request a 1920x1080 frame size from a video capture.

    Args:
        capture: object with a `set(prop_id, value)` method, e.g. a
            `cv.VideoCapture`. When omitted, the notebook-level `cap` is used,
            which is what the parameterless version always did.
    """
    if capture is None:
        capture = cap
    # Named property ids instead of the bare numbers 3 (width) and 4 (height).
    capture.set(cv.CAP_PROP_FRAME_WIDTH, 1920)
    capture.set(cv.CAP_PROP_FRAME_HEIGHT, 1080)
    
def rescaleFeed(frame, scale = 3) :
    """Return `frame` resized by `scale` along both axes.

    Args:
        frame: image array; `frame.shape[0]` is the height and
            `frame.shape[1]` the width.
        scale: size multiplier (default 3, i.e. enlarge).

    Returns:
        The resized image produced by `cv.resize`.
    """
    width = int(frame.shape[1] * scale)
    height = int(frame.shape[0] * scale)
    dimensions = (width, height)

    # INTER_AREA is meant for shrinking. The default scale enlarges the frame,
    # where OpenCV recommends INTER_LINEAR (or the slower INTER_CUBIC).
    interpolation = cv.INTER_AREA if scale < 1 else cv.INTER_LINEAR

    return cv.resize(frame, dimensions, interpolation = interpolation)

In [None]:
from scipy import stats

In [None]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Draw one probability bar and label per class on a copy of the frame.

    Args:
        res: iterable of per-class probabilities.
        actions: class names, indexed in the same order as `res`.
        input_frame: image to annotate; it is copied, not modified.
        colors: non-empty sequence of bar colours. It is cycled when there
            are more classes than colours (this used to raise IndexError).

    Returns:
        The annotated copy of `input_frame`.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40   # each class gets its own 40 px row
        bar_color = colors[num % len(colors)]
        # Bar length is the probability in pixels (1.0 -> 100 px)
        cv.rectangle(output_frame, (0, top), (int(prob*100), top + 30), bar_color, -1)
        cv.putText(output_frame, actions[num], (0, top + 25), cv.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv.LINE_AA)

    return output_frame

In [None]:
# 1. New detection variables
sequence = []       # rolling window of the most recent keypoint vectors
sentence = []       # recently recognised signs shown in the banner
predictions = []    # class ids predicted for the most recent windows
threshold = 0.8     # minimum probability required to accept a sign

SEQUENCE_LENGTH = 20    # frames per window; NOTE(review): must match the window length the model expects
STABILITY_WINDOW = 10   # number of recent predictions used for the majority vote
MAX_WORDS = 5           # signs kept in the banner

cap = cv.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed; stop cleanly when no frame arrives instead of
            # crashing inside cv.flip on a missing frame
            ret, frame = cap.read()
            if not ret:
                break

            # Mirror Effect
            # NOTE(review): the collection cell leaves the flip commented out,
            # so training frames may be un-mirrored - confirm this is intended.
            image = cv.flip(frame, 1)

            # Make detections
            image, results = mediapipe_detection(image, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-SEQUENCE_LENGTH:]

            if len(sequence) == SEQUENCE_LENGTH:
                # verbose=0 suppresses the per-call progress output
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = int(np.argmax(res))
                print(action[best])

                # Keep only what the vote needs so the list cannot grow forever
                predictions.append(best)
                predictions = predictions[-STABILITY_WINDOW:]

                # Accept the sign only if it is the most frequent recent
                # prediction and confident enough. The previous check,
                # np.unique(...)[0], compared against the smallest recent
                # class id rather than the prevailing one.
                is_stable = int(np.bincount(predictions).argmax()) == best
                if is_stable and res[best] > threshold:
                    # Do not repeat the sign that is already last in the sentence
                    if not sentence or action[best] != sentence[-1]:
                        sentence.append(action[best])

                sentence = sentence[-MAX_WORDS:]

            # Banner spans the full frame width rather than a fixed 640 px
            cv.rectangle(image, (0,0), (image.shape[1], 40), (245, 117, 16), -1)
            cv.putText(image, ' '.join(sentence), (3,30),
                           cv.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv.LINE_AA)

            cv.imshow('OpenCV Feed', image)

            # Esc quits
            if cv.waitKey(10) & 0xFF == 27:
                break
finally:
    # Free the camera and close the window even if a step above raised
    cap.release()
    cv.destroyAllWindows()