# 1. Dependencies

In [8]:
import cv2
import re
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp  
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# 2. Functions

In [2]:
mp_holistic = mp.solutions.holistic # MediaPipe Holistic solution module (model class and connection sets used below)
mp_drawing = mp.solutions.drawing_utils # MediaPipe drawing utilities (draw_landmarks, DrawingSpec)

def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and return the frame together with the results.

    Args:
        image: Frame in BGR channel order (it is converted with
            cv2.COLOR_BGR2RGB before being handed to the model).
        model: Object exposing a `process(rgb_image)` method, e.g. the
            Holistic instance created in the data gathering cell.

    Returns:
        Tuple `(bgr_image, results)`: the frame converted back to BGR and
        whatever `model.process` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Lock the buffer while the model reads it, then unlock it again so the
    # caller can draw on the returned frame.
    # NOTE(review): presumably lets MediaPipe avoid a copy - confirm in its docs.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto `image` with default styling.

    Args:
        image: Frame that `mp_drawing.draw_landmarks` draws onto.
        results: Detection results exposing `face_landmarks`,
            `pose_landmarks`, `left_hand_landmarks` and
            `right_hand_landmarks`.
    """
    # (attribute on `results`, connection set) in drawing order.
    layers = (
        ("face_landmarks", mp_holistic.FACEMESH_TESSELATION),
        ("pose_landmarks", mp_holistic.POSE_CONNECTIONS),
        ("left_hand_landmarks", mp_holistic.HAND_CONNECTIONS),
        ("right_hand_landmarks", mp_holistic.HAND_CONNECTIONS),
    )
    for attribute, connections in layers:
        mp_drawing.draw_landmarks(image, getattr(results, attribute), connections)
    
# The colours in the style table below can be changed to restyle the overlay.
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto `image` with per-part colours.

    Args:
        image: Frame that `mp_drawing.draw_landmarks` draws onto.
        results: Detection results exposing `face_landmarks`,
            `pose_landmarks`, `left_hand_landmarks` and
            `right_hand_landmarks`.
    """
    spec = mp_drawing.DrawingSpec
    # Each row: (landmarks, connection set, landmark style, connection style).
    # Colour channels must stay within 0-255; the face connection colour
    # previously used 256, which is outside the 8-bit range.
    styles = (
        # Face
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )
    for landmarks, connections, landmark_style, connection_style in styles:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_style, connection_style)

# Extracting data points
def extract_keypoints(results):
    """Flatten the pose, face and hand landmarks of `results` into one 1-D array.

    Pose landmarks contribute (x, y, z, visibility); face and hand landmarks
    contribute (x, y, z). A component that is missing (falsy) is replaced by
    zeros of size 33*4 (pose), 468*3 (face) or 21*3 (each hand).

    Args:
        results: Detection results exposing `pose_landmarks`,
            `face_landmarks`, `left_hand_landmarks` and
            `right_hand_landmarks`, each either falsy or an object with a
            `landmark` iterable.

    Returns:
        numpy array: pose, face, left hand and right hand values
        concatenated in that order.
    """
    def _flatten(landmark_list, fields, expected_points):
        # Zero-fill when the component was not detected.
        if not landmark_list:
            return np.zeros(expected_points * len(fields))
        rows = [[getattr(point, name) for name in fields]
                for point in landmark_list.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        _flatten(results.pose_landmarks, xyz + ("visibility",), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

# 2. Data Gathering

In [15]:
INPUT_VIDEO_PATH = r'C:\Users\Tommaso\Google Drive\Current Courses\COMP9444\CodingTasks\Dataset'

# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data')

# At most this many frames are exported per video
SEQUENCE_LENGTH = 30

label_map = {}     # action name -> integer class index (0..n_actions-1)
no_sequences = {}  # action name -> number of videos seen for that action
actions = []       # action names in order of first appearance

# VIDEO NAMING CONVENTION = "ACTION.SEQUENCE.mp4"
# e.g. "V.001.mp4"
# sorted() makes the class indices reproducible; os.listdir() order is arbitrary.
for i, vid in enumerate(sorted(os.listdir(INPUT_VIDEO_PATH))):
    print(vid, i)
    name_parts = vid.split('.')
    try:
        action, sequence = name_parts[0], int(name_parts[1])
    except (IndexError, ValueError):
        # Stray files (anything not named ACTION.SEQUENCE.ext) must not abort the run.
        print('File does not follow the naming convention, passing file')
        continue

    # Give each action one contiguous class index the first time it is seen.
    # Using the file index instead would yield gaps as soon as an action has
    # more than one video, which inflates the one-hot label width later on.
    if action not in label_map:
        label_map[action] = len(actions)
        actions.append(action)
    no_sequences[action] = no_sequences.get(action, 0) + 1

    try:
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
    except FileExistsError:
        # Keypoints for this video were exported on a previous run.
        print('Directory Already Exists, passing file')
        continue

    # Open the capture only once the video is known to need processing, and
    # always release it, even if detection or saving raises.
    cap = cv2.VideoCapture(os.path.join(INPUT_VIDEO_PATH, vid))
    try:
        # Set mediapipe model
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            frame_num = 0
            while cap.isOpened() and frame_num < SEQUENCE_LENGTH:
                # Read feed
                ret, frame = cap.read()
                if not ret:
                    # End of video (or read failure): stop early.
                    break

                image, results = mediapipe_detection(frame, holistic)
                # Draw landmarks (only visible when the display code below is enabled)
                draw_styled_landmarks(image, results)

                # UNCOMMENT THIS TO SEE THE VIDEO DISPLAYED
                # cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                #                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                # # Show to screen
                # cv2.imshow('OpenCV Feed', image)
                # cv2.waitKey(1)

                # Export keypoints as DATA_PATH/ACTION/SEQUENCE/FRAME.npy
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)
                frame_num += 1
    finally:
        cap.release()

cv2.destroyAllWindows()



R.001.mp4 0
Directory Already Exists, passing file
U.001.mp4 1
Directory Already Exists, passing file
V.001.mp4 2
Directory Already Exists, passing file


# 3. Data Preprocessing

In [26]:
print(no_sequences)
print(actions)

# Build one window (list of per-frame keypoint arrays) per exported video,
# together with the class index of its action.
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences[action]):
        # Sequence folders on disk are numbered from 1.
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence + 1))
        window = [
            np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            for frame_num in range(SEQUENCE_LENGTH)
        ]
        sequences.append(window)
        labels.append(label_map[action])

X = np.array(sequences)
# One-hot encode the integer labels.
y = to_categorical(labels).astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

{'R': 1, 'U': 1, 'V': 1}
['R', 'U', 'V']


# 4. Build and Train LSTM Neural Network

In [None]:
# torch/torchinfo are only needed by the commented-out PyTorch prototype below.
import torch
import torchinfo

# The Keras names used by the model below were never imported in this notebook.
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential


# batch_size = 32
# timesteps = 12
# input_features = 16
# h1_features = 8
# h2_features = 4
# h3_features = 2
# output_features = 1

# class SimpleModel(torch.nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.lstm1 = torch.nn.LSTM(input_size=input_features, hidden_size=h1_features)
#         self.lstm2 = torch.nn.LSTM(input_size=h1_features, hidden_size=h2_features)
#         self.lstm2 = torch.nn.LSTM(input_size=h1_features, hidden_size=h2_features)
#         self.fc1 = torch.nn.Linear(h2_features, h3_features)
#         self.relu = torch.nn.ReLU()
#         self.fc2 = torch.nn.Linear(h3_features, output_features)
#         self.sigmoid = torch.nn.Sigmoid()

#     def forward(self, inputs):
#         h1, (h1_T,c1_T) = self.lstm1(inputs)
#         h2, (h2_T, c2_T) = self.lstm2(h1)
#         h3 = self.fc1(h2[-1,:,:])
#         h3 = self.relu(h3)
#         output = self.fc2(h3)
#         output = self.sigmoid(output)
#         return output

print('Hello')


log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

# Stacked-LSTM classifier over keypoint sequences.
# Input is (frames per sequence, features per frame); 1662 is the length of
# the vector returned by extract_keypoints: 33*4 + 468*3 + 21*3 + 21*3.
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(SEQUENCE_LENGTH, 1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
# One softmax output per action; `actions` is a plain list, so use len()
# rather than the ndarray-only `.shape`.
model.add(Dense(len(actions), activation='softmax'))
