# 1. Import and Install Dependencies

In [6]:
!pip install tensorflow
# from tensorflow.keras.utils import to_categorical
# Currently not able to install tensorflow as the python version in jupyter notebook needs to be downgraded using a virtual enviornment 
# that isolates this project's dependencies from other python installations.
# this is because current version of Python 3.12.4 is not compatible with 



In [70]:
!python --version

Python 3.12.4


In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [76]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a single BGR frame.

    Args:
        image: Frame in BGR channel order (the notebook passes frames read
            from ``cv2.VideoCapture``).
        model: Object exposing ``process(rgb_image)``; the notebook passes a
            ``mp_holistic.Holistic`` instance.

    Returns:
        Tuple ``(image, results)``: the frame converted back to BGR, and
        whatever ``model.process`` returned for it.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)      # BGR -> RGB for the model
    rgb_frame.flags.writeable = False                       # read-only while the model runs
    results = model.process(rgb_frame)                      # make the prediction
    rgb_frame.flags.writeable = True                        # writable again before converting back
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)  # RGB -> BGR for OpenCV display
    return bgr_frame, results

In [15]:
def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto ``image`` with default styling.

    Each landmark set is passed to ``mp_drawing.draw_landmarks`` together with
    its connection map, in the order face, pose, left hand, right hand.

    Args:
        image: Frame to draw on.
        results: Object with ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes.
    """
    landmark_sets = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),       # face connections
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),        # pose connections
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),   # left hand connections
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),  # right hand connections
    )
    for landmarks, connections in landmark_sets:
        mp_drawing.draw_landmarks(image, landmarks, connections)

In [7]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto ``image`` with per-part colours.

    Each landmark set is passed to ``mp_drawing.draw_landmarks`` with its
    connection map, a landmark ``DrawingSpec`` and a connection ``DrawingSpec``,
    in the order face, pose, left hand, right hand.

    Args:
        image: Frame to draw on.
        results: Object with ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes.
    """
    spec = mp_drawing.DrawingSpec
    styled_sets = (
        # (landmarks, connections, landmark style, connection style)
        # Face. The connection colour's middle channel was 256, which is outside
        # the 0-255 range of an 8-bit colour channel; it is now 255.
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )
    for landmarks, connections, landmark_style, connection_style in styled_sets:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_style, connection_style)

In [101]:
len(results.face_landmarks.landmark)

468

In [378]:
len(mp_holistic.FACEMESH_CONTOURS)

124

In [380]:
print(mp.__version__)

0.10.14


In [95]:
draw_landmarks(frame, results)

In [11]:
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

NameError: name 'frame' is not defined

In [13]:
# Live preview: read webcam frames, run the holistic model and show the
# annotated frame until 'q' is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read feed; stop instead of handing an invalid frame to the model
            ret, frame = cap.read()
            if not ret:
                break

            # Make predictions
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Show to screen
            cv2.imshow("OpenCV Feed", image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a step above raised
    cap.release()
    cv2.destroyAllWindows()



In [35]:
len(results.face_landmarks.landmark)

468

# 3. Extract Keypoint Values

In [37]:
results.pose_landmarks.landmark[0]

x: 0.493385226
y: 0.670991898
z: -0.659977734
visibility: 0.99974668

In [327]:
# List Comprehension
# Pose keypoints: (x, y, z, visibility) per landmark, flattened to 1-D;
# a zero vector of length 33*4 stands in when no pose was detected.
if results.pose_landmarks:
    pose = np.array(
        [[lm.x, lm.y, lm.z, lm.visibility] for lm in results.pose_landmarks.landmark]
    ).flatten()
else:
    pose = np.zeros(33*4)

In [331]:
# Face keypoints: (x, y, z) per landmark, flattened to 1-D;
# a zero vector of length 468*3 stands in when no face was detected.
if results.face_landmarks:
    face = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.face_landmarks.landmark]
    ).flatten()
else:
    face = np.zeros(468 * 3)

In [333]:
# Left-hand keypoints: (x, y, z) per landmark, flattened to 1-D;
# a zero vector of length 21*3 stands in when the hand was not detected.
if results.left_hand_landmarks:
    lh = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.left_hand_landmarks.landmark]
    ).flatten()
else:
    lh = np.zeros(21 * 3)

In [337]:
# Right-hand keypoints: (x, y, z) per landmark, flattened to 1-D;
# a zero vector of length 21*3 stands in when the hand was not detected.
if results.right_hand_landmarks:
    rh = np.array(
        [[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark]
    ).flatten()
else:
    rh = np.zeros(21 * 3)

In [339]:
pose, face, lh, rh

(array([ 2.48706222e-01,  6.95723414e-01, -1.18184030e+00,  9.99852121e-01,
         2.45735675e-01,  5.92899144e-01, -1.13247871e+00,  9.99730587e-01,
         2.64236033e-01,  5.73703170e-01, -1.13261962e+00,  9.99796569e-01,
         2.82733172e-01,  5.55983126e-01, -1.13277531e+00,  9.99777555e-01,
         1.89164445e-01,  6.48744464e-01, -1.10759246e+00,  9.99655843e-01,
         1.67849004e-01,  6.69281840e-01, -1.10662818e+00,  9.99595881e-01,
         1.45933062e-01,  6.87886298e-01, -1.10686648e+00,  9.99416471e-01,
         3.24261516e-01,  5.56096435e-01, -7.13787735e-01,  9.99831140e-01,
         1.25810534e-01,  7.24996030e-01, -5.54198146e-01,  9.99714315e-01,
         3.20215344e-01,  7.33330667e-01, -1.01426029e+00,  9.99817848e-01,
         2.44324639e-01,  8.02259386e-01, -9.69639480e-01,  9.99699891e-01,
         6.21873319e-01,  8.20281148e-01, -4.04470354e-01,  9.98773217e-01,
         1.31325841e-01,  1.01617825e+00, -2.04120368e-01,  9.85335588e-01,
         1.0

In [310]:
pose.shape,face.shape,lh.shape, rh.shape

((132,), (1404,), (63,), (63,))

In [312]:
132+1404+63*2

1662

In [341]:
rh

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [9]:
def _flatten_landmarks(landmark_list, size, fields):
    """Return ``fields`` of every landmark as a flat 1-D array, or ``size`` zeros if the list is missing."""
    if not landmark_list:
        return np.zeros(size)
    return np.array(
        [[getattr(lm, field) for field in fields] for lm in landmark_list.landmark]
    ).flatten()


def extract_keypoints(results):
    """Flatten one frame's holistic landmarks into a single feature vector.

    The vector is the concatenation, in this order, of:
      * pose:       x, y, z, visibility per landmark (zeros of length 33*4 if missing)
      * face:       x, y, z per landmark             (zeros of length 468*3 if missing)
      * left hand:  x, y, z per landmark             (zeros of length 21*3 if missing)
      * right hand: x, y, z per landmark             (zeros of length 21*3 if missing)

    Args:
        results: Object with ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` attributes;
            each is either falsy or has a ``landmark`` iterable.

    Returns:
        1-D ``numpy`` array; 1662 values when every part is missing or has the
        landmark counts listed above.
    """
    xyz = ('x', 'y', 'z')
    pose = _flatten_landmarks(results.pose_landmarks, 33 * 4, xyz + ('visibility',))
    face = _flatten_landmarks(results.face_landmarks, 468 * 3, xyz)
    lh = _flatten_landmarks(results.left_hand_landmarks, 21 * 3, xyz)
    rh = _flatten_landmarks(results.right_hand_landmarks, 21 * 3, xyz)
    return np.concatenate([pose, face, lh, rh])
    

In [3]:
extract_keypoints(results).shape

NameError: name 'results' is not defined

In [43]:
result_test = extract_keypoints(results)

In [45]:
result_test

array([ 0.49338523,  0.6709919 , -0.65997773, ...,  0.15633295,
        0.24390131, -0.06281146])

In [47]:
np.save('0', result_test)

# 4. Setup Folders for Collection

In [11]:
# Root folder for the exported keypoint arrays (saved with np.save)
DATA_PATH = 'MP_Data'

# Gesture classes to detect
actions = np.array(['hello', 'thanks', 'iloveyou'])

# Recordings ("videos") collected per action, and frames per recording
no_sequences, sequence_length = 30, 30

In [13]:
actions.shape[0]
ac = np.array([[1, 2, 3], [4, 5, 6]])
ac.shape[0], ac.shape[1]

(2, 3)

In [15]:
current_directory = os.getcwd()
print(current_directory)

C:\Users\HP\100 Days of ML


In [363]:
# Create DATA_PATH/<action>/<sequence>/ for every recording to be collected.
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True makes re-runs harmless while real failures
        # (permissions, invalid path) still raise instead of being swallowed.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

# 5. Collect Keypoint Values for Training and Testing

In [5]:
# Record keypoints for every action / sequence / frame and save one .npy file
# per frame under DATA_PATH/<action>/<sequence>/<frame_num>.npy.
cap = cv2.VideoCapture(0)
stop_requested = False  # set when 'q' is pressed; ends collection across all loops
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        # Loop through actions
        for action in actions:
            # Loop through sequences aka videos
            for sequence in range(no_sequences):
                # Loop through video length
                for frame_num in range(sequence_length):
                    # Read feed; a failed read would otherwise crash later with
                    # an opaque OpenCV error, so fail here with context.
                    ret, frame = cap.read()
                    if not ret:
                        raise RuntimeError(
                            "Camera read failed while collecting {} / video {} / frame {}".format(
                                action, sequence, frame_num))

                    # Make predictions
                    image, results = mediapipe_detection(frame, holistic)

                    # Draw landmarks
                    draw_styled_landmarks(image, results)

                    # Wait logic: announce the start of each recording, then pause
                    if frame_num == 0:
                        cv2.putText(image, "STARTING COLLECTION", (120, 200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, "Collecting frames for {} and Video number {}".format(action, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show first, then pause, so the announcement is on screen
                    # during the 2-second break between recordings.
                    cv2.imshow("Open CV Feed", image)
                    if frame_num == 0:
                        cv2.waitKey(2000)

                    # Extract keypoints and save them for this frame
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # Break gracefully: a plain `break` only leaves the frame
                    # loop, so record the request and unwind the outer loops too.
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break
                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the camera and close the window, even on error
    cap.release()
    cv2.destroyAllWindows()

NameError: name 'mp_holistic' is not defined

In [13]:
cv2.putText??

[1;31mDocstring:[0m
putText(img, text, org, fontFace, fontScale, color[, thickness[, lineType[, bottomLeftOrigin]]]) -> img
.   @brief Draws a text string.
.   
.   The function cv::putText renders the specified text string in the image. Symbols that cannot be rendered
.   using the specified font are replaced by question marks. See #getTextSize for a text rendering code
.   example.
.   
.   @param img Image.
.   @param text Text string to be drawn.
.   @param org Bottom-left corner of the text string in the image.
.   @param fontFace Font type, see #HersheyFonts.
.   @param fontScale Font scale factor that is multiplied by the font-specific base size.
.   @param color Text color.
.   @param thickness Thickness of the lines used to draw a text.
.   @param lineType Line type. See #LineTypes
.   @param bottomLeftOrigin When true, the image data origin is at the bottom-left corner. Otherwise,
.   it is at the top-left corner.
[1;31mType:[0m      builtin_function_or_method

# 6. Preprocess Data and Create Labels and Features

In [17]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [19]:
for num, label in enumerate(actions):
    print("{}:{}".format(label, num))

hello:0
thanks:1
iloveyou:2


In [21]:
label_map = {label:num for num, label in enumerate(actions)}

In [23]:
label_map

{'hello': 0, 'thanks': 1, 'iloveyou': 2}

In [25]:
# Rebuild the dataset from disk: one `window` (list of per-frame keypoint
# arrays) per recorded sequence, with the action's class index alongside it.
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        window = [
            np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])

In [27]:
np.array(sequences).shape

(90, 30, 1662)

In [29]:
np.array(labels).shape

(90,)

In [31]:
X = np.array(sequences)

In [33]:
X.shape

(90, 30, 1662)

In [35]:
y = to_categorical(labels).astype(int)

In [45]:
y

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0,

In [37]:
labels = np.array(labels)

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

In [41]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((85, 30, 1662), (5, 30, 1662), (85, 3), (5, 3))

# 7. Build and Train LSTM Neural Network

In [43]:
try:
    import tensorflow as tf
    print(f"TensorFlow version {tf.__version__} is installed.")
except ImportError:
    print("TensorFlow is not installed.")

TensorFlow version 2.17.0 is installed.


In [89]:
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Dense, Input, LSTM
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.utils import plot_model

In [47]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [49]:
# Model architecture: explicit input layer, three stacked LSTM layers, then
# dense layers ending in a softmax over the actions.
model = Sequential()
# Declare the input as its own layer. Passing input_shape= to the first LSTM
# appears to be what produced the Keras warning recorded under this cell.
model.add(Input(shape=(30, 1662)))  # (frames per sequence, keypoint values per frame)
# NOTE(review): relu is used inside the LSTM layers instead of the default
# activation. The recorded training log shows the loss swinging between roughly
# 9 and 55 in the first epochs, so confirm this choice is intentional.
model.add(LSTM(64, return_sequences=True, activation='relu'))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))  # last LSTM emits one vector per sequence
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))  # one output unit per action

  super().__init__(**kwargs)


In [51]:
model.summary()

In [128]:
res = [.7, 0.2, 0.1]

In [130]:
actions[np.argmax(res)]

'hello'

In [53]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [65]:
model.fit(X_train, y_train, epochs=601, callbacks=[tb_callback])

Epoch 1/601
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 63ms/step - categorical_accuracy: 0.3268 - loss: 8.7872
Epoch 2/601
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step - categorical_accuracy: 0.3542 - loss: 20.2167
Epoch 3/601
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - categorical_accuracy: 0.3013 - loss: 38.0793
Epoch 4/601
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - categorical_accuracy: 0.3366 - loss: 29.6232
Epoch 5/601
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - categorical_accuracy: 0.3640 - loss: 10.2733
Epoch 6/601
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - categorical_accuracy: 0.4618 - loss: 11.1783
Epoch 7/601
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - categorical_accuracy: 0.3933 - loss: 54.5953
Epoch 8/601
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step 

<keras.src.callbacks.history.History at 0x1d4ec11c1a0>

In [67]:
X_train.shape, y_train.shape

((85, 30, 1662), (85, 3))

In [69]:
X_test.shape, y_test.shape

((5, 30, 1662), (5, 3))

In [1]:
# sequence_length = X_train.shape[1]  # 30
# y_train_expanded = np.repeat(y_train[:, np.newaxis, :], sequence_length, axis=1)

In [3]:
# y_train_expanded.shape

# 8. Make Predictions

In [95]:
res = model.predict(X_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


In [105]:
res

array([[1.3531095e-05, 9.9991572e-01, 7.0737944e-05],
       [9.9292409e-01, 6.8744491e-03, 2.0140246e-04],
       [2.9969658e-04, 9.9888057e-01, 8.1978168e-04],
       [6.9833723e-06, 2.6357447e-04, 9.9972934e-01],
       [9.9591476e-01, 9.2563854e-04, 3.1595032e-03]], dtype=float32)

In [101]:
actions[np.argmax(res[0])]

'thanks'

In [103]:
actions[np.argmax(y_test[0])]

'thanks'

# 9. Save and Reload the Model

In [81]:
model.save('action.h5')



In [83]:
del model

In [91]:
model = load_model('action.h5')



# 10. Evaluation using Confusion Matrix and Accuracy

In [107]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [137]:
# NOTE: predictions here are made on the training split, so the confusion
# matrix and accuracy computed from them measure fit to the training data,
# not performance on held-out data. For held-out metrics, predict on X_test
# here and build y_true from y_test in the next cell.
# y_hat = model.predict(X_test) # For Testing dataset
y_hat = model.predict(X_train) # For Training dataset

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step 


In [139]:
# Turn one-hot labels and predicted probabilities into class indices.
# Features and labels must come from the same split, so both are named once here.
# Predictions are recomputed rather than overwriting an existing y_hat, which
# keeps this cell safe to re-run.
X_eval, y_eval = X_train, y_train  # use (X_test, y_test) for held-out metrics
y_true = np.argmax(y_eval, axis=1).tolist()
y_hat = np.argmax(model.predict(X_eval), axis=1).tolist()

In [141]:
multilabel_confusion_matrix(y_true, y_hat)

array([[[56,  1],
        [ 0, 28]],

       [[57,  0],
        [ 1, 27]],

       [[56,  0],
        [ 0, 29]]], dtype=int64)

In [135]:
multilabel_confusion_matrix??

[1;31mSignature:[0m
[0mmultilabel_confusion_matrix[0m[1;33m([0m[1;33m
[0m    [0my_true[0m[1;33m,[0m[1;33m
[0m    [0my_pred[0m[1;33m,[0m[1;33m
[0m    [1;33m*[0m[1;33m,[0m[1;33m
[0m    [0msample_weight[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mlabels[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0msamplewise[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mSource:[0m   
[1;33m@[0m[0mvalidate_params[0m[1;33m([0m[1;33m
[0m    [1;33m{[0m[1;33m
[0m        [1;34m"y_true"[0m[1;33m:[0m [1;33m[[0m[1;34m"array-like"[0m[1;33m,[0m [1;34m"sparse matrix"[0m[1;33m][0m[1;33m,[0m[1;33m
[0m        [1;34m"y_pred"[0m[1;33m:[0m [1;33m[[0m[1;34m"array-like"[0m[1;33m,[0m [1;34m"sparse matrix"[0m[1;33m][0m[1;33m,[0m[1;33m
[0m        [1;34m"sample_weight"[0m[1;33m:[0m [1;33m[[0m[1;34m"array-like"[0m[1;33m,[0m [1;32mNone[0m[1;33m][0m[1;33

In [143]:
accuracy_score(y_true, y_hat)

0.9882352941176471

# 11. Test in Real Time

In [129]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Overlay one horizontal probability bar per action on a copy of ``input_frame``.

    Args:
        res: 1-D sequence of probabilities, one per action (a single row of
            model output, not a whole batch).
        actions: Labels, indexed in the same order as ``res``.
        input_frame: Image to annotate; it is copied, not modified.
        colors: Non-empty sequence of colour tuples, reused cyclically when
            there are more probabilities than colours.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40                     # each bar occupies its own 40-pixel row
        bar_color = colors[num % len(colors)]   # cycle so extra classes don't raise IndexError
        # Bar width is the probability scaled to 0-100 pixels
        cv2.rectangle(output_frame, (0, top), (int(prob * 100), top + 30), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, top + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    return output_frame

In [131]:
plt.figure(figsize=(18,18))
plt.imshow(prob_viz(res, actions, image, colors))

NameError: name 'image' is not defined

<Figure size 1800x1800 with 0 Axes>