# Like or Dislike classification
### by Yasin Shafiei

Classify like and dislike gestures in video using an LSTM network.

### Import all libraries and define variables

In [21]:
# import all libraries 
import tensorflow as tf
import numpy as np
import mediapipe as mp
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.layers import *
import cv2
import os
from keras.models import Sequential

In [22]:
# configuration: dataset location and hyperparameters
EPOCHS = 1500                        # epoch budget; the visible training cell passes its own value
len_sequence = 30                    # number of frame files read per sequence
n_sequence = 50                      # number of sequences read per action
DATA_PATH = os.path.join("MP_Data")  # root folder of the saved keypoint arrays

In [23]:
# shortcuts to the MediaPipe drawing helpers and the holistic solution
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

### Draw landmark

In [24]:
def mediapipe_detection(image, model):
    """
    Run `model` on a BGR frame and return the frame together with the results.

    The frame is converted BGR -> RGB before `model.process` is called, is
    flagged read-only for the duration of that call, and is converted back to
    BGR afterwards.

    Returns:
        (image, results): the frame after the BGR -> RGB -> BGR round trip and
        whatever `model.process` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # lock the buffer while the model reads it, unlock it again afterwards
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [25]:
def draw_landmarks(image, results):
    """
    Draw the hand, pose and face landmarks from `results` onto `image`.

    The groups are drawn in this order: left hand, right hand, pose, face.
    Each group is passed to `mp_drawing.draw_landmarks` with one DrawingSpec
    for the landmark points and one for the connections.

    Args:
        image: frame to draw on; it is handed to `mp_drawing.draw_landmarks`.
        results: object exposing `left_hand_landmarks`, `right_hand_landmarks`,
            `pose_landmarks` and `face_landmarks`.
    """
    # (landmarks, connections, point colour, point radius, connection colour)
    # every colour channel stays within 0-255
    layers = (
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (121, 22, 76), 4, (121, 44, 250)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (245, 117, 66), 4, (245, 66, 230)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, (80, 22, 10), 4, (80, 44, 121)),
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, (80, 110, 10), 1, (80, 255, 121)),
    )

    for landmarks, connections, point_color, point_radius, line_color in layers:
        mp_drawing.draw_landmarks(
            image,
            landmarks,
            connections,
            mp_drawing.DrawingSpec(color=point_color, thickness=1, circle_radius=point_radius),
            mp_drawing.DrawingSpec(color=line_color, thickness=1, circle_radius=1),
        )

We have now defined functions that detect keypoints and draw them on the body.
Next, let's load and preprocess our data.

### Load and preprocess the dataset

In [26]:
def extract_keypoints(results):
    """
    Flatten every landmark group in `results` into a single 1-D array.

    Layout of the returned vector: pose (33 points x [x, y, z, visibility]),
    face (468 points x [x, y, z]), left hand and right hand (21 points x
    [x, y, z] each). A group that is missing from `results` is replaced by
    zeros of the same length.
    """
    def _flatten(group, n_points, with_visibility=False):
        # missing group -> zero block with the length that group would have had
        if not group:
            return np.zeros(n_points * (4 if with_visibility else 3))
        if with_visibility:
            rows = [[point.x, point.y, point.z, point.visibility] for point in group.landmark]
        else:
            rows = [[point.x, point.y, point.z] for point in group.landmark]
        return np.array(rows).flatten()

    parts = [
        _flatten(results.pose_landmarks, 33, with_visibility=True),
        _flatten(results.face_landmarks, 468),
        _flatten(results.left_hand_landmarks, 21),
        _flatten(results.right_hand_landmarks, 21),
    ]
    return np.concatenate(parts)

In [27]:
# the gesture classes the model tells apart
actions = np.array(["like", "dislike"])

# map every action name to its integer class index
label_map = dict(zip(actions, range(len(actions))))

In [28]:
# feature windows and their integer labels, filled in lockstep
sequences = []
labels = []

# walk every action, then every recorded sequence (video) of that action
for action in actions:
    class_id = label_map[action]
    for sequence in range(n_sequence):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        # one saved keypoint array per frame of the sequence
        window = [
            np.load(os.path.join(sequence_dir, "{}.npy".format(n_frame)))
            for n_frame in range(len_sequence)
        ]

        # keep the window and its label at the same index
        sequences.append(window)
        labels.append(class_id)

print("Data loaded!")

Data loaded!


In [29]:
# features: stack all loaded windows into one array
X = np.array(sequences)

# targets: one-hot encode the integer labels, then cast to int
one_hot = to_categorical(labels)
y = one_hot.astype(int)

In [30]:
# split data
# random_state pins the shuffle so a re-run reproduces the same split;
# stratify keeps the class ratio of `labels` in both the train and the small test part
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=42, stratify=labels
)

In [31]:
# show the per-sample shape (every axis after the first)
sample_shape = X_train.shape[1:]
print(f"Shape of your data: {sample_shape}")

Shape of your data: (30, 1662)


Our dataset is now built and ready for training. Let's create our LSTM model.

### Neural Networks

In [52]:
# define the model: three stacked LSTM layers feeding a dense classifier head
model = Sequential([
    # recurrent part: the first two LSTMs return full sequences, the last one does not
    LSTM(64, return_sequences=True, activation="relu", input_shape=(30, 1662)),
    LSTM(128, return_sequences=True, activation="relu"),
    LSTM(128, return_sequences=False, activation="relu"),
    # fully connected part
    Dense(128, activation="relu"),
    Dense(128, activation="relu"),
    # dropout right before the output layer
    Dropout(0.1),
    # output layer: one softmax unit per entry in `actions`
    Dense(actions.shape[0], activation="softmax"),
])



In [53]:
# see the model summary (layer types, output shapes and parameter counts)
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 30, 64)            442112    
                                                                 
 lstm_7 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_8 (LSTM)               (None, 128)               131584    
                                                                 
 dense_6 (Dense)             (None, 128)               16512     
                                                                 
 dense_7 (Dense)             (None, 128)               16512     
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense_8 (Dense)             (None, 2)                

In [54]:
# compile the model: Adam optimizer, categorical cross-entropy loss, accuracy metric
model.compile(
    optimizer="Adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [56]:
# Train the model
# NOTE(review): EPOCHS is defined in the configuration cell but is not used here;
# the epoch count is hard-coded to 10 — confirm which value is intended.
model.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x22428e26a10>

### Save the model

In [57]:
# write the trained model to the file "model.h5" (relative path)
model.save("model.h5")

### Load and test the model

In [58]:
# load the weights from "model.h5" (the file written by model.save) into the existing `model` object
model.load_weights("model.h5")

In [59]:
def probability_visualize(res, action, input_frame, colors):
    """
    Draw one labelled probability bar per class on a copy of the frame.

    Args:
        res: per-class probabilities; entry i is labelled with action[i].
        action: class names, indexed like `res`.
        input_frame: frame to annotate; it is copied, the original is not drawn on.
        colors: bar fill colours, indexed like `res`.

    Returns:
        The annotated copy of `input_frame`.
    """
    output_frame = input_frame.copy()

    for num, prob in enumerate(res):
        # each class gets its own 30 px high row, 40 px apart, starting at y=60
        top = 60 + num * 40
        # bar length is the probability scaled to 0-100 pixels
        cv2.rectangle(output_frame, (0, top), (int(prob * 100), top + 30), colors[num], -1)
        # the label comes from the `action` argument, not from a notebook-level global
        cv2.putText(output_frame, action[num], (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    return output_frame

In [60]:
# bar fill colours passed to probability_visualize, which indexes them by class position
colors = [(245, 117, 16), (117, 245, 16)]

In [61]:
# rolling window of keypoint vectors and a var for threshold
sequence = list()
threshold = 0.8  # NOTE(review): defined but not applied anywhere in this cell
res = None       # latest prediction; stays None until a full window has been collected

# read the webcam
cap = cv2.VideoCapture(0)

try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.8, min_tracking_confidence=0.8) as holistic:
        while cap.isOpened():
            # read the capture; stop when no frame could be grabbed
            ret, frame = cap.read()
            if not ret:
                break

            # make detection and draw landmarks
            image, results = mediapipe_detection(frame, holistic)
            draw_landmarks(image, results)

            # keep only the most recent len_sequence keypoint vectors
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-len_sequence:]

            # Make prediction once the window is full
            if len(sequence) == len_sequence:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]

                # visualize probability
                image = probability_visualize(res, actions, image, colors)

            # show the prediction result (nothing to show before the first prediction)
            if res is not None:
                cv2.putText(image, actions[np.argmax(res)], (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

            # Show the webcam
            cv2.imshow("Detection", image)

            # waitkey and quit with q
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # always free the camera and close the window, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

