In [1]:
from tensorflow import keras
from tensorflow.keras import layers
import json
import numpy as np
import tensorflow as tf

# Load Model
def conv_bn(x, filters):
    """Run ``x`` through a kernel-size-1 Conv1D, batch normalisation and ReLU.

    Args:
        x: Keras tensor to transform.
        filters: Number of output channels of the convolution.

    Returns:
        The Keras tensor produced by the final ReLU activation.
    """
    convolved = layers.Conv1D(filters, kernel_size=1, padding="valid")(x)
    normalised = layers.BatchNormalization(momentum=0.0)(convolved)
    relu = layers.Activation("relu")
    return relu(normalised)


def dense_bn(x, filters):
    """Run ``x`` through a Dense layer, batch normalisation and ReLU.

    Args:
        x: Keras tensor to transform.
        filters: Number of units of the Dense layer.

    Returns:
        The Keras tensor produced by the final ReLU activation.
    """
    projected = layers.Dense(filters)(x)
    normalised = layers.BatchNormalization(momentum=0.0)(projected)
    relu = layers.Activation("relu")
    return relu(normalised)

class OrthogonalRegularizer(keras.regularizers.Regularizer):
    """Penalise the deviation of a predicted square matrix from orthogonality.

    The regularised tensor is reshaped to ``(-1, num_features, num_features)``
    and the penalty is ``l2reg * sum((X . X^T - I) ** 2)``.

    Args:
        num_features: Side length of the square matrix being regularised.
        l2reg: Multiplier applied to the squared deviation.
    """

    def __init__(self, num_features, l2reg=0.001):
        super().__init__()
        self.num_features = num_features
        self.l2reg = l2reg
        # Identity matrix the product X . X^T is compared against.
        self.eye = tf.eye(num_features)

    def __call__(self, x):
        """Return the scalar orthogonality penalty for ``x``."""
        x = tf.reshape(x, (-1, self.num_features, self.num_features))
        # NOTE(review): tensordot over axes (2, 2) yields a
        # (batch, F, batch, F) tensor, i.e. it also multiplies matrices of
        # different samples when batch > 1. Kept as-is because changing it
        # alters the training loss; tf.matmul(x, x, transpose_b=True) would
        # give the per-sample product -- confirm the intent before retraining.
        xxt = tf.tensordot(x, x, axes=(2, 2))
        xxt = tf.reshape(xxt, (-1, self.num_features, self.num_features))
        return tf.reduce_sum(self.l2reg * tf.square(xxt - self.eye))

    def get_config(self):
        """Return the constructor arguments needed to rebuild this regularizer.

        The base ``Regularizer.get_config`` is not implemented, so the config
        is built from scratch rather than extended. The keys match the
        ``__init__`` parameter names so the dict can be passed back to the
        constructor as keyword arguments.
        """
        return {"num_features": self.num_features, "l2reg": self.l2reg}
    
def tnet(inputs, num_features):
    """Build a T-Net block that predicts a transform and applies it to ``inputs``.

    A stack of ``conv_bn`` layers, a global max pool and a stack of
    ``dense_bn`` layers feed a Dense layer with ``num_features ** 2`` outputs.
    That output is reshaped to ``(num_features, num_features)`` and combined
    with ``inputs`` through a ``Dot`` layer.

    Args:
        inputs: Keras tensor whose last axis has ``num_features`` entries.
        num_features: Side length of the predicted square transform.

    Returns:
        The Keras tensor produced by the ``Dot`` layer.
    """
    # Initialise the bias as the flattened identity matrix; together with the
    # zero kernel the predicted transform starts out as the identity.
    identity_bias = keras.initializers.Constant(np.eye(num_features).flatten())
    ortho_reg = OrthogonalRegularizer(num_features)

    # NOTE(review): 1028 may have been meant as 1024; it is left unchanged
    # because previously saved weights depend on this width -- confirm.
    features = inputs
    for width in (32, 128, 256, 1028):
        features = conv_bn(features, width)
    features = layers.GlobalMaxPooling1D()(features)
    for width in (512, 256, 128):
        features = dense_bn(features, width)

    flat_transform = layers.Dense(
        num_features * num_features,
        kernel_initializer="zeros",
        bias_initializer=identity_bias,
        activity_regularizer=ortho_reg,
    )(features)
    transform = layers.Reshape((num_features, num_features))(flat_transform)

    # Apply the predicted transformation to the input features.
    return layers.Dot(axes=(2, 1))([inputs, transform])


# Collect the distinct class labels; each entry of the JSON mapping stores its
# label at index 1.
with open("labaled_data.json", "r", encoding="utf-8") as label_file:
    data = json.load(label_file)

# Sort the labels so the index -> label mapping is identical on every run.
# Iterating a plain set of strings can yield a different order in each Python
# process, which would silently pair model outputs with the wrong names.
# NOTE(review): the training notebook must derive its class order the same
# way for the saved weights to line up -- confirm.
classes = sorted({sample[1] for sample in data.values()})

# Number of 3-D points in one input cloud.
# NOTE(review): presumably the landmark count produced by FaceMesh with
# refine_landmarks=True in the inference cell -- confirm.
NUM_POINTS = 478
NUM_CLASSES = len(classes)

# PointNet classifier: input transform -> shared per-point layers ->
# feature transform -> shared per-point layers -> global max pool -> dense head.
inputs = keras.Input(shape=(NUM_POINTS, 3))

x = tnet(inputs, 3)
for width in (32, 32):
    x = conv_bn(x, width)
x = tnet(x, 32)
for width in (32, 64, 128, 512):
    x = conv_bn(x, width)
x = layers.GlobalMaxPooling1D()(x)
x = dense_bn(x, 512)
# The last two dense blocks are each followed by dropout.
for width in (256, 128):
    x = dense_bn(x, width)
    x = layers.Dropout(0.3)(x)

classifier_head = layers.Dense(NUM_CLASSES, activation="softmax")
outputs = classifier_head(x)

model = keras.Model(inputs=inputs, outputs=outputs, name="pointnet")
model.summary()


Model: "pointnet"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 478, 3)]     0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 478, 32)      128         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 478, 32)      128         conv1d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 478, 32)      0           batch_normalization[0][0]        
___________________________________________________________________________________________

In [2]:
# Restore the trained weights. Loading stays best-effort, but only ordinary
# errors are caught (a bare ``except`` would also swallow KeyboardInterrupt),
# and the reason is reported so a missing or incompatible checkpoint is not
# mistaken for a working model.
try:
    model.load_weights("./checkpoint_/Pointnet_expanded_our2")
except Exception as err:
    print("no model data")
    print(f"load_weights failed ({err!r}); predictions will come from untrained weights")

In [3]:
# Data Processing and model code
# import necessary packages

import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model
import time
import datetime

# MediaPipe solution modules used in this cell.
mpHands = mp.solutions.hands
mpFace = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh

# Drawing helpers for rendering landmarks onto frames.
mpDraw = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Hand tracker configured for up to two hands.
hands = mpHands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.7,
)


# Load the gesture recognizer model
# model = load_model('mp_hand_gesture')

# Load class names
# f = open('gesture.names', 'r')
# classNames = f.read().split('\n')
# f.close()
# print(classNames)
classNames = classes

# Initialize the webcam
drawing_spec = mpDraw.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the preview window are released even
# when something inside the loop raises (e.g. a failing model call).
try:
    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as face_mesh:
        while cap.isOpened():
            # Read each frame from the webcam
            success, frame = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            # Mirror the frame horizontally (positive flip code) and convert
            # OpenCV's BGR layout to RGB for MediaPipe.
            frame = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)

            # Mark the array that is actually processed as read-only while
            # MediaPipe runs; the flag has to be set after flip/cvtColor
            # because both return new arrays.
            frame.flags.writeable = False
            # NOTE(review): the hand result is not used anywhere in this cell;
            # drop this call if hand tracking is no longer needed.
            result = hands.process(frame)
            resultFace = face_mesh.process(frame)
            frame.flags.writeable = True

            className = ''

            # Back to BGR for drawing and display.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            faces = resultFace.multi_face_landmarks
            if faces:
                for face_landmarks in faces:
                    # Draw tesselation, contours and irises in that order.
                    mesh_layers = (
                        (mp_face_mesh.FACEMESH_TESSELATION,
                         mp_drawing_styles.get_default_face_mesh_tesselation_style()),
                        (mp_face_mesh.FACEMESH_CONTOURS,
                         mp_drawing_styles.get_default_face_mesh_contours_style()),
                        (mp_face_mesh.FACEMESH_IRISES,
                         mp_drawing_styles.get_default_face_mesh_iris_connections_style()),
                    )
                    for connections, connection_style in mesh_layers:
                        mpDraw.draw_landmarks(
                            image=frame,
                            landmark_list=face_landmarks,
                            connections=connections,
                            landmark_drawing_spec=None,
                            connection_drawing_spec=connection_style)

                # Classify the last detected face (the only one while
                # max_num_faces=1) instead of relying on the loop variable
                # leaking out of the for-loop.
                xyz = [(lm.x, lm.y, lm.z) for lm in faces[-1].landmark]

                # Predict gesture: build an explicit (1, num_landmarks, 3)
                # float array rather than a nested Python list, and silence
                # the per-call progress output.
                points = np.asarray(xyz, dtype=np.float32)[np.newaxis, ...]
                prediction = model.predict(points, verbose=0)
                classID = int(np.argmax(prediction[0]))
                className = classNames[classID]

            # Undo the mirroring before drawing so the label text is readable,
            # then show the prediction on the frame.
            frame = cv2.flip(frame, 1)
            cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (0, 0, 255), 2, cv2.LINE_AA)

            # Show the final output
            cv2.imshow("Output", frame)

            # Mask to the low byte so 'q' is recognised even on platforms
            # where waitKey sets higher bits.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # release the webcam and destroy all active windows
    cap.release()
    cv2.destroyAllWindows()