In [1]:
#import
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
import numpy as np
import matplotlib.pyplot as plt
import h5py
import random
import json
import cv2
import math
import mediapipe as mp
import time
import datetime


In [2]:
# Locations of the labelled image archive (HDF5) and its JSON metadata.
file_name = "labeled_images.h5"

base_dir = "./" + file_name

# NOTE(review): "labaled_data.json" looks like a misspelling of "labeled", but
# it is a runtime path that must match the file on disk, so it is kept as-is.
with open("labaled_data.json", "r") as label_file:
    pointcloud_data = json.load(label_file)

# Element [1] of every JSON entry is collected as that entry's class label.
# The unique labels are sorted so that the index -> name mapping is identical
# on every kernel start: a plain list(set(...)) follows hash order, which
# Python randomises per process for strings, so the names could be permuted
# between runs and predictions would be displayed under the wrong label.
# NOTE(review): confirm that sorted order matches the integer encoding used
# for file['labels'] when the dataset was built.
classes = sorted({entry[1] for entry in pointcloud_data.values()})


SPLIT_RATIO = 0.8  # fraction of samples assigned to the training split
SPLIT_SEED = 42    # fixed so the partition is reproducible after Restart & Run All

# Read each dataset with a single slice instead of iterating the HDF5 datasets
# row by row and rebuilding arrays from Python lists.
with h5py.File(file_name, "r") as file:
    all_images = file['images'][:]
    all_labels = file['labels'][:]

# Pair images with labels up to the shorter of the two datasets.
sz = min(len(all_images), len(all_labels))
cut = int(sz * SPLIT_RATIO)

# Shuffle indices rather than the data itself; images and labels stay aligned
# because both are indexed with the same permutation.
shuffled_idx = np.random.default_rng(SPLIT_SEED).permutation(sz)
train_idx = shuffled_idx[:cut]
test_idx = shuffled_idx[cut:]

training_x = all_images[train_idx]
training_labels = all_labels[train_idx]
training_data = tf.data.Dataset.from_tensor_slices((training_x, training_labels))

testing_x = all_images[test_idx]
testing_labels = all_labels[test_idx]
testing_data = tf.data.Dataset.from_tensor_slices((testing_x, testing_labels))

IMAGE_SIZE = 224
BATCH_SIZE = 4

# Per-sample shape taken from the loaded training images (leading axis dropped).
input_shape = training_x.shape[1:]

train_dataset = training_data.batch(BATCH_SIZE)

# Functional model: input -> rescaling by 1/255 -> ResNet50V2 with random
# initial weights (weights=None) and a 2-way classification head.
inputs = keras.Input(shape=input_shape)
rescaled = layers.Rescaling(1.0 / 255)(inputs)
resnet = keras.applications.ResNet50V2(
    weights=None,
    input_shape=input_shape,
    classes=2,
)
# Despite its name, `base_model` holds the network's output tensor, not a
# model object; the name is kept so existing references keep working.
base_model = resnet(rescaled)

model = keras.Model(inputs, base_model)

# Integer class labels -> sparse categorical loss and accuracy.
rmsprop = keras.optimizers.experimental.RMSprop(learning_rate=0.001)
model.compile(
    optimizer=rmsprop,
    loss="sparse_categorical_crossentropy",
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)



In [3]:
# Restore previously saved weights when they can be loaded; otherwise continue
# with the freshly initialised model so the rest of the notebook still runs.
try:
    model.load_weights("./Saved_resnet_weights")
except Exception as load_error:
    # `Exception` rather than a bare `except:` so that a kernel interrupt
    # (KeyboardInterrupt) or SystemExit is not silently swallowed, and the
    # actual reason is reported instead of being hidden.
    print("no model data")
    print(f"  ({type(load_error).__name__}: {load_error})")

In [4]:
# MediaPipe / webcam setup for the live inference loop.

# Short aliases for the MediaPipe solution modules.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing_styles = mp.solutions.drawing_styles
mpDraw = mp.solutions.drawing_utils
mpFace = mp.solutions.face_detection
mpHands = mp.solutions.hands

# Hand tracker configured for at most two hands.
hands = mpHands.Hands(min_detection_confidence=0.7, max_num_hands=2)

# Thin-line, small-dot style for landmark overlays.
drawing_spec = mpDraw.DrawingSpec(circle_radius=1, thickness=1)

# Class names, indexed by the class id predicted by the model.
classNames = classes

# Margin added around the face landmark bounds before cropping, expressed as
# a fraction of the frame size (landmark coordinates are scaled by the frame
# width/height further down).
EXTRA_PADDING = 0.02

# Open the camera at device index 0.
cap = cv2.VideoCapture(0)
def _square_face_box(points, width, height, padding=EXTRA_PADDING):
    """Return a square pixel crop box around normalised face landmarks.

    Args:
        points: array of shape (N, >=2); columns 0 and 1 hold x and y as
            fractions of the frame width and height respectively.
        width: frame width in pixels.
        height: frame height in pixels.
        padding: margin added on every side, in the same normalised units.

    Returns:
        ``(left, right, top, bottom)`` pixel bounds such that
        ``right - left == bottom - top`` and the box lies entirely inside the
        frame, or ``None`` when the resulting box would be empty.
    """
    # Pad the landmark extent, then clamp it to the frame. Each bound is
    # derived from its own axis and its own extreme (min or max).
    min_x = float(np.clip(points[:, 0].min() - padding, 0.0, 1.0))
    max_x = float(np.clip(points[:, 0].max() + padding, 0.0, 1.0))
    min_y = float(np.clip(points[:, 1].min() - padding, 0.0, 1.0))
    max_y = float(np.clip(points[:, 1].max() + padding, 0.0, 1.0))

    left, right = int(min_x * width), int(max_x * width)
    top, bottom = int(min_y * height), int(max_y * height)

    # Grow the shorter side to match the longer one; a square can never be
    # larger than the frame itself.
    side = min(max(right - left, bottom - top), width, height)
    if side <= 0:
        return None

    # Centre the square on the padded box, then slide it back inside the frame
    # so slicing never sees a negative or out-of-range index.
    left = min(max((left + right) // 2 - side // 2, 0), width - side)
    top = min(max((top + bottom) // 2 - side // 2, 0), height - side)
    return left, left + side, top, top + side


# Live loop: grab a frame, locate the face, classify the square face crop and
# overlay the predicted class name. Press 'q' in the output window to stop.
try:
    with mp_face_mesh.FaceMesh(
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as face_mesh:
        while cap.isOpened():
            success, frame = cap.read()

            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            height, width = frame.shape[:2]

            # Mirror the frame left-right (a positive flip code flips around
            # the vertical axis). It is flipped back before being displayed.
            frame = cv2.flip(frame, 1)

            # MediaPipe expects RGB. Marking the array read-only lets it be
            # passed by reference; `frame` itself stays BGR and writeable.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            rgb_frame.flags.writeable = False
            resultFace = face_mesh.process(rgb_frame)
            # The hand tracker is not run here: its result was never used.
            # Call hands.process(rgb_frame) at this point to re-enable it.

            className = ''

            if resultFace.multi_face_landmarks:
                for face_landmarks in resultFace.multi_face_landmarks:
                    points = np.array(
                        [(lm.x, lm.y, lm.z) for lm in face_landmarks.landmark]
                    )
                    box = _square_face_box(points, width, height)
                    if box is None:
                        # Nothing to crop (degenerate landmark extent).
                        continue
                    left, right, top, bottom = box

                    # NOTE(review): the crop is in OpenCV's BGR channel order;
                    # confirm the training images were stored the same way.
                    face = frame[top:bottom, left:right]
                    resized = cv2.resize(
                        face, (IMAGE_SIZE, IMAGE_SIZE),
                        interpolation=cv2.INTER_AREA,
                    )

                    # Add the batch axis and classify the crop.
                    prediction = model.predict(resized[np.newaxis, ...], verbose=0)
                    classID = int(np.argmax(prediction))
                    className = classNames[classID]

            # Show the prediction on the (un-mirrored) frame.
            frame = cv2.flip(frame, 1)
            cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (0, 0, 255), 2, cv2.LINE_AA)

            # Show the final output
            cv2.imshow("Output", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always release the webcam and close the windows, even when the loop
    # raises, so the camera is not left locked by the kernel.
    cap.release()
    cv2.destroyAllWindows()