In [2]:
# import numpy as np
# import cv2 as cv
# cap = cv.VideoCapture(0)
# if not cap.isOpened():
#     print("Cannot open camera")
#     exit()
# while True:
#     # Capture frame-by-frame
#     ret, frame = cap.read()
#     # if frame is read correctly ret is True
#     if not ret:
#         print("Can't receive frame (stream end?). Exiting ...")
#         break
#     # Our operations on the frame come here
#     gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
#     # Display the resulting frame
#     cv.imshow('frame', gray)
#     if cv.waitKey(1) == ord('q'):
#         break
# # When everything done, release the capture
# cap.release()
# cv.destroyAllWindows()

In [15]:
# import necessary packages

import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model

# initialize mediapipe
mpHands = mp.solutions.hands
mpFace = mp.solutions.face_detection
hands = mpHands.Hands(max_num_hands=2, min_detection_confidence=0.7)
mpDraw = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh


# Load the gesture recognizer model
model = load_model('mp_hand_gesture')

# Load class names
f = open('gesture.names', 'r')
classNames = f.read().split('\n')
f.close()
print(classNames)


# Initialize the webcam
drawing_spec = mpDraw.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
with mpFace.FaceDetection(
    model_selection=0, min_detection_confidence=0.5) as face_detection:
    while cap.isOpened():
        success, frame = cap.read()

        if not success:
          print("Ignoring empty camera frame.")
          # If loading a video, use 'break' instead of 'continue'.
          continue
        # Read each frame from the webcam

#         display(frame)
        x, y, c = frame.shape

        # Flip the frame vertically
#         frame.flags.writeable = False
        frame = cv2.flip(frame, 2)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Get hand landmark prediction
        result = hands.process(frame)
        resultFace = face_detection.process(frame)
        # print(result)

        className = ''

        # post process the result
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if result.multi_hand_landmarks:
            landmarks = []
            count = 0
            for handslms in result.multi_hand_landmarks:
                count +=1
                for lm in handslms.landmark:
                    # print(id, lm)
                    lmx = int(lm.x * x)
                    lmy = int(lm.y * y)

                    landmarks.append([lmx, lmy])
                    

                # Drawing landmarks on frames
                mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)
                if (count <= 1):
                    # Predict gesture
                    prediction = model.predict([landmarks])
                    # print(prediction)
                    classID = np.argmax(prediction)
                    className = classNames[classID]
                

        if resultFace.detections:
          for detection in resultFace.detections:
            mpDraw.draw_detection(frame, detection)

        # show the prediction on the frame
        cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 
                       1, (0,0,255), 2, cv2.LINE_AA)

        # Show the final output
        cv2.imshow("Output", frame) 

        if cv2.waitKey(1) == ord('q'):
            break

# release the webcam and destroy all active windows
cap.release()

cv2.destroyAllWindows()

['okay', 'peace', 'thumbs up', 'thumbs down', 'call me', 'stop', 'rock', 'live long', 'fist', 'smile']


In [10]:
# mp_face_detection = mp.solutions.face_detection
# mp_drawing = mp.solutions.drawing_utils
# cap = cv2.VideoCapture(0)
# with mp_face_detection.FaceDetection(
#     model_selection=0, min_detection_confidence=0.5) as face_detection:
#   while cap.isOpened():
#     success, image = cap.read()
#     if not success:
#       print("Ignoring empty camera frame.")
#       # If loading a video, use 'break' instead of 'continue'.
#       continue

#     # To improve performance, optionally mark the image as not writeable to
#     # pass by reference.
#     image.flags.writeable = False
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#     results = face_detection.process(image)

#     # Draw the face detection annotations on the image.
#     image.flags.writeable = True
#     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
#     if results.detections:
#       for detection in results.detections:
#         mp_drawing.draw_detection(image, detection)
#     # Flip the image horizontally for a selfie-view display.
#     cv2.imshow('MediaPipe Face Detection', cv2.flip(image, 1))
#     if cv2.waitKey(1) == ord('q'):
#       break

In [33]:
# Face mesh code
# import necessary packages

import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model
import time

# initialize mediapipe
mpHands = mp.solutions.hands
mpFace = mp.solutions.face_detection
hands = mpHands.Hands(max_num_hands=2, min_detection_confidence=0.7)
mpDraw = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh


# Load the gesture recognizer model
model = load_model('mp_hand_gesture')

# Load class names
f = open('gesture.names', 'r')
classNames = f.read().split('\n')
f.close()
print(classNames)


# Initialize the webcam
drawing_spec = mpDraw.DrawingSpec(thickness=1, circle_radius=1)
cap = cv2.VideoCapture(0)
with mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as face_mesh:
    while cap.isOpened():
        success, frame = cap.read()

        if not success:
          print("Ignoring empty camera frame.")
          # If loading a video, use 'break' instead of 'continue'.
          continue
        # Read each frame from the webcam

#         display(frame)
        x, y, c = frame.shape

        # Flip the frame vertically
        frame.flags.writeable = False
        frame = cv2.flip(frame, 2)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Get hand landmark prediction
        result = hands.process(frame)
        resultFace = face_mesh.process(frame)
        # print(result)

        className = ''

        # post process the result
        frame.flags.writeable = True
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        if result.multi_hand_landmarks:
            landmarks = []
            count = 0
            for handslms in result.multi_hand_landmarks:
                count +=1
                for lm in handslms.landmark:
                    # print(id, lm)
                    lmx = int(lm.x * x)
                    lmy = int(lm.y * y)

                    landmarks.append([lmx, lmy])
                    

                # Drawing landmarks on frames
                mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)
                if (count <= 1):
                    # Predict gesture
                    prediction = model.predict([landmarks])
                    # print(prediction)
                    classID = np.argmax(prediction)
                    className = classNames[classID]
                print("hands landmarks: ")
                display(handslms)

        if resultFace.multi_face_landmarks:
            for face_landmarks in resultFace.multi_face_landmarks:
                mpDraw.draw_landmarks(
                    image=frame,
                    landmark_list=face_landmarks,
                    connections=mp_face_mesh.FACEMESH_TESSELATION,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles
                    .get_default_face_mesh_tesselation_style())
                mpDraw.draw_landmarks(
                    image=frame,
                    landmark_list=face_landmarks,
                    connections=mp_face_mesh.FACEMESH_CONTOURS,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles
                    .get_default_face_mesh_contours_style())
                mpDraw.draw_landmarks(
                    image=frame,
                    landmark_list=face_landmarks,
                    connections=mp_face_mesh.FACEMESH_IRISES,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=mp_drawing_styles
                    .get_default_face_mesh_iris_connections_style())
            print("face landmarks: ")
            display(face_landmarks)
        
        time.sleep(20)
            
        # show the prediction on the frame
        frame = cv2.flip(frame, 1)
        cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 
                       1, (0,0,255), 2, cv2.LINE_AA)
        
        # Show the final output
#         cv2.imshow("Output", frame) 

        if cv2.waitKey(1) == ord('q'):
            break

# release the webcam and destroy all active windows
cap.release()

cv2.destroyAllWindows()

['okay', 'peace', 'thumbs up', 'thumbs down', 'call me', 'stop', 'rock', 'live long', 'fist', 'smile']


landmark {
  x: 0.40396758913993835
  y: 0.8417128920555115
  z: -0.0005663042538799345
}
landmark {
  x: 0.35643503069877625
  y: 0.8126320242881775
  z: -0.05583875998854637
}
landmark {
  x: 0.37104636430740356
  y: 0.8141172528266907
  z: -0.01699853502213955
}
landmark {
  x: 0.3524739444255829
  y: 0.7445682883262634
  z: -0.05724452808499336
}
landmark {
  x: 0.3541674017906189
  y: 0.7983358502388
  z: -0.06535825878381729
}
landmark {
  x: 0.3566763401031494
  y: 0.7715885043144226
  z: -0.0689651221036911
}
landmark {
  x: 0.3693912625312805
  y: 0.6942291855812073
  z: -0.06430024653673172
}
landmark {
  x: 0.3069118857383728
  y: 0.6349345445632935
  z: 0.01592210866510868
}
landmark {
  x: 0.372836172580719
  y: 0.6418540477752686
  z: -0.07437996566295624
}
landmark {
  x: 0.3718341588973999
  y: 0.6199484467506409
  z: -0.08712242543697357
}
landmark {
  x: 0.3757985234260559
  y: 0.5237737894058228
  z: -0.11418121308088303
}
landmark {
  x: 0.40345269441604614
  y: 0.8

KeyboardInterrupt: 