In [13]:
import mediapipe as mp
import cv2
from dollarpy import Recognizer, Template, Point

# Short aliases for the two MediaPipe solution modules used throughout the notebook.
mp_hands = mp.solutions.hands  # hand-landmark model and HAND_CONNECTIONS
mp_drawing = mp.solutions.drawing_utils  # draw_landmarks / DrawingSpec helpers

In [14]:
# Collects every dollarpy Template built from the training clips below;
# the live recognizer is later constructed from this list.
templates = list()

In [15]:
def getHandPoints(videoURL, label):
    """Extract a multi-stroke hand trajectory from a video for dollarpy.

    Reads ``videoURL`` frame by frame, runs MediaPipe Hands on each frame and,
    for every frame in which a hand is found, records the (x, y) coordinates
    MediaPipe reports for 11 landmarks of the first detected hand. Each
    landmark is recorded as its own stroke (stroke ids 1-11). Annotated frames
    are shown in an OpenCV window titled ``label``; pressing ``q`` in that
    window stops processing early.

    Args:
        videoURL: Source passed straight to ``cv2.VideoCapture``.
        label: Window title; also printed once when processing finishes.

    Returns:
        A flat list of dollarpy ``Point`` objects: every point of stroke 1
        first, then every point of stroke 2, and so on. The list is empty if
        the source could not be opened or no hand was ever detected.
    """
    # MediaPipe landmark indices in stroke order; stroke id = position + 1.
    # 0 wrist, then (tip, mcp) pairs for thumb, index, middle, ring, pinky.
    tracked_landmarks = (0, 4, 2, 8, 5, 12, 9, 16, 13, 20, 17)
    strokes = [[] for _ in tracked_landmarks]

    cap = cv2.VideoCapture(videoURL)
    try:
        # Initiate hands model
        with mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # End of the video (or a read failure). isOpened() stays
                    # true after the last frame, so without this break the
                    # loop would spin until the user pressed 'q'.
                    break

                # MediaPipe expects RGB; marking the array read-only lets it
                # avoid an internal copy.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image.flags.writeable = False

                # Make detections
                results = hands.process(image)

                # Back to writable BGR for drawing and display.
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        mp_drawing.draw_landmarks(
                            image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                            mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                        )

                    # Only the first detected hand contributes to the template.
                    landmarks = results.multi_hand_landmarks[0].landmark
                    for stroke_id, (stroke, idx) in enumerate(zip(strokes, tracked_landmarks), start=1):
                        stroke.append(Point(landmarks[idx].x, landmarks[idx].y, stroke_id))

                cv2.imshow(label, image)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
    finally:
        # Release the capture and close the preview even if processing raised.
        cap.release()
        cv2.destroyAllWindows()

    # Concatenate stroke by stroke so points sharing a stroke id are contiguous.
    points = [point for stroke in strokes for point in stroke]
    print(label)
    return points

In [16]:
# Build one 'Down' template per enabled training clip and register it.
# Clips left commented out in the tuple are excluded from the template set.
for vid in (
    "vids/down/d_ali.mp4",
    # "vids/down/d_bakar.mp4",
    # "vids/down/d_fouad.mp4",
    # "vids/down/d_kamal.mp4",
    "vids/down/d_marwan.mp4",
    "vids/down/d_kenzy.mp4",
):
    points = getHandPoints(vid, "Down")
    tmpl_2 = Template('Down', points)
    templates.append(tmpl_2)


Down
Down
Down


In [17]:
# Build one 'Left' template per enabled training clip and register it.
# Clips left commented out in the tuple are excluded from the template set.
for vid in (
    "vids/left/l_ali.mp4",
    # "vids/left/l_bakar.mp4",
    # "vids/left/l_fouad.mp4",
    # "vids/left/l_kamal.mp4",
    "vids/left/l_marwan.mp4",
    "vids/left/l_kenzy.mp4",
):
    points = getHandPoints(vid, "Left")
    tmpl_2 = Template('Left', points)
    templates.append(tmpl_2)


Left
Left
Left


In [18]:
# Build one 'Right' template per enabled training clip and register it.
# Clips left commented out in the tuple are excluded from the template set.
for vid in (
    "vids/right/r_ali.mp4",
    # "vids/right/r_bakar.mp4",
    # "vids/right/r_fouad.mp4",
    # "vids/right/r_kamal.mp4",
    "vids/right/r_marwan.mp4",
    "vids/right/r_kenzy.mp4",
):
    points = getHandPoints(vid, "Right")
    tmpl_2 = Template('Right', points)
    templates.append(tmpl_2)


Right
Right
Right


In [19]:
# Build one 'Up' template per enabled training clip and register it.
# Clips left commented out in the tuple are excluded from the template set.
for vid in (
    "vids/up/u_ali.mp4",
    # "vids/up/u_bakar.mp4",
    # "vids/up/u_fouad.mp4",
    # "vids/up/u_kamal.mp4",
    "vids/up/u_marwan.mp4",
    "vids/up/u_kenzy.mp4",
):
    points = getHandPoints(vid, "Up")
    tmpl_2 = Template('Up', points)
    templates.append(tmpl_2)


Up
Up
Up


In [20]:
# Gesture labels that get forwarded to the socket client; any other
# recognizer output is only displayed on the frame, never sent.
possibleOutcomes = [
    "Left",
    "Right",
    "Up",
    "Down",
]

In [11]:
import socket

# Endpoint on which this notebook listens for the client that will receive
# the recognized gesture names.
hostname, port = "localhost", 65436

soc = socket.socket()
soc.bind((hostname, port))
soc.listen(5)

# accept() blocks this cell until a client connects.
print("Waiting for a connection...")
conn, addr = soc.accept()
print(f"Device connected: {addr}")


Waiting for a connection...
Device connected: ('127.0.0.1', 62720)


In [None]:
def real_time_hand_pose_detection(videoURL=0, window_size=20):
    """Recognize hand gestures from a live capture and stream them to a client.

    Runs MediaPipe Hands on each captured frame and keeps a sliding window of
    the most recent ``window_size`` positions for 11 landmarks of the first
    detected hand (one dollarpy stroke per landmark, stroke ids 1-11). Once
    the window is full, every new hand frame is classified against the
    module-level ``templates``. The result is drawn on the preview window and,
    if it is one of ``possibleOutcomes``, sent as ``"<name>\\n"`` (ASCII) over
    the module-level ``conn`` socket.

    The loop ends when a frame cannot be read, when the client connection is
    lost, or when ``q`` is pressed in the preview window (which also closes
    the module-level ``conn`` and ``soc`` sockets).

    Args:
        videoURL: Source passed straight to ``cv2.VideoCapture``.
        window_size: Number of most recent hand frames kept per landmark.
    """
    # MediaPipe landmark indices in stroke order; stroke id = position + 1.
    # 0 wrist, then (tip, mcp) pairs for thumb, index, middle, ring, pinky.
    tracked_landmarks = (0, 4, 2, 8, 5, 12, 9, 16, 13, 20, 17)
    strokes = [[] for _ in tracked_landmarks]

    recognizer = Recognizer(templates)
    cap = cv2.VideoCapture(videoURL)

    try:
        # Initiate hands model
        with mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7) as hands:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # Source exhausted or capture failed; isOpened() does not
                    # turn false on its own, so stop instead of spinning.
                    break

                # MediaPipe expects RGB; marking the array read-only lets it
                # avoid an internal copy.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image.flags.writeable = False

                # Detect hands
                results = hands.process(image)

                # Back to writable BGR for drawing and display.
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Also shown while the sliding window is still filling up.
                gesture_name = "No Hands Detected"
                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        mp_drawing.draw_landmarks(
                            image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                            mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                        )

                    # Only the first detected hand feeds the recognizer.
                    landmarks = results.multi_hand_landmarks[0].landmark
                    for stroke_id, (stroke, idx) in enumerate(zip(strokes, tracked_landmarks), start=1):
                        stroke.append(Point(landmarks[idx].x, landmarks[idx].y, stroke_id))

                    if len(strokes[0]) > window_size:
                        # Drop the oldest sample of every stroke so each one
                        # holds exactly `window_size` points.
                        for stroke in strokes:
                            stroke.pop(0)

                        points = [point for stroke in strokes for point in stroke]
                        matched_name = recognizer.recognize(points)[0]
                        # The recognizer may report "no match" as None; give it
                        # a label so the string handling below cannot raise.
                        gesture_name = matched_name if matched_name is not None else "Unknown Gesture"

                # Display gesture name on frame
                cv2.putText(image, f'Gesture: {gesture_name}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
                cv2.imshow("Real-Time Hand Gesture Detection", image)

                try:
                    if gesture_name.strip():
                        if gesture_name in possibleOutcomes:
                            # sendall keeps writing until the whole message is out.
                            conn.sendall((gesture_name + "\n").encode("ASCII"))
                    else:
                        print("Please enter a valid direction.")
                except ConnectionError:
                    # Covers BrokenPipeError as well as reset/aborted
                    # connections, which would otherwise be logged on every
                    # frame without ever stopping the loop.
                    print("Client disconnected. Stopping server.")
                    break
                except Exception as e:
                    print(f"An error occurred: {e}")

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    conn.close()
                    soc.close()
                    break
    finally:
        # Release the capture and close the preview even if processing raised.
        cap.release()
        cv2.destroyAllWindows()

# Start live recognition; 0 is handed to cv2.VideoCapture as the capture source.
real_time_hand_pose_detection(videoURL=0)
