In [None]:
pip install opencv-python


In [None]:
pip install mediapipe

In [None]:
pip install --user numpy==1.24.3 --upgrade #probably with some limitation because of --user space installation

In [2]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
import cv2
import mediapipe as mp
import time

from google.protobuf.json_format import MessageToDict 

from numpy import linalg
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [None]:
# This part is only necessary to prepare your images (photos) for training.
# All images are read from the folder "data"; the name of each subfolder acts as the
# label of the images stored inside it. Please use numbers for subfolder names.
#
# Output: data.pickle containing {'data': [...], 'labels': [...]}, where every entry of
# 'data' is a flat list of 42 values (x/y of 21 landmarks, shifted so the smallest x and
# the smallest y of the hand become 0) and 'labels' holds the matching subfolder names.

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# max_num_hands=1: at most one hand is returned per image, so every sample has the same
# length. With two detected hands the feature list would grow to 84 values and the
# dataset could no longer be converted into a rectangular array for training.
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.3)

DATA_DIR = './data'

data = []
labels = []
try:
    for dir_ in sorted(os.listdir(DATA_DIR)):
        class_dir = os.path.join(DATA_DIR, dir_)
        if not os.path.isdir(class_dir):
            # Stray files next to the label folders are not image classes
            continue

        for img_path in sorted(os.listdir(class_dir)):
            full_path = os.path.join(class_dir, img_path)

            img = cv2.imread(full_path)
            if img is None:
                # cv2.imread returns None (no exception) for unreadable / non-image files
                print('Skipping unreadable file: {}'.format(full_path))
                continue
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            results = hands.process(img_rgb)
            if not results.multi_hand_landmarks:
                # No hand found in this image -> nothing to learn from
                continue

            hand_landmarks = results.multi_hand_landmarks[0]
            x_ = [point.x for point in hand_landmarks.landmark]
            y_ = [point.y for point in hand_landmarks.landmark]
            x_min = min(x_)
            y_min = min(y_)

            # Interleave x and y, relative to the top-left corner of the hand
            data_aux = []
            for x, y in zip(x_, y_):
                data_aux.append(x - x_min)
                data_aux.append(y - y_min)

            data.append(data_aux)
            labels.append(dir_)
finally:
    # Free the MediaPipe graph even if an image fails half-way through
    hands.close()

#Save data
with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

In [None]:
# This part is only necessary to TRAIN RandomForestClassifier on your pickle data

# NOTE: pickle can execute arbitrary code while loading - only open files you created yourself.
with open('./data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

# You can print it out to check if the data structure is homogeneous
### print(data_dict.keys())
### print(data_dict)

# Not used by the code below; kept so the name stays defined for anything relying on it
Data_type = object

# One hand = 21 landmarks * (x, y). Samples of any other length (e.g. two hands in one
# photo) would make the array ragged, so they are dropped before the conversion.
NUM_FEATURES = 42
# Fixed seed so the split and the forest are reproducible between runs
RANDOM_STATE = 42

samples = [
    (features, label)
    for features, label in zip(data_dict['data'], data_dict['labels'])
    if len(features) == NUM_FEATURES
]
dropped = len(data_dict['data']) - len(samples)
if dropped:
    print('Dropped {} sample(s) that do not have {} features'.format(dropped, NUM_FEATURES))
if not samples:
    raise ValueError('data.pickle contains no usable samples - run the data preparation cell first')

#Data and Labels are lists, so they have to be converted here
data = np.asarray([features for features, _ in samples])
labels = np.asarray([label for _, label in samples])

#Dataset is split here in 80% training and 20% test data
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=RANDOM_STATE)

#Simple but very fast modeltype to train - good for older hardware/training on cpu
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(x_train, y_train)

y_predict = model.predict(x_test)
# scikit-learn metric signature is (y_true, y_pred)
score = accuracy_score(y_test, y_predict)

print('{}% of samples are classified correctly '.format(score * 100))

#save model
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

In [3]:
def are_finger_stretched(point1, point2, point3):
    """Tell whether ``point3`` is farther away from ``point1`` than ``point2`` is.

    Distances are 2-norms of the difference vectors, so the points must
    support subtraction (e.g. NumPy arrays).

    Args:
        point1: reference point both distances are measured from.
        point2: first point to compare (the nearer one when stretched).
        point3: second point to compare (the farther one when stretched).

    Returns:
        1 if the distance point1->point3 is strictly greater than the
        distance point1->point2, otherwise 0.
    """
    reach_of_point2 = np.linalg.norm(point2 - point1, ord=2)
    reach_of_point3 = np.linalg.norm(point3 - point1, ord=2)
    return 1 if reach_of_point3 > reach_of_point2 else 0

In [4]:
def detect_gesture(result):
    """Translate five finger flags into the name of a gesture.

    Args:
        result: indexable sequence whose entries 0..4 are the finger flags
            (1 = stretched, 0 = not stretched).

    Returns:
        The gesture name of the first matching pattern, or
        "unknown/not detected" when no pattern matches.
    """
    # (pattern, name) pairs, checked top to bottom; each pattern lists the
    # expected flags for result[0] .. result[4].
    known_gestures = (
        ((1, 0, 0, 0, 0), "good"),
        ((1, 1, 0, 0, 0), "full barre"),
        ((0, 1, 0, 0, 0), "one"),
        ((0, 0, 1, 0, 0), "VERY BAD"),
        ((0, 1, 1, 0, 0), "two"),
        ((1, 1, 1, 0, 0), "3 finger picking"),
        ((0, 1, 1, 1, 0), "three"),
        ((0, 1, 1, 1, 1), "four"),
        ((1, 1, 1, 1, 1), "five"),
        ((1, 0, 0, 0, 1), "Shaka"),
        ((0, 0, 1, 1, 1), "OK or plectrum"),
        ((0, 0, 0, 0, 0), "closed"),
    )

    for pattern, name in known_gestures:
        # all() stops at the first mismatching finger, left to right
        if all(result[finger] == flag for finger, flag in enumerate(pattern)):
            return name

    return "unknown/not detected"

In [5]:

# Live webcam inference:
#   * a hand reported as 'Left'  is classified as a chord shape by the trained model
#   * a hand reported as 'Right' is mapped to a finger gesture via detect_gesture()
# Press "q" in the video window to quit.

# NOTE: pickle can execute arbitrary code while loading - only load a model.p you created yourself.
with open('./model.p', 'rb') as model_file:
    model_dict = pickle.load(model_file)
model = model_dict['model']

labels_dict = {0: 'Minor1', 1: 'Minor2', 2: 'MajorToMoll1', 3: 'MajorToMoll2', 4: 'FullBarre1'}

width = 1280
height = 720

# Fingertip landmark id -> (circle radius, BGR colour) of the marker drawn on it
FINGERTIP_MARKERS = {
    4: (6, (0, 0, 0)),
    8: (12, (255, 0, 0)),
    12: (12, (0, 255, 0)),
    16: (12, (0, 0, 255)),
    20: (12, (255, 0, 255)),
}


def hand_features(hand_landmarks):
    """Build the model input for ONE hand.

    Returns a tuple (features, xs, ys): ``features`` is the flat list
    [x0 - min(xs), y0 - min(ys), x1 - min(xs), ...] over all landmarks of the
    hand, ``xs`` / ``ys`` are the raw landmark coordinates as delivered by MediaPipe.
    """
    xs = [point.x for point in hand_landmarks.landmark]
    ys = [point.y for point in hand_landmarks.landmark]
    x_min = min(xs)
    y_min = min(ys)
    features = []
    for x, y in zip(xs, ys):
        features.append(x - x_min)
        features.append(y - y_min)
    return features, xs, ys


def chord_name(raw_label):
    """Map a predicted class label to its display name.

    Falls back to the raw label as text when it is not numeric or has no
    entry in labels_dict, so an unexpected class cannot crash the video loop.
    """
    try:
        return labels_dict[int(raw_label)]
    except (KeyError, ValueError):
        return str(raw_label)


cap = cv2.VideoCapture(0)

# Delete following settings and replace resize function for more FPS
cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
cap.set(cv2.CAP_PROP_FPS, 30)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=2,
                       min_tracking_confidence=0.5,
                       min_detection_confidence=0.5)

previousTime = 0
currentTime = 0

# Array for 5 fingers as figure (1 = stretched, 0 = not stretched)
figure = [0 for element in range(5)]

# Pixel coordinates (x, y) of the 21 trackpoints of the hand currently analysed
landmark = np.empty((21, 2))

try:
    while True:
        success, frame = cap.read()
        if not success or frame is None:
            # Camera missing / unplugged: stop instead of crashing inside cv2.flip
            print('Could not read a frame from the camera - stopping.')
            break

        #Horizontally flip the image to get right hand for the right and left for the left hand
        frame = cv2.flip(frame, 1)
        frame = cv2.resize(frame, (width, height))

        #MediaPipe expects RGB, OpenCV delivers BGR
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            H, W, _ = frame.shape

            # Pair every detected hand with its own handedness entry, so each hand is
            # processed exactly once and only with its own landmarks.
            detected_hands = zip(results.multi_hand_landmarks, results.multi_handedness)
            for handsnumber, (hand_landmarks, handedness) in enumerate(detected_hands):
                label = handedness.classification[0].label

                if label == 'Left':
                    mp_drawing.draw_landmarks(
                        frame,  # image to draw
                        hand_landmarks,  # model output
                        mp_hands.HAND_CONNECTIONS,  # hand connections
                        mp_drawing_styles.get_default_hand_landmarks_style(),
                        mp_drawing_styles.get_default_hand_connections_style())

                    data_aux, x_, y_ = hand_features(hand_landmarks)

                    # The model was trained on 42 values (= 1 hand)
                    if len(data_aux) == 42:
                        prediction = model.predict([np.asarray(data_aux)])
                        predicted_character = chord_name(prediction[0])

                        # Bounding box around the hand with 10 px padding on every side
                        x1 = int(min(x_) * W) - 10
                        y1 = int(min(y_) * H) - 10
                        x2 = int(max(x_) * W) + 10
                        y2 = int(max(y_) * H) + 10

                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                        cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                                    cv2.LINE_AA)

                elif label == 'Right':
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    for point_id, point in enumerate(hand_landmarks.landmark):
                        cx = int(point.x * W)
                        cy = int(point.y * H)
                        cv2.circle(frame, (cx, cy), 3, (128, 128, 128))
                        landmark[point_id, :] = (cx, cy)

                        #Marks the top of fingers with different colors and thickness
                        if point_id in FINGERTIP_MARKERS:
                            radius, colour = FINGERTIP_MARKERS[point_id]
                            cv2.circle(frame, (cx, cy), radius, colour, cv2.FILLED)

                    for i in range(5):
                        if i == 0:
                            # thumb: distances are measured from landmark 17 (base of the pinky)
                            figure_ = are_finger_stretched(landmark[17], landmark[4*i+2], landmark[4*i+4])
                        else:
                            # other fingers: distances are measured from landmark 0 (wrist)
                            figure_ = are_finger_stretched(landmark[0], landmark[4*i+2], landmark[4*i+4])

                        figure[i] = figure_

                    gesture_solution = detect_gesture(figure)
                    cv2.putText(frame, f"{gesture_solution}", (240, 35*(handsnumber+1)), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 5)

        #Shows Frames per Second - done for every frame, also when no hand is visible
        currentTime = time.time()
        elapsed = currentTime - previousTime
        fps = 1 / elapsed if elapsed > 0 else 0
        previousTime = currentTime
        cv2.putText(frame, str(int(fps)), (10, 35), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3) #Pos,Font,Scale,Color,Thick

        cv2.imshow("Hand Tracking", frame) #Name of Window and the Live-Image

        # Mask to the low byte so the key comparison also works where waitKey returns extra bits
        if cv2.waitKey(1) & 0xFF == ord('q'):
            #quit with Q
            break
finally:
    # Always give the camera and the window back, even after an error or kernel interrupt
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

In [None]:
# Alternative method to store which detected hand is the left one and which is the right one.
# Study this approach in case approaches 1 and 2 do NOT work.

"""
import cv2

class mpHands:
    import mediapipe as mp
    
    def __init__(self,maxHands=2,tol1=.5,tol2=.5):
        self.hands=self.mp.solutions.hands.Hands(False,maxHands,tol1,tol2)
        
    def Marks(self,frame):
        myHands=[]
        handsType=[]
        frameRGB=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
        results=self.hands.process(frameRGB)
        if results.multi_hand_landmarks != None:
            #print(results.multi_handedness)
            for hand in results.multi_handedness:
                #print(hand)
                #print(hand.classification)
                #print(hand.classification[0])
                handType=hand.classification[0].label
                handsType.append(handType)
            for handLandMarks in results.multi_hand_landmarks:
                myHand=[]
                for landMark in handLandMarks.landmark:
                    myHand.append((int(landMark.x*width),int(landMark.y*height)))
                myHands.append(myHand)
        return myHands,handsType
 
#width=1280
#height=720
#cam=cv2.VideoCapture(0,cv2.CAP_DSHOW)
#cam.set(cv2.CAP_PROP_FRAME_WIDTH, width)
#cam.set(cv2.CAP_PROP_FRAME_HEIGHT,height)
#cam.set(cv2.CAP_PROP_FPS, 30)
#cam.set(cv2.CAP_PROP_FOURCC,cv2.VideoWriter_fourcc(*'MJPG'))

cam = cv2.VideoCapture(0)
findHands=mpHands(2)

while True:
    ignore,  frame = cam.read()
    frame=cv2.flip(frame, 1)
    handData, handsType=findHands.Marks(frame)
    for hand,handType in zip(handData,handsType):
        if handType=='Right':
            handColor=(255,0,0)
        if handType=='Left':
            handColor=(0,0,255)
        for ind in [0,5,6,7,8]:
            cv2.circle(frame,hand[ind],15,handColor,5)
    cv2.imshow('Name of window', frame)
    cv2.moveWindow('Name of window',0,0)
    if cv2.waitKey(1) & 0xff ==ord('q'):
        break
cam.release()

"""

In [None]:
# Backup of the RandomForest approach: the model is trained on 42 parameters (= 1 hand)
'''
model_dict = pickle.load(open('./model.p', 'rb'))
model = model_dict['model']

cap = cv2.VideoCapture(0)

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

labels_dict = {0: 'Minor1', 1: 'Minor2', 2: 'MajorToMoll1', 3: 'MajorToMoll2', 4: 'FullBarre1'}


while True:

    data_aux = []
    x_ = []
    y_ = []

    ret, frame = cap.read()

    H, W, _ = frame.shape

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame,  # image to draw
                hand_landmarks,  # model output
                mp_hands.HAND_CONNECTIONS,  # hand connections
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

        for hand_landmarks in results.multi_hand_landmarks:
            for i in range(len(hand_landmarks.landmark)):
                
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                x_.append(x)
                y_.append(y)

            for i in range(len(hand_landmarks.landmark)):
                
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                data_aux.append(x - min(x_))
                data_aux.append(y - min(y_))

        x1 = int(min(x_) * W) - 10
        y1 = int(min(y_) * H) - 10

        x2 = int(max(x_) * W) - 10
        y2 = int(max(y_) * H) - 10

        prediction = model.predict([np.asarray(data_aux)])
        predicted_character = labels_dict[int(prediction[0])]

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
        cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                    cv2.LINE_AA)
                            

    cv2.imshow('frame', frame)
    cv2.waitKey(1)


cap.release()
cv2.destroyAllWindows()
'''