In [None]:
pip install opencv-python


In [None]:
pip install mediapipe

In [None]:
pip install --user numpy==1.24.3 --upgrade #probably with some limitation because of --user space installation

In [None]:
pip install scikit-learn==1.0.2 

In [None]:
pip install pygame

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
import cv2
import mediapipe as mp
import time
import asyncio
import pygame

from google.protobuf.json_format import MessageToDict 

from numpy import linalg
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [None]:
# If you don't want to use the data.pickle and the pretrained model.p shipped with this project,
# you have to prepare your own training images (photos), each showing a single (chord) hand.
# Use the next-to-last cell to create the pickle data from your dataset.

In [None]:
# Then use the last cell to train a RandomForestClassifier on your pickle data.
# It will create model.p.

In [None]:
import random

pygame.init()

def play_chord(chord_name):
    """Play one randomly chosen sample from the chord group ``chord_name``.

    The call is a no-op while the pygame mixer is still playing something.
    An unknown group name is reported on stdout and otherwise ignored.

    Args:
        chord_name: Key of the chord group, e.g. ``'Barre'`` or ``'Minor1'``.

    Returns:
        None.
    """
    # Never start a new sample while another one is still sounding.
    if pygame.mixer.get_busy():
        return

    # Chord group -> base names of the .wav files stored in ./chords/
    samples_by_group = {
        'Minor1': ['AMoll_7c', 'AisMoll_7c', 'HMoll_7c', 'CMoll_7c', 'CisMoll_7c', 'DMoll_7c'],
        'Minor2': ['AMoll_7c', 'CisMoll_7c', 'DMoll_7c', 'DisMoll_7c'],
        'Minor3': ['FMoll_7c', 'FisMoll_7c', 'GMoll_7c', 'GisMoll_7c'],
        'MajorToMinor': ['EMoll_7c'],
        'Major2Minor': ['EMoll_7c'],
        'FullBarre': ['Fullbarre_CDur_7c', 'Fullbarre_DDur_7c', 'Fullbarre_EDur_7c', 'Fullbarre_FDur_7c', 'Fullbarre_GDur_7c', 'Fullbarre_ADur_7c', 'Fullbarre_HDur_7c'],
        'Barre': ['CisDur_7c', 'DisDur_7c', 'EDur_7c', 'FisDur_7c', 'GDur_7c', 'GisDur_7c', 'AisDur_7c', 'HDur_7c'],
    }

    candidates = samples_by_group.get(chord_name)
    if candidates is None:
        print(f'Chord {chord_name} not found.')
        return

    sample_name = random.choice(candidates)
    sample_path = './chords/' + str(sample_name) + '.wav'
    pygame.mixer.Sound(sample_path).play()


In [None]:
# Quick audio check: plays one randomly chosen sample of the 'Barre' group
# (does nothing while the mixer is still busy with a previous sound).
play_chord('Barre')

In [None]:
def are_finger_stretched(point1, point2, point3):
    """Tell whether ``point3`` lies farther away from ``point1`` than ``point2`` does.

    Distances are Euclidean (L2) norms of the difference vectors, so the
    arguments must support subtraction (e.g. NumPy arrays).

    Args:
        point1: Reference point both distances are measured from.
        point2: First point to compare.
        point3: Second point to compare.

    Returns:
        1 if ``point3`` is strictly farther from ``point1`` than ``point2``,
        otherwise 0.
    """
    reach_to_second = np.linalg.norm(point2 - point1, ord=2)
    reach_to_third = np.linalg.norm(point3 - point1, ord=2)
    return 1 if reach_to_third > reach_to_second else 0

In [None]:
def detect_gesture(result):
    """Translate a five-element finger pattern into a gesture name.

    Args:
        result: Indexable with at least five entries, each 1 or 0
            (index 0 through 4, one flag per finger as filled in by the caller).

    Returns:
        The gesture name for the pattern, or ``"unknown/not detected"`` when
        the pattern is not one of the known combinations.
    """
    known_patterns = {
        (1, 0, 0, 0, 0): "good",
        (1, 1, 0, 0, 0): "Travis picking",
        (0, 1, 0, 0, 0): "one",
        (0, 0, 1, 0, 0): "VERY BAD",
        (0, 1, 1, 0, 0): "two",
        (1, 1, 1, 0, 0): "3 finger picking",
        (0, 1, 1, 1, 0): "three",
        (0, 1, 1, 1, 1): "four",
        (1, 1, 1, 1, 1): "five",
        (1, 0, 0, 0, 1): "Shaka",
        (0, 0, 1, 1, 1): "OK or plectrum",
        (0, 0, 0, 0, 0): "closed",
    }

    # Only the first five entries take part in the lookup.
    pattern = (result[0], result[1], result[2], result[3], result[4])
    return known_patterns.get(pattern, "unknown/not detected")

In [None]:

    # Real-time loop: reads the webcam, classifies the chord shape of the hand
    # MediaPipe labels 'Left' with the trained model, and uses gestures of the
    # hand labelled 'Right' to arm ("OK or plectrum") or disarm ("Shaka")
    # the chord playback. Quit with 'q' while the video window has focus.

    # SECURITY NOTE: pickle.load can execute arbitrary code contained in the
    # file - only load a model.p that you created yourself or fully trust.
    with open('./model.p', 'rb') as model_file:
        model_dict = pickle.load(model_file)
    model = model_dict['model']

    # Class index predicted by the model -> chord group understood by play_chord
    labels_dict = {0: 'Minor1', 1: 'Minor2', 2: 'Minor3', 3: 'MajorToMinor', 4: 'Major2Minor', 5: 'FullBarre', 6: 'Barre'}

    width = 1280
    height = 720

    cap = cv2.VideoCapture(0)

    # Delete following settings and replace resize function for more FPS
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    cap.set(cv2.CAP_PROP_FPS, 30)

    hands = mp.solutions.hands.Hands(static_image_mode=False,
                                     max_num_hands=2,
                                     min_tracking_confidence=0.5,
                                     min_detection_confidence=0.5)

    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles

    previousTime = 0
    currentTime = 0

    # Chords are only played while this flag is set (toggled by gestures below)
    ready = False

    # One stretched(1)/bent(0) flag per finger
    figure = [0 for element in range(5)]

    # Pixel coordinates (x, y) of the 21 landmarks of the gesture hand
    landmark = np.empty((21, 2))

    # Fingertip landmark index -> (radius, BGR colour) of the filled marker
    tip_markers = {
        4: (6, (0, 0, 0)),
        8: (12, (255, 0, 0)),
        12: (12, (0, 255, 0)),
        16: (12, (0, 0, 255)),
        20: (12, (255, 0, 255)),
    }

    try:
        while True:
            grabbed, frame = cap.read()
            if not grabbed:
                # No frame delivered (camera missing/busy/unplugged): stop
                # instead of crashing in cv2.flip on a None frame.
                print('Could not read a frame from the camera - stopping.')
                break

            # Horizontally flip the image to get right hand for the right and left for the left hand
            frame = cv2.flip(frame, 1)
            frame = cv2.resize(frame, (width, height))

            # MediaPipe expects RGB, OpenCV delivers BGR
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            results = hands.process(frame_rgb)

            if results.multi_hand_landmarks:
                # NOTE(review): every detected hand is combined with every
                # handedness entry, so with two hands each branch below runs
                # more than once per frame. Pairing the two lists would change
                # which hand feeds the classifier, so the structure is kept.
                for handsnumber, lms in enumerate(results.multi_hand_landmarks):
                    mp_drawing.draw_landmarks(frame, lms, mp_hands.HAND_CONNECTIONS)

                    for handedness in results.multi_handedness:
                        # 'Left' or 'Right' as classified by MediaPipe
                        label = MessageToDict(handedness)['classification'][0]['label']

                        if label == 'Left':
                            # Chord hand: build the feature vector for the classifier
                            data_aux = []
                            x_ = []
                            y_ = []

                            H, W, _ = frame.shape

                            for hand_landmarks in results.multi_hand_landmarks:
                                mp_drawing.draw_landmarks(
                                    frame,  # image to draw
                                    hand_landmarks,  # model output
                                    mp_hands.HAND_CONNECTIONS,  # hand connections
                                    mp_drawing_styles.get_default_hand_landmarks_style(),
                                    mp_drawing_styles.get_default_hand_connections_style())

                                for point in hand_landmarks.landmark:
                                    x_.append(point.x)
                                    y_.append(point.y)

                                # Features are landmark offsets from the top-left
                                # corner of the bounding box of the points seen so far.
                                min_x = min(x_)
                                min_y = min(y_)
                                for point in hand_landmarks.landmark:
                                    data_aux.append(point.x - min_x)
                                    data_aux.append(point.y - min_y)

                                x1 = int(min_x * W) - 10
                                y1 = int(min_y * H) - 10

                                x2 = int(max(x_) * W) - 10
                                y2 = int(max(y_) * H) - 10

                                # 21 landmarks * (x, y) = 42 values; once a second
                                # hand has been appended the vector is too long and
                                # no prediction is made.
                                if len(data_aux) == 42:
                                    prediction = model.predict([np.asarray(data_aux)])
                                    predicted_character = labels_dict[int(prediction[0])]

                                    if ready:
                                        play_chord(predicted_character)

                                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                                    cv2.putText(frame, predicted_character, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                                                cv2.LINE_AA)

                        if label == 'Right':
                            # Gesture hand. NOTE(review): this always reads the hand
                            # at index 1, i.e. it assumes the gesture hand is the
                            # second detected hand - confirm.
                            if len(results.multi_hand_landmarks) > 1:
                                h, w, _ = frame.shape
                                for idx, lm in enumerate(results.multi_hand_landmarks[1].landmark):
                                    cx = int(lm.x * w)
                                    cy = int(lm.y * h)
                                    cv2.circle(frame, (cx, cy), 3, (128, 128, 128))

                                    landmark[idx, :] = [cx, cy]

                                    # Mark the fingertips with different colors and sizes
                                    if idx in tip_markers:
                                        radius, colour = tip_markers[idx]
                                        cv2.circle(frame, (cx, cy), radius, colour, cv2.FILLED)

                                for finger in range(5):
                                    # The thumb (finger 0) is measured from landmark 17,
                                    # all other fingers from the wrist (landmark 0).
                                    origin = landmark[17] if finger == 0 else landmark[0]
                                    figure[finger] = are_finger_stretched(
                                        origin, landmark[4 * finger + 2], landmark[4 * finger + 4])

                                gesture_solution = detect_gesture(figure)
                                if gesture_solution == "OK or plectrum":
                                    ready = True
                                if gesture_solution == "Shaka":
                                    ready = False
                                cv2.putText(frame, f"{gesture_solution}", (240, 18*(handsnumber)), cv2.FONT_HERSHEY_PLAIN, 2, (128, 255, 128), 5)

            # Show frames per second - on every frame, also when no hand is visible
            currentTime = time.time()
            elapsed = currentTime - previousTime
            fps = 1 / elapsed if elapsed > 0 else 0
            previousTime = currentTime
            cv2.putText(frame, str(int(fps)), (10, 35), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)  # Pos,Font,Scale,Color,Thick

            cv2.imshow("Hand Tracking", frame)  # Name of Window and the Live-Image

            # Quit with Q (mask to the low byte so the comparison works on all platforms)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and the window, even after an exception
        cap.release()
        hands.close()
        cv2.destroyAllWindows()

In [None]:
#
# Finger angle calculation - not finished yet (disabled: the draft is kept inside the string literal below)
#
"""
def draw_finger_angles(frame, results, joint_list):
    
    # Loop through hands
    for hand in results.multi_hand_landmarks:
        #Loop through joint sets 
        for joint in joint_list:
            a = np.array([lm.landmark[joint[0]].x, lm.landmark[joint[0]].y]) # First coord
            b = np.array([lm.landmark[joint[1]].x, lm.landmark[joint[1]].y]) # Second coord
            c = np.array([lm.landmark[joint[2]].x, lm.landmark[joint[2]].y]) # Third coord
            
            radians = np.arctan2(c[1] - b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
            angle = np.abs(radians*180.0/np.pi)
            
            if angle > 180.0:
                angle = 360-angle
                
            cv2.putText(frame, str(round(angle, 2)), tuple(np.multiply(b, [640, 480]).astype(int)),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA)
    return frame
"""

In [None]:
import statistics

class Joint:
    """A named point with an x and a y value, stored exactly as passed in."""

    def __init__(self, name, XValue, YValue):
        self.name, self.XValue, self.YValue = name, XValue, YValue

    def get_name(self):
        """Return the name given at construction time."""
        return self.name
    
class Finger:
    """A named finger holding its own list of joints."""

    def __init__(self, name, joints):
        self.name = name
        # Start from a fresh list so the caller's sequence is never shared.
        self.joints = []
        self.set_joints(joints)

    def get_name(self):
        """Return the finger's name."""
        return self.name

    def get_joints(self):
        """Return True when none of the stored joints is None, else False.

        Despite its name this does not return the joints themselves.
        """
        return all(joint is not None for joint in self.joints)

    def set_joints(self, joints):
        """Append every element of ``joints`` to the joints already stored."""
        self.joints += joints

     
class Handie:
    """Snapshot of a hand: a frame value plus the fingers recorded for it."""

    def __init__(self, frame, fingers):
        self.frame, self.fingers = frame, fingers

    def get_frame(self):
        """Return the frame value stored at construction time."""
        return self.frame
    



# Analyse one recorded video: run MediaPipe on every second frame and store
# the joints of the first detected hand as one Handie per processed frame.

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

DATA_DIR = '/Users/pc/Desktop/Gestenerkennung/ComputerVisionEngineerYoutube/Gestenerkennung/Datensatz_Kamera/Dur'
# Only this recording inside DATA_DIR is analysed
TARGET_VIDEO = 'EDur_7c.mp4'
# OS metadata files that may sit next to the recordings
UNWANTED_FILES = ('.DS_Store', '.thumbs.db', 'Thumbs.db', 'desktop.ini')

# (finger name, joint name, first landmark index, last landmark index);
# landmark 0 is not part of any finger.
FINGER_LAYOUT = (
    ('thumb', 'thumbjoint', 1, 4),
    ('index', 'indexjoint', 5, 8),
    ('middle', 'middlejoint', 9, 12),
    ('ring', 'ringjoint', 13, 16),
    ('pinky', 'pinkyjoint', 17, 20),
)


def _build_fingers(hand_landmarks):
    """Convert one detected hand into five Finger objects (thumb to pinky), four Joints each."""
    fingers = []
    for finger_name, joint_name, first, last in FINGER_LAYOUT:
        joints = [
            Joint(joint_name, hand_landmarks.landmark[i].x, hand_landmarks.landmark[i].y)
            for i in range(first, last + 1)
        ]
        fingers.append(Finger(finger_name, joints))
    return fingers


data = []
labels = []
# Indices of the frames that were processed
frames = []
# One Handie per processed frame in which a hand was detected
handies = []

# Fingers of the most recently processed hand
Thumb = None
Index = None
Middle = None
Ring = None
Pinky = None

for dir_ in os.listdir(DATA_DIR):
    if dir_ in UNWANTED_FILES:
        print(f"Skipping unwanted file in {DATA_DIR} directory: {dir_}")
        continue

    if dir_ != TARGET_VIDEO:
        continue

    full_dir_path = os.path.join(DATA_DIR, dir_)

    cap = cv2.VideoCapture(full_dir_path)
    if not cap.isOpened():
        print("couldnt open File")
        cap.release()
        break

    frame_count = 0
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Playback delay derived from the video frame rate; some files report
        # 0 FPS, in which case the shortest possible delay is used.
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        delay = max(1, int(1000 / video_fps)) if video_fps > 0 else 1

        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a decoding error) before the reported frame
                # count was reached - stop instead of looping forever.
                break

            # Only every second frame is analysed and displayed
            if frame_count % 2 == 0:
                frames.append(frame_count)
                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = hands.process(img_rgb)

                if results.multi_hand_landmarks:
                    # Snapshot the first detected hand once all of its landmarks
                    # are available, so every Handie belongs to exactly one frame.
                    Thumb, Index, Middle, Ring, Pinky = _build_fingers(results.multi_hand_landmarks[0])
                    handies.append(Handie(frame_count, [Thumb, Index, Middle, Ring, Pinky]))

                cv2.imshow('Image', frame)

            frame_count += 1
            cv2.waitKey(delay)

            # Stop once the number of frames reported by the container was read
            if frame_count == total_frames:
                print(frame_count)
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


def extractbonedata(allhanddata, fingernumber, jointnumber):
    """Collect the position of one joint of one finger across all hand snapshots.

    Args:
        allhanddata: Iterable of objects exposing ``fingers[...].joints[...]``
            whose joints carry ``XValue`` and ``YValue``.
        fingernumber: Index into each snapshot's ``fingers``.
        jointnumber: Index into the selected finger's ``joints``.

    Returns:
        Tuple ``(x_values, y_values, colors)`` of three equally long lists;
        ``colors`` counts up in steps of 100 (100, 200, ...), one per snapshot.
    """
    tracked = [snapshot.fingers[fingernumber].joints[jointnumber] for snapshot in allhanddata]
    x_values = [joint.XValue for joint in tracked]
    y_values = [joint.YValue for joint in tracked]
    shades = [100 * position for position in range(1, len(tracked) + 1)]
    return x_values, y_values, shades


def calculateScatterplott(XValues, YValues, colors):
    """Draw a scatter plot of the given points, shaded by ``colors``.

    The title shows the sample standard deviation of the x and y values; the
    axes are limited to the data range plus a margin of 0.01.

    Args:
        XValues: Sequence of x coordinates (at least one element).
        YValues: Sequence of y coordinates, same length as ``XValues``.
        colors: One numeric value per point, mapped onto the 'gray' colormap
            and explained by the colorbar labelled 'Time'.

    Raises:
        ValueError: If ``XValues`` or ``YValues`` is empty.
    """
    if len(XValues) == 0 or len(YValues) == 0:
        raise ValueError("calculateScatterplott needs at least one point to plot")

    # A standard deviation needs two samples; report nan for a single point
    # instead of failing with statistics.StatisticsError.
    Xstd_dev = statistics.stdev(XValues) if len(XValues) > 1 else float('nan')
    Ystd_dev = statistics.stdev(YValues) if len(YValues) > 1 else float('nan')

    # Create exactly one figure and colour *that* one (calling plt.gcf()
    # before plt.figure() used to create and colour a stray empty figure).
    fig, ax = plt.subplots()
    fig.set_facecolor('lightblue')
    ax.set_facecolor((0.2, 0.47, 0.42))

    points = ax.scatter(XValues, YValues, c=colors, cmap='gray')

    # Add labels and title
    ax.set_xlabel('X Values')
    ax.set_ylabel('Y Values')
    ax.set_title('Std_x:' + str(Xstd_dev) + " Std_y:" + str(Ystd_dev))

    ax.set_xlim(min(XValues) - 0.01, max(XValues) + 0.01)
    ax.set_ylim(min(YValues) - 0.01, max(YValues) + 0.01)

    # Color bar explaining the shading of the points
    fig.colorbar(points, ax=ax, label='Time')
    plt.show()
    # Release the figure; this function is called many times in a loop
    plt.close(fig)


# Plot the movement of individual joints over all recorded hand snapshots.
print("value beingextracted")
# Counts the plots that were drawn (the counter is not read again in this cell)
counthands=0
# Finger indices 0-3 and joint indices 0-2 of each; a finger at index 4 is never plotted.
for x in range(4):
  for y in range(3):
   # values is the (x list, y list, colors) tuple returned by extractbonedata
   values=extractbonedata(handies,x,y)
   calculateScatterplott(values[0],values[1],values[2])
   counthands+=1

In [None]:
# This part is only necessary to prepare your images (photos) for training.
# All images are read from the folder "data"; each of its subfolders acts as a label.
# Please use numbers for the subfolder names.

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

DATA_DIR = './data'

data = []
labels = []
for dir_ in os.listdir(DATA_DIR):
    label_dir = os.path.join(DATA_DIR, dir_)
    # Stray files next to the label folders (e.g. .DS_Store) are not labels
    if not os.path.isdir(label_dir):
        continue

    for img_path in os.listdir(label_dir):
        full_img_path = os.path.join(label_dir, img_path)

        # cv2.imread returns None for anything it cannot decode
        img = cv2.imread(full_img_path)
        if img is None:
            print(f'Skipping unreadable file: {full_img_path}')
            continue

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        results = hands.process(img_rgb)
        if not results.multi_hand_landmarks:
            continue

        # Every sample must describe exactly one hand (21 landmarks * (x, y) =
        # 42 values). An image with two hands would produce 84 values, which
        # makes the dataset ragged and can never match the 42-value vectors
        # the real-time cell feeds to the model.
        if len(results.multi_hand_landmarks) != 1:
            print(f'Skipping image with more than one hand: {full_img_path}')
            continue

        hand_landmarks = results.multi_hand_landmarks[0]
        x_ = [point.x for point in hand_landmarks.landmark]
        y_ = [point.y for point in hand_landmarks.landmark]

        # Features are the landmark offsets from the top-left corner of the
        # hand's bounding box, interleaved as x0, y0, x1, y1, ...
        min_x = min(x_)
        min_y = min(y_)
        data_aux = []
        for x, y in zip(x_, y_):
            data_aux.append(x - min_x)
            data_aux.append(y - min_y)

        data.append(data_aux)
        labels.append(dir_)

# Save data
with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

In [None]:
# This part is only necessary to TRAIN RandomForestClassifier on your pickle data

# 21 landmarks * (x, y) per sample
FEATURES_PER_SAMPLE = 42
# Fixed seed so the split and the trained forest are reproducible
RANDOM_SEED = 42

# SECURITY NOTE: pickle.load can execute arbitrary code contained in the file -
# only load a data.pickle that you created yourself or fully trust.
with open('./data.pickle', 'rb') as data_file:
    data_dict = pickle.load(data_file)

# You can print it out to check if the data structure is homogeneous
### print(data_dict.keys())
### print(data_dict)

# Samples of any other length (e.g. two hands in one image) would make the
# array ragged, which np.asarray cannot turn into a 2-D feature matrix.
usable_samples = [
    (sample, label)
    for sample, label in zip(data_dict['data'], data_dict['labels'])
    if len(sample) == FEATURES_PER_SAMPLE
]
skipped_samples = len(data_dict['data']) - len(usable_samples)
if skipped_samples:
    print('Skipped {} samples without exactly {} features'.format(skipped_samples, FEATURES_PER_SAMPLE))

# Data and Labels are lists, so they have to be converted here
data = np.asarray([sample for sample, _ in usable_samples])
labels = np.asarray([label for _, label in usable_samples])

# Dataset is split here in 80% training and 20% test data
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=RANDOM_SEED)

# Simple but very fast modeltype to train - good for older hardware/training on cpu
model = RandomForestClassifier(random_state=RANDOM_SEED)
model.fit(x_train, y_train)

y_predict = model.predict(x_test)
# accuracy_score expects (y_true, y_pred)
score = accuracy_score(y_test, y_predict)

print('{}% of samples are classified correctly '.format(score * 100))

# save model
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)