### Collect Image

In [12]:
import os
import cv2


DATA_DIR = './data'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

number_of_classes = 3
dataset_size = 100

cap = cv2.VideoCapture(0)
for j in range(number_of_classes):
    if not os.path.exists(os.path.join(DATA_DIR, str(j))):
        os.makedirs(os.path.join(DATA_DIR, str(j)))

    print('Collecting data for class {}'.format(j))

    done = False
    while True:
        ret, frame = cap.read()
        cv2.putText(frame, 'Ready? Press "Q" ! :)', (100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3,
                    cv2.LINE_AA)
        cv2.imshow('frame', frame)
        if cv2.waitKey(25) == ord('q'):
            break

    counter = 0
    while counter < dataset_size:
        ret, frame = cap.read()
        cv2.imshow('frame', frame)
        cv2.waitKey(25)
        cv2.imwrite(os.path.join(DATA_DIR, str(j), '{}.jpg'.format(counter)), frame)

        counter += 1

cap.release()
cv2.destroyAllWindows()
cv2.waitKey(1)

Collecting data for class 0


KeyboardInterrupt: 

: 

### Data Processing

In [1]:
import mediapipe as mp
import cv2
import os #work with file path
import matplotlib.pyplot as plt

2024-05-25 21:05:08.588723: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
mp_hands = mp.solutions.hands #initialize the Hands class an store it in a variable
mp_drawing = mp.solutions.drawing_utils #draw all the hand’s landmarks points on the output image
mp_drawing_styles = mp.solutions.drawing_styles

In [3]:
#set the hands function which will hold the landmarks points
hands = mp_hands.Hands(static_image_mode=True,min_detection_confidence=0.3)

I0000 00:00:1716642345.692324  198280 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.11), renderer: Intel(R) Iris(TM) Plus Graphics 645


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1716642345.823790  199051 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1716642345.861896  199058 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [10]:
DATA_DIR = './data'

data = []
labels = []

for dir_ in os.listdir(DATA_DIR):
    if(dir_ != ".DS_Store"):
        for img_path in os.listdir(os.path.join(DATA_DIR,dir_)):
            data_aux = []
            img = cv2.imread(os.path.join(DATA_DIR,dir_,img_path))
            #convert bgr to rgb
            img_rgb = cv2.cvtColor(img,cv2.COLOR_BGR2RGB) #Mediapipe processes frames in RGB format.
            results = hands.process(img_rgb) #detects hand landmarks in the frame
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    for i in range(len(hand_landmarks.landmark)): #each hand keypoints
                        x = hand_landmarks.landmark[i].x
                        y = hand_landmarks.landmark[i].y
                        data_aux.append(x)
                        data_aux.append(y)
                data.append(data_aux)
                labels.append(dir_)

                

            






In [12]:
import pickle 
f = open('data.pickle', 'wb')
pickle.dump({'data':data,'labels':labels},f)
f.close()

### Train Model

In [13]:
import pickle

In [15]:
data_dict = pickle.load(open('./data.pickle','rb'))

In [16]:
print(data_dict.keys())
print(data_dict)

dict_keys(['data', 'labels'])
{'data': [[0.24645665287971497, 0.6247559785842896, 0.30018970370292664, 0.6013228297233582, 0.3484453558921814, 0.5148500800132751, 0.37034210562705994, 0.420922189950943, 0.37030550837516785, 0.34910520911216736, 0.3418671488761902, 0.41494157910346985, 0.3427201211452484, 0.35116612911224365, 0.32625582814216614, 0.4297608733177185, 0.3236357271671295, 0.45882532000541687, 0.30482447147369385, 0.3998677134513855, 0.3044435381889343, 0.34738725423812866, 0.29472044110298157, 0.44671133160591125, 0.299863338470459, 0.4573383033275604, 0.26903802156448364, 0.3966594934463501, 0.2668851315975189, 0.3595053553581238, 0.2626395523548126, 0.44797706604003906, 0.2699909210205078, 0.45376133918762207, 0.23389381170272827, 0.39869287610054016, 0.2336612343788147, 0.3682617247104645, 0.23257869482040405, 0.4311324656009674, 0.2351333200931549, 0.445302277803421], [0.32654985785484314, 0.5986249446868896, 0.3750448226928711, 0.5716711282730103, 0.4156273603439331, 

In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

In [19]:
data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

In [20]:
X_train, x_test, y_train, y_test = train_test_split(data,labels, test_size=0.2,shuffle=True,stratify=labels)

In [21]:
model = RandomForestClassifier()

model.fit(X_train,y_train)

y_predict = model.predict(x_test)

In [23]:
score = accuracy_score(y_predict,y_test)
print('{}% of samples were classified correctly'.format(score*100))

100.0% of samples were classified correctly


In [24]:
f = open('model.p','wb')
pickle.dump({'model':model},f)
f.close()

### Test the model

In [2]:
import mediapipe as mp
import cv2
import os #work with file path
import matplotlib.pyplot as plt

2024-05-25 23:51:09.808825: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
import pickle

In [4]:
import numpy as np

In [5]:
mp_hands = mp.solutions.hands #initialize the Hands class an store it in a variable
mp_drawing = mp.solutions.drawing_utils #draw all the hand’s landmarks points on the output image
mp_drawing_styles = mp.solutions.drawing_styles

In [6]:
#set the hands function which will hold the landmarks points
hands = mp_hands.Hands(static_image_mode=True,min_detection_confidence=0.3)

I0000 00:00:1716652278.966780  308867 gl_context.cc:357] GL version: 2.1 (2.1 INTEL-22.5.11), renderer: Intel(R) Iris(TM) Plus Graphics 645


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1716652278.993207  309183 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1716652279.025506  309177 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [7]:
model_dict = pickle.load(open('./model.p','rb'))
model = model_dict['model']

In [11]:
cap = cv2.VideoCapture(0)
labels_dict = {0:'A', 1:'B',2:'L'}

while True:
    data_aux = []
    x_ = []
    y_ = []
    ret, frame = cap.read()
    H, W, _ = frame.shape
    frame_rgb = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                frame,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style()
            )
        for hand_landmarks in results.multi_hand_landmarks:
           for i in range(len(hand_landmarks.landmark)): #each hand keypoints
                x = hand_landmarks.landmark[i].x
                y = hand_landmarks.landmark[i].y
                data_aux.append(x)
                data_aux.append(y)
                x_.append(x)
                y_.append(y)

        x1 = int(min(x_) * W) - 10
        y1 = int(min(y_) * H) - 10

        x2 = int(max(x_) * W) - 10
        y2 =int(max(y_) * H) - 10

        prediction = model.predict([np.asarray(data_aux)])

        predicted_character = labels_dict[int(prediction[0])]
    
        cv2.rectangle(frame, (x1,y1),(x2,y2),(0,0,0),4)
        cv2.putText(frame, predicted_character, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                        cv2.LINE_AA)

    cv2.imshow('frame',frame)
    cv2.waitKey(1)

    # Break gracefully
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
    
cap.release()
cv2.destroyAllWindows()
cv2.waitKey(1)




-1