## 1. Extract Hand Landmarks
`data_dir` should contain one subdirectory per gesture. Each subdirectory holds the video(s) / photo(s) of that gesture, and the subdirectory name is used as the gesture label.

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import os

# Short alias for the MediaPipe hands solution; extract_landmarks builds its
# Hands detector from this module.
mp_hands = mp.solutions.hands
# Alias for MediaPipe's drawing utilities. Not referenced in the cells shown
# here -- presumably kept for visualising landmarks elsewhere.
mp_drawing = mp.solutions.drawing_utils

def extract_landmarks(image, hands=None):
    """Return the x/y landmark coordinates of the first hand found in *image*.

    Args:
        image: Image array in BGR channel order (it is converted to RGB before
            being handed to MediaPipe).
        hands: Optional, already constructed ``mp_hands.Hands`` detector to
            reuse. When omitted, a temporary detector is created for this call
            and closed afterwards, exactly as before. Building the detector is
            the expensive part, so callers that process many frames can create
            one detector up front and pass it in for every frame.

    Returns:
        A 1-D NumPy array ``[x0, y0, x1, y1, ...]`` holding the ``x`` and ``y``
        attribute of every landmark of the first detected hand, or ``None``
        when no hand is detected.
    """
    if hands is None:
        # No detector supplied: create a short-lived one and delegate, so the
        # context manager still guarantees it is closed.
        with mp_hands.Hands(
            static_image_mode=True,
            max_num_hands=1,
            min_detection_confidence=0.7,
        ) as detector:
            return extract_landmarks(image, hands=detector)

    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if not results.multi_hand_landmarks:
        return None

    # Only the first detected hand is used.
    first_hand = results.multi_hand_landmarks[0]
    return np.array(
        [coord for lm in first_hand.landmark for coord in (lm.x, lm.y)]
    )

# Example: extract landmarks from every frame of every video.
# data_dir must contain one subdirectory per gesture; each subdirectory holds
# the videos / photos of that gesture and its name is used as the label.
data_dir = "address to the directory"
landmarks_list = []
labels_list = []

# sorted() makes the processing order (and therefore the row order of X / y)
# reproducible; os.listdir returns entries in arbitrary order.
for label in sorted(os.listdir(data_dir)):
    label_dir = os.path.join(data_dir, label)
    if not os.path.isdir(label_dir):
        continue  # stray files next to the gesture folders are not gestures
    print(f"Processing gesture: {label}")
    video_count = 0  # For image: images_count = 0
    for video_file in sorted(os.listdir(label_dir)):  # For image: for image_file in sorted(os.listdir(label_dir)):
        video_path = os.path.join(label_dir, video_file)  # For image: image_path = os.path.join(label_dir, image_file)
        cap = cv2.VideoCapture(video_path)  # For image: image = cv2.imread(image_path)
        if not cap.isOpened():
            # Not a readable video (hidden file, nested folder, unsupported
            # format, ...): report it instead of counting it as processed.
            cap.release()
            print(f"Skipping unreadable file: {video_path}")
            continue
        frame_count = 0  # For image: image_count = 0
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:  # For image: if image is None:
                    break
                frame_count += 1
                # To speed up training, skip frames to reduce redundancy,
                # e.g. wrap the following 4 lines in `if frame_count % 5 == 0:`
                # (remember to indent them) to keep only every 5th frame.
                landmarks = extract_landmarks(frame)
                if landmarks is not None:
                    landmarks_list.append(landmarks)
                    labels_list.append(label)
        finally:
            # Release the capture even if landmark extraction raises.
            cap.release()
        video_count += 1
    print(f"Processed {video_count} videos for gesture: {label}")

X = np.array(landmarks_list)
y = np.array(labels_list)

# Note: only frames in which a hand was detected are stored, so len(X) is the
# number of usable samples rather than the number of frames read.
print(f"Total frames processed: {len(X)}")
print(f"Total labels: {len(y)}")
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

## 2. Prepare the Dataset

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the gesture-name labels in y as integers.
# The fitted encoder is kept in `label_encoder` so the integer codes can be
# mapped back to gesture names later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
# Show the encoded label of every sample.
print(y_encoded)

## 3. Train the Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Split the dataset into training (80%) and testing (20%) sets.
# The split is stratified on the labels so every gesture keeps the same
# proportion in both sets; without it a rare gesture can be missing from the
# test set and the accuracy below becomes unrepresentative.
# NOTE: the split is made per sample. When the samples are consecutive frames
# of the same videos, near-identical frames can land on both sides of the
# split, so the reported accuracy is likely optimistic.
try:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )
except ValueError:
    # Stratification is impossible (e.g. a gesture with a single sample, or
    # fewer test samples than gestures): fall back to the plain random split.
    print("Warning: stratified split not possible; using a plain random split.")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42
    )

# Train a RandomForestClassifier (fixed seed for reproducible results)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy:.2f}")

## 4. Save the Model

In [None]:
import joblib

# Persist the trained classifier.
model_file_path = "gesture_model.pkl"
joblib.dump(model, model_file_path)

# The classifier predicts the integer codes produced by label_encoder, so the
# encoder is saved alongside it; without it the predictions cannot be mapped
# back to gesture names once this session is gone.
label_encoder_file_path = "gesture_label_encoder.pkl"
joblib.dump(label_encoder, label_encoder_file_path)