In [None]:
!pip install unrar

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting unrar
  Downloading unrar-0.4-py3-none-any.whl (25 kB)
Installing collected packages: unrar
Successfully installed unrar-0.4


In [None]:
# Extract the dataset archive (full paths preserved) into the current working directory.
# NOTE(review): assumes Google Drive is already mounted at /content/drive and that the
# notebook runs from /content, since DATASET later points at /content/Alphabet — confirm.
!unrar x '/content/drive/MyDrive/Sign Language/Dataset/Alphabet.rar'

In [None]:
import cv2
import matplotlib.pyplot as plt
import os
import pickle
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
try:
  import mediapipe as mp
except:
  !pip install mediapipe
  import mediapipe as mp

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mediapipe
  Downloading mediapipe-0.9.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (33.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.0/33.0 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
Collecting flatbuffers>=2.0
  Downloading flatbuffers-23.1.21-py2.py3-none-any.whl (26 kB)
Installing collected packages: flatbuffers, mediapipe
  Attempting uninstall: flatbuffers
    Found existing installation: flatbuffers 1.12
    Uninstalling flatbuffers-1.12:
      Successfully uninstalled flatbuffers-1.12
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.9.2 requires flatbuffers<2,>=1.12, but you have flatbuffers 23.1.21 which is incompatible.[0m[31m
[0mSuccessfully installed flatbuffers-23.1.21 mediap

**Mediapipe setup**

In [None]:
# Root folder of the extracted dataset: one sub-directory per letter.
DATASET = '/content/Alphabet'

# Short aliases for the MediaPipe sub-modules used throughout the notebook.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Detector used on the still images of the dataset: every image is treated
# independently, up to two hands are reported, and a low confidence threshold is used.
hands_options = {
    'static_image_mode': True,
    'max_num_hands': 2,
    'min_detection_confidence': 0.3,
}
hands = mp_hands.Hands(**hands_options)

**Data preparation**

In [None]:
# Build the training set: one feature vector of normalised landmark coordinates
# per image in which a hand is detected, plus the matching letter label.
data = []
labels = []

# Each sub-directory of DATASET holds the images of one letter; its name is the label.
for dir_ in sorted(os.listdir(DATASET)):
    class_dir = os.path.join(DATASET, dir_)
    # Ignore stray files that are not class folders.
    if not os.path.isdir(class_dir):
        continue

    for img_path in sorted(os.listdir(class_dir)):
        # Per-image buffers. The feature vector has its own name so that it does not
        # overwrite the dataset-level `data` list declared above.
        data_aux = []
        x_ = []
        y_ = []

        # cv2.imread returns None (it does not raise) for unreadable / non-image files.
        img = cv2.imread(os.path.join(class_dir, img_path))
        if img is None:
            continue
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Get the hand landmarks; skip images where no hand was found.
        results = hands.process(img_rgb)
        if not results.multi_hand_landmarks:
            continue

        for hand_landmarks in results.multi_hand_landmarks:
            # Collect the raw coordinates of every landmark of this hand.
            for landmark in hand_landmarks.landmark:
                x_.append(landmark.x)
                y_.append(landmark.y)

            # Normalise by subtracting the smallest x / y seen so far, so the features
            # do not depend on where the hand sits in the image. The minima are
            # computed once per hand instead of once per landmark.
            min_x, min_y = min(x_), min(y_)
            for landmark in hand_landmarks.landmark:
                data_aux.append(landmark.x - min_x)
                data_aux.append(landmark.y - min_y)

        # One sample per image. Images with several hands yield longer vectors.
        data.append(data_aux)
        labels.append(dir_)

**Save/Load the hand landmarks for each image**

In [None]:
# Persist the extracted features and labels so the slow landmark extraction
# does not have to be repeated.
dataset_payload = {'data': data, 'labels': labels}
with open('data.pickle', 'wb') as dataset_file:
  pickle.dump(dataset_payload, dataset_file)

In [None]:
# Load the saved features/labels. The context manager closes the file handle,
# which a bare open() inside the call never did.
# NOTE: pickle.load can execute arbitrary code — only load files produced by this notebook.
with open('/content/data.pickle', 'rb') as data_file:
    data_dict = pickle.load(data_file)

**Turn the data into np arrays**

In [None]:
# Convert the loaded lists to NumPy arrays.
# The feature vectors do not all have the same length (longer ones come from images with
# more than one detected hand). np.asarray on such ragged input raises on recent NumPy
# releases, and returns a 2-D array when no vector happens to be ragged. Building a 1-D
# object array explicitly gives the later cells (len() filtering, np.delete, .tolist())
# the same "array of per-sample lists" layout in every case.
samples = data_dict['data']
data = np.empty(len(samples), dtype=object)
for i, sample in enumerate(samples):
    data[i] = sample
labels = np.asarray(data_dict['labels'])

**Split the data**

In [None]:
# Hold out 20% of the samples for testing, keeping the per-letter proportions
# (stratify). A fixed random_state makes the split reproducible across re-runs.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

**In a few images more than one hand was detected, so their feature vectors are longer than the others and the model cannot be trained on them. We delete every sample whose length is not 42 (21 landmarks per hand, x + y = 42 values).**

In [None]:
# Positions of the training samples whose feature vector is not exactly 42 values long.
train_idx = [
    position
    for position, features in enumerate(x_train)
    if len(features) != 42
]

In [None]:
# Positions of the test samples whose feature vector is not exactly 42 values long.
test_idx = [
    position
    for position, features in enumerate(x_test)
    if len(features) != 42
]

In [None]:
# Remove the training samples flagged in train_idx, together with their labels.
# axis=0 deletes whole samples; without it np.delete flattens a 2-D feature matrix first.
# NOTE: not idempotent — recompute train_idx before running this cell a second time.
x_train = np.delete(x_train, train_idx, axis=0)
y_train = np.delete(y_train, train_idx, axis=0)

In [None]:
# Remove the test samples flagged in test_idx, together with their labels.
# axis=0 deletes whole samples; without it np.delete flattens a 2-D feature matrix first.
# NOTE: not idempotent — recompute test_idx before running this cell a second time.
x_test = np.delete(x_test, test_idx, axis=0)
y_test = np.delete(y_test, test_idx, axis=0)

**Create model and predict on the test set**

In [None]:
# Train a random forest on the landmark features.
# random_state is fixed so that re-running the notebook produces the same model.
model = RandomForestClassifier(random_state=42)
# .tolist() turns the array of per-sample lists into a plain nested list for scikit-learn.
model.fit(x_train.tolist(), y_train)

RandomForestClassifier()

In [None]:
# Predict the held-out samples and report the share that was classified correctly.
y_predict = model.predict(x_test.tolist())

# scikit-learn metrics take the ground truth first, then the predictions.
score = accuracy_score(y_test, y_predict)

# The value is a percentage, so the message carries the '%' sign.
print(f'{score * 100}% of samples were classified correctly !')

97.82193958664547% of samples were classified correctly !


**Save/Load the model**

In [None]:
# Save the trained model. The context manager closes the file even if pickling fails.
with open('model.p', 'wb') as f:
    pickle.dump({'model': model}, f)

In [None]:
# Load the saved model back. The context manager closes the file handle,
# which a bare open() inside the call never did.
# NOTE: pickle.load can execute arbitrary code — only load files produced by this notebook.
with open('/content/model.p', 'rb') as model_file:
    model_dict = pickle.load(model_file)
model = model_dict['model']

**Predict on a video**

In [None]:
# Annotate a video: for every frame, detect a hand, draw its landmarks and bounding
# box, and write the letter predicted by `model` above the box.

# Paths of the input/output videos
in_video = '/content/in.mp4'
out_video = '/content/out.avi'

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

cap = cv2.VideoCapture(in_video)
out = None
try:
    # The first frame provides the output size. It is processed and written like any
    # other frame instead of being read only for its shape and then dropped.
    ret, frame = cap.read()
    if not ret:
        raise IOError(f'Could not read any frame from {in_video}')
    H, W, _ = frame.shape

    # Keep the source frame rate; fall back to 30 fps when the container does not report one.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = 30.0

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(out_video, fourcc, fps, (W, H))

    # The model was trained on single-hand samples (42 features), so only one hand is
    # requested here; a second hand would produce 84 features and make predict() fail.
    # A separate detector object is used so the dataset detector `hands` is left untouched.
    with mp_hands.Hands(static_image_mode=True,
                        max_num_hands=1,
                        min_detection_confidence=0.4) as video_hands:
        while ret:
            # Convert the frame from BGR to RGB before running the landmark detector
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = video_hands.process(frame_rgb)

            if results.multi_hand_landmarks:
                hand_landmarks = results.multi_hand_landmarks[0]

                # Draw the landmarks and their connections on the frame
                mp_drawing.draw_landmarks(
                    frame,  # image to draw
                    hand_landmarks,  # model output
                    mp_hands.HAND_CONNECTIONS,  # hand connections
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

                # Raw landmark coordinates (relative to the frame size)
                x_ = [landmark.x for landmark in hand_landmarks.landmark]
                y_ = [landmark.y for landmark in hand_landmarks.landmark]

                # Same normalisation as for the training data: subtract the smallest
                # x / y from every landmark. A dedicated buffer name avoids clobbering
                # the dataset array `data`.
                min_x, min_y = min(x_), min(y_)
                data_aux = []
                for landmark in hand_landmarks.landmark:
                    data_aux.append(landmark.x - min_x)
                    data_aux.append(landmark.y - min_y)

                # Bounding box around the hand in pixels, with a 10 px margin
                x1 = int(min_x * W) - 10
                y1 = int(min_y * H) - 10
                x2 = int(max(x_) * W) + 10
                y2 = int(max(y_) * H) + 10

                # predict() returns an array with one label; take the label itself so
                # the overlay shows "A" rather than "['A']".
                prediction = str(model.predict([data_aux])[0])

                # Frame the hand into a rectangle box and write the predicted label
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
                cv2.putText(frame, prediction, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3,
                            cv2.LINE_AA)

            # Write the (possibly annotated) frame and fetch the next one
            out.write(frame)
            ret, frame = cap.read()
finally:
    # Always release the capture and the writer, even when a frame fails,
    # so the output file is finalised. No GUI window is opened, so there is
    # nothing to wait for or destroy.
    cap.release()
    if out is not None:
        out.release()