In [1]:
import cv2
import numpy as np
import mediapipe as mp
import joblib
from sklearn.preprocessing import LabelEncoder
from PIL import ImageFont, ImageDraw, Image
import pickle
from joblib import dump, load

In [2]:
# Deserialize the previously trained classifier from disk.
# NOTE: pickle can execute arbitrary code while loading; only open files
# that come from a trusted source.
with open('best_model_rf.pkl', mode='rb') as file:
    model = pickle.load(file)

In [3]:
# Rebuild a label encoder from the labels stored alongside the pickled dataset.
# NOTE: pickle can execute arbitrary code while loading; only open files
# that come from a trusted source.
with open('data.pickle', mode='rb') as f:
    dataset = pickle.load(f)

# LabelEncoder.fit returns the fitted encoder itself, so construction and
# fitting can be chained into a single expression.
label_encoder = LabelEncoder().fit(dataset['labels'])

In [4]:
# Lookup table from numeric class label to the Sinhala letter it stands for.
# The position of each letter in the list is its numeric label (0-9).
labels_dict = dict(enumerate(['අ', 'ආ', 'ඇ', 'ඉ', 'ඊ', 'උ', 'ඌ', 'එ', 'ඒ', 'ක්']))

In [5]:
# Live webcam loop: detect hands with MediaPipe, classify each detected hand
# with the loaded model, and overlay the hand skeleton plus the predicted
# Sinhala letter on the displayed frame.

# Initialize MediaPipe hands module
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Access the camera
cap = cv2.VideoCapture(0)

# Load a Sinhala-compatible font
font_path = "NotoSansSinhala-VariableFont_wdth,wght.ttf"  # Path to the font
try:
    font = ImageFont.truetype(font_path, 30)
except IOError:
    print("Font file not found. Defaulting to a basic font.")
    font = ImageFont.load_default()


def _normalize_landmarks(landmarks):
    """Return a flat [x0, y0, x1, y1, ...] list of min-max scaled coordinates.

    Each coordinate is shifted by the minimum and divided by the span
    (max - min) of that axis over all landmarks of the hand. When every
    landmark shares the same coordinate on an axis the span is zero; it is
    replaced by 1.0 so the result is 0.0 instead of a ZeroDivisionError.
    """
    xs = [lm.x for lm in landmarks]
    ys = [lm.y for lm in landmarks]
    # Compute the extrema once instead of once per landmark.
    x_min, y_min = min(xs), min(ys)
    x_span = (max(xs) - x_min) or 1.0
    y_span = (max(ys) - y_min) or 1.0

    features = []
    for x, y in zip(xs, ys):
        features.append((x - x_min) / x_span)
        features.append((y - y_min) / y_span)
    return features


def _decode_label(raw_label):
    """Map a model prediction to its display letter via ``labels_dict``.

    The raw prediction is looked up first; if that misses, its integer form
    is tried (NOTE(review): covers a model trained on string labels such as
    '3' -- confirm against the training notebook). If nothing matches, the
    raw label is shown rather than the text "None".
    """
    letter = labels_dict.get(raw_label)
    if letter is None:
        try:
            letter = labels_dict.get(int(raw_label))
        except (TypeError, ValueError):
            letter = None
    return letter if letter is not None else str(raw_label)


print("Press 'Esc' to quit.")
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Error reading frame.")
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        predicted_letters = []
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Draw hand landmarks and connections on the BGR frame
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Extract and normalize landmarks, shaped as a single sample
                data_aux = np.array(_normalize_landmarks(hand_landmarks.landmark)).reshape(1, -1)

                # Predict the letter for this hand
                prediction = model.predict(data_aux)
                predicted_letters.append(_decode_label(prediction[0]))

        if predicted_letters:
            # Build the PIL image only AFTER the landmarks were drawn, so the
            # skeleton survives the round trip back to an OpenCV frame.
            pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            draw = ImageDraw.Draw(pil_img)
            for hand_index, predicted_letter in enumerate(predicted_letters):
                # Stack one line per hand so multiple labels do not overlap.
                draw.text((50, 50 + 40 * hand_index), f'Letter: {predicted_letter}',
                          font=font, fill=(255, 0, 0))

            # Convert PIL image back to OpenCV format
            frame = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

        # Display the frame
        cv2.imshow('Live Prediction', frame)

        # Exit on 'Esc' key
        if cv2.waitKey(10) & 0xFF == 27:
            break
finally:
    # Always free the camera, the MediaPipe graph and the window, even when
    # the loop is left through an exception or a kernel interrupt.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

Press 'Esc' to quit.
