In [2]:
import cv2
import mediapipe as mp
import copy
import itertools
from tensorflow import keras
import numpy as np
import pandas as pd
import pytesseract
import requests

# Set the path for Tesseract OCR executable
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Load the saved model from file
model = keras.models.load_model("model.h5")

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

# New folder names for gestures
gesture_labels = ['bathroom', 'hello', 'help', 'iloveyou', 'more', 'no', 'repeat', 'thanks', 'yes']

# Functions
def calc_landmark_list(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]
    landmark_point = []

    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        landmark_point.append([landmark_x, landmark_y])

    return landmark_point

def pre_process_landmark(landmark_list):
    temp_landmark_list = copy.deepcopy(landmark_list)

    base_x, base_y = 0, 0
    for index, landmark_point in enumerate(temp_landmark_list):
        if index == 0:
            base_x, base_y = landmark_point[0], landmark_point[1]

        temp_landmark_list[index][0] -= base_x
        temp_landmark_list[index][1] -= base_y

    temp_landmark_list = list(itertools.chain.from_iterable(temp_landmark_list))
    max_value = max(list(map(abs, temp_landmark_list)))

    def normalize_(n):
        return n / max_value

    temp_landmark_list = list(map(normalize_, temp_landmark_list))

    return temp_landmark_list

# Function to send text to Raspberry Pi
def send_text_to_pi(text):
    url = "http://192.168.137.162:5000/speak"
    data = {'text': text}
    response = requests.post(url, json=data)
    print(response.text)

# For webcam input:
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    model_complexity=0,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        success, image = cap.read()
        image = cv2.flip(image, 1)
        if not success:
            print("Ignoring empty camera frame.")
            continue

        image.flags.writeable = False
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image)

        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        debug_image = copy.deepcopy(image)

        detected_labels = []

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                landmark_list = calc_landmark_list(debug_image, hand_landmarks)
                pre_processed_landmark_list = pre_process_landmark(landmark_list)
                mp_drawing.draw_landmarks(
                    image,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())
                df = pd.DataFrame(pre_processed_landmark_list).transpose()

                # Predict the gesture
                predictions = model.predict(df, verbose=0)
                predicted_class = np.argmax(predictions, axis=1)
                label = gesture_labels[predicted_class[0]]

                # Modify 'iloveyou' to 'I love you' for speech output
                if label == 'iloveyou':
                    label = 'I love you'

                detected_labels.append(label)
                cv2.putText(image, label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
                print(label)

        # Use Tesseract OCR to extract text from the image
        extracted_text = pytesseract.image_to_string(image)
        print("Extracted Text:", extracted_text)

        # Combine detected labels and extracted text
        combined_text = ' '.join(detected_labels) + ' ' + extracted_text

        # Send the combined text to Raspberry Pi
        send_text_to_pi(combined_text)

        # Output image
        cv2.imshow('Indian sign language detector', image)
        if cv2.waitKey(5) & 0xFF == 27:
            break

cap.release()
cv2.destroyAllWindows()

Extracted Text: ie
‘ pai
a)




ConnectionError: HTTPConnectionPool(host='192.168.137.162', port=5000): Max retries exceeded with url: /speak (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001E30AF31DF0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))