In [1]:
import os
from io import BytesIO
import io
import numpy as np
import pandas as pd
from PIL import Image
import cv2 as cv
import matplotlib.pyplot as plt
import base64
import json
import time
import sounddevice as sd
import vosk
import sys
import threading
import queue
from PIL import Image

In [5]:
# Folder holding one reference image per sign (file stem = sign name).
# Raw string: the Windows path contains backslash sequences ("\S", "\d", "\l",
# ...) that a normal literal treats as invalid escapes (SyntaxWarning on 3.12+).
dataset_path = r"D:\SpectoV\Hand_Gesture_Recognition\data\level_1_data\ASL_Data_AtoZ_&_0to9"

In [6]:
def image_to_base64(image_array):
    """Encode an image array as base64 text of its PNG representation.

    The array is converted to a PIL image, written as PNG into an in-memory
    buffer, and the PNG bytes are returned base64-encoded and decoded as UTF-8.
    """
    with io.BytesIO() as png_stream:
        Image.fromarray(image_array).save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()
    return base64.b64encode(png_bytes).decode("utf-8")

In [7]:
def load_images_from_folder(folder):
    """Load every PNG/JPEG image directly inside ``folder`` into arrays.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        dict mapping each file name without its extension to the image
        contents as a NumPy array (``np.array`` of the opened PIL image).
    """
    image_dict = {}
    # sorted() makes insertion order (and which file wins when two files share
    # a stem, e.g. "A.png" and "A.jpg") deterministic across platforms.
    for filename in sorted(os.listdir(folder)):
        # Case-insensitive match so "A.PNG" / "b.JPG" are not silently skipped.
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        sign = os.path.splitext(filename)[0]
        img_path = os.path.join(folder, filename)
        # The context manager closes the file handle once the pixels have been
        # copied into the array instead of leaving it open until GC.
        with Image.open(img_path) as image:
            image_dict[sign] = np.array(image)
    return image_dict

In [8]:
def convert_images_to_base64(image_dict):
    """Return a new dict with every image array replaced by its base64 text.

    Keys are kept unchanged; each value is passed through ``image_to_base64``.
    """
    return {
        label: image_to_base64(pixels)
        for label, pixels in image_dict.items()
    }

In [9]:
def save_dict_as_json(data_dict, output_path):
    """Write ``data_dict`` as JSON to ``output_path``, replacing any existing file."""
    with open(output_path, mode='w') as sink:
        json.dump(data_dict, fp=sink)

In [2]:
# Destination of the generated sign -> base64 image mapping.
output_json_path = (
    r"D:\SpectoV\Hand_Gesture_Recognition"
    r"\research\level_2\asl_signs.json"
)

In [11]:
def main(dataset_path, output_json_path):
    """Build the sign -> base64 image JSON file from a folder of images.

    Loads the images under ``dataset_path``, base64-encodes them, writes the
    mapping to ``output_json_path`` and prints a confirmation line.

    NOTE(review): a later cell defines another ``main(method)``; once that
    cell has run, this function is shadowed in the kernel namespace.
    """
    encoded_signs = convert_images_to_base64(load_images_from_folder(dataset_path))
    save_dict_as_json(encoded_signs, output_json_path)
    print(f"Data saved to {output_json_path}")

In [12]:
# Generate the JSON lookup file from the image dataset.
main(dataset_path=dataset_path, output_json_path=output_json_path)

Data saved to D:\SpectoV\Hand_Gesture_Recognition\research\level_2\asl_signs.json


In [3]:
def base64_to_image(base64_str):
    """Decode base64 text into a PIL image.

    Returns the opened image, or ``None`` (after printing the error) when
    decoding the text or opening the image raises.
    """
    try:
        raw_bytes = base64.b64decode(base64_str)
        return Image.open(BytesIO(raw_bytes))
    except Exception as e:
        print(f"Error converting base64 to image: {e}")
    return None


In [4]:
def load_json(json_path):
    """Read the JSON file at ``json_path`` and return the parsed content."""
    with open(json_path, 'r') as source:
        return json.load(source)

In [5]:
def display_images_for_string(input_string, data_dict, display_seconds=2, gap_seconds=1):
    """Show the sign image for each character of ``input_string`` in turn.

    The string is upper-cased and every character is looked up in
    ``data_dict``. Characters without an entry (spaces, punctuation, ...)
    only produce a pause.

    Args:
        input_string: Text to spell out.
        data_dict: Mapping of character -> base64-encoded image text.
        display_seconds: Pause after opening each image (default 2).
        gap_seconds: Pause for characters that have no image (default 1).
    """
    for char in input_string.upper():
        if char not in data_dict:
            time.sleep(gap_seconds)
            continue

        image = base64_to_image(data_dict[char])
        if image is None:
            # base64_to_image already reported the decoding error; skip the
            # character instead of crashing on ``None.show()``.
            continue

        try:
            image.show()
            time.sleep(display_seconds)
        finally:
            # Release the image even if showing it or the pause is interrupted.
            image.close()

In [6]:
# Locations of two Vosk English models on disk; only model_path2 is loaded
# below (model_path1 is not referenced anywhere else in the visible cells).
model_path1 = r"D:\SpectoV\Hand_Gesture_Recognition\research\level_2\vosk-model-small-en-us-0.15\vosk-model-small-en-us-0.15"
model_path2 = r"D:\SpectoV\Hand_Gesture_Recognition\research\level_2\vosk-model-en-us-0.22\vosk-model-en-us-0.22"

# Model and recognizer are created once at cell level and shared by the
# functions below. 16000 matches the samplerate the microphone stream is
# opened with in record_and_recognize.
model = vosk.Model(model_path2)
recognizer = vosk.KaldiRecognizer(model, 16000)

# Shared state: text accumulated by recognition_thread, audio blocks queued by
# callback, and the flag record_and_recognize sets to stop the worker thread.
buffered_text = ""
audio_queue = queue.Queue()
stop_event = threading.Event()

def callback(indata, frames, time, status):
    """Audio-stream callback: queue each captured block as ``bytes``.

    Passed as ``callback=`` to ``sd.RawInputStream``. A truthy ``status`` is
    printed to stderr. The ``time`` parameter shadows the ``time`` module
    inside this function only.
    """
    if status:
        print(status, file=sys.stderr)
    chunk = bytes(indata)
    audio_queue.put(chunk)

def recognition_thread():
    """Consume queued audio and accumulate recognized text in ``buffered_text``.

    Runs until ``stop_event`` is set *and* ``audio_queue`` has been drained, so
    audio captured just before the stop request is still processed. Each
    completed utterance is appended to the module-level ``buffered_text``
    followed by a space. After the loop the recognizer's final result is
    flushed, so words of the last, unfinished utterance are not lost.
    """
    global buffered_text

    def _text_of(result_json):
        # Vosk returns JSON text; completed results carry the words in "text".
        return json.loads(result_json).get("text", "")

    while not stop_event.is_set() or not audio_queue.empty():
        try:
            # Short timeout so the loop condition is re-checked regularly.
            audio_data = audio_queue.get(timeout=0.1)
        except queue.Empty:
            continue
        # Partial hypotheses are deliberately not read: only completed
        # utterances (and the final flush below) contribute to the text.
        if recognizer.AcceptWaveform(audio_data):
            text = _text_of(recognizer.Result())
            if text:
                buffered_text += text + " "

    # Flush whatever the recognizer still holds for the trailing utterance.
    final_text = _text_of(recognizer.FinalResult())
    if final_text:
        buffered_text += final_text + " "

def record_and_recognize(duration):
    """Record from the microphone for ``duration`` seconds and return the text.

    Starts ``recognition_thread`` as a daemon worker, opens a 16 kHz mono
    16-bit input stream whose ``callback`` feeds ``audio_queue``, waits for
    ``duration`` seconds (Ctrl-C / kernel interrupt ends the wait early), then
    stops the worker and waits for it to finish.

    Args:
        duration: Recording time in seconds.

    Returns:
        The accumulated ``buffered_text`` with surrounding whitespace removed.
    """
    global buffered_text
    buffered_text = ""
    # stop_event lives at module level and stays set after a previous call;
    # without clearing it the worker would exit immediately on every later run.
    stop_event.clear()

    recognition_thread_instance = threading.Thread(target=recognition_thread, daemon=True)
    recognition_thread_instance.start()

    try:
        with sd.RawInputStream(samplerate=16000, blocksize=4096, dtype='int16', channels=1, callback=callback):
            print("Listening...")
            try:
                time.sleep(duration)
            except KeyboardInterrupt:
                print("Stopped listening")
    finally:
        # Always release and wait for the worker, even when the stream could
        # not be opened, so no thread keeps polling the queue in the background.
        stop_event.set()
        recognition_thread_instance.join()

    return buffered_text.strip()

def recognize_from_audio_file(file_path):
    """Transcribe a WAV file with Vosk and return the recognized text.

    The file must be an uncompressed, mono, 16-bit PCM WAV file; compressed
    formats such as MP3 have to be converted first. The audio is fed to a
    dedicated recognizer (created with the file's own sample rate) in chunks,
    and the final result is flushed at the end, so the shared streaming
    ``recognizer`` is left untouched.

    Args:
        file_path: Path to the WAV file.

    Returns:
        The recognized text (possibly ``""``), or ``"Recognition failed."``
        when the file cannot be read as mono 16-bit PCM WAV.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    import wave  # local import: only this function needs it

    try:
        wav_file = wave.open(file_path, "rb")
    except (wave.Error, EOFError) as exc:
        # Not a WAV container (e.g. an MP3) or a truncated file.
        print(f"Cannot read {file_path!r} as WAV: {exc}", file=sys.stderr)
        return "Recognition failed."

    with wav_file:
        if (wav_file.getnchannels() != 1
                or wav_file.getsampwidth() != 2
                or wav_file.getcomptype() != "NONE"):
            print(f"{file_path!r} must be mono 16-bit PCM WAV", file=sys.stderr)
            return "Recognition failed."

        file_recognizer = vosk.KaldiRecognizer(model, wav_file.getframerate())
        pieces = []
        while True:
            frames = wav_file.readframes(4000)
            if not frames:
                break
            # True marks the end of an utterance; collect its text.
            if file_recognizer.AcceptWaveform(frames):
                pieces.append(json.loads(file_recognizer.Result()).get("text", ""))
        # Flush the words of the last utterance that never saw trailing silence.
        pieces.append(json.loads(file_recognizer.FinalResult()).get("text", ""))

    return " ".join(piece for piece in pieces if piece)


In [33]:
def main(method, record_duration=10, show_signs=False):
    """Recognize speech and print it, optionally spelling it out as sign images.

    NOTE(review): this redefines the ``main(dataset_path, output_json_path)``
    from an earlier cell; after this cell runs, only this version is callable.

    Args:
        method: ``'record'`` to listen on the microphone, or ``'audio_file'``
            to transcribe the hard-coded audio file.
        record_duration: Seconds to listen when ``method == 'record'``
            (default 10).
        show_signs: When true, load the sign JSON and display one image per
            recognized character after a 2 second pause (default ``False``).

    Raises:
        ValueError: If ``method`` is not one of the supported values.
    """
    json_path = r"D:\SpectoV\Hand_Gesture_Recognition\research\level_2\asl_signs.json"
    # NOTE(review): this is an .mp3, while the recognizer in this notebook is
    # created for 16 kHz PCM audio -- confirm the file format before using it.
    audio_file_path = r"D:\SpectoV\Hand_Gesture_Recognition\research\level_2\audio1.mp3"

    if method == 'record':
        input_string = record_and_recognize(record_duration)
    elif method == 'audio_file':
        input_string = recognize_from_audio_file(audio_file_path)
    else:
        # Fail with a clear message instead of an UnboundLocalError at the print.
        raise ValueError(
            f"Unknown method {method!r}; expected 'record' or 'audio_file'"
        )

    print(f"Recognized text: {input_string}")

    if show_signs:
        # The sign images are only needed (and loaded) when they are displayed.
        data_dict = load_json(json_path)
        time.sleep(2)
        display_images_for_string(input_string, data_dict)

In [36]:
# Listen on the microphone and print what was recognized.
main("record")

Listening...
Recognized text: the aliens be i'm a daughter of a professor she admitted daughter-in-law fourth quarter egypt and
