In [1]:
import cv2
import speech_recognition as sr
import threading

# Initialize recognizer
recognizer = sr.Recognizer()

# Function to perform speech recognition
def recognize_speech():
    with sr.Microphone() as source:
        print("Calibrating microphone for ambient noise...")
        recognizer.adjust_for_ambient_noise(source)  # Calibrate for ambient noise
        print("Microphone calibrated. Start speaking...")

        while True:
            try:
                # Capture audio from the microphone
                print("Listening...")
                audio = recognizer.listen(source, timeout=3)  # Listen for up to 3 seconds of speech

                # Recognize speech using Google Web Speech API
                print("Recognizing...")
                text = recognizer.recognize_google(audio, language="fr-FR")  # Use French language
                print(f"Recognized Text: {text}")

                # Update the recognized text to be displayed on the video
                global recognized_text
                recognized_text = text

            except sr.WaitTimeoutError:
                print("No speech detected. Try again.")
            except sr.UnknownValueError:
                print("Google Speech Recognition could not understand the audio.")
            except sr.RequestError as e:
                print(f"Could not request results from Google Speech Recognition service; {e}")

# Function to display the camera video feed
def display_camera_feed():
    # Open the camera
    cap = cv2.VideoCapture(0)  # 0 is the default camera

    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()

        if not ret:
            print("Error: Could not read frame.")
            break

        # Display the recognized text on the video feed
        if 'recognized_text' in globals():
            cv2.putText(frame, recognized_text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow("Real-Time Camera with Speech Recognition", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release the camera and close the window
    cap.release()
    cv2.destroyAllWindows()

# Start speech recognition in a separate thread
recognized_text = ""  # Global variable to store recognized text
speech_thread = threading.Thread(target=recognize_speech)
speech_thread.daemon = True  # Daemonize thread to exit when the main program exits
speech_thread.start()

# Start the camera feed
display_camera_feed()

Calibrating microphone for ambient noise...
Microphone calibrated. Start speaking...
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
Recognizing...
Recognized Text: bonjour tout le monde
Listening...
No speech detected. Try again.
Listening...
Recognizing...
Recognized Text: bonjour tout le monde comment
Listening...
No speech detected. Try again.
Listening...
Recognizing...
Recognized Text: bonjour à tous comment
Listening...
No speech detected. Try again.
Listening...
Recognizing...
Recognized Text: bonjour comment allez-vous aujourd'hui
Listening...
Recognizing...
Google Speech Recognition could not understand the audio.
Listening...
Recognizing...
Google Speech Recognition could not understand the audio.
Listening...
Recognizing...
Google Speech Recognition could not understand the audio.
Listening...
Recognizing...
Recognized Text: c'est ce que j'avais dit
Listening...
Recognizing...
Recognized Text: oiseau
Listening...
Recogniz

No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
Recognizing...
Google Speech Recognition could not understand the audio.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Listening...
No speech detected. Try again.
Lis

Exception in thread Thread-3 (recognize_speech):
Traceback (most recent call last):
  File "C:\Users\bouba\AppData\Local\Temp\ipykernel_20528\190741238.py", line 19, in recognize_speech
  File "c:\Users\bouba\AppData\Local\pypoetry\Cache\virtualenvs\acsr-MgaKDfGw-py3.11\Lib\site-packages\speech_recognition\__init__.py", line 460, in listen
    for a in result:
  File "c:\Users\bouba\AppData\Local\pypoetry\Cache\virtualenvs\acsr-MgaKDfGw-py3.11\Lib\site-packages\speech_recognition\__init__.py", line 530, in _listen
    buffer = source.stream.read(source.CHUNK)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\bouba\AppData\Local\pypoetry\Cache\virtualenvs\acsr-MgaKDfGw-py3.11\Lib\site-packages\speech_recognition\__init__.py", line 191, in read
    return self.pyaudio_stream.read(size, exception_on_overflow=False)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\bouba\AppData\Local\pypoetry\Cache\virtualenvs\acsr-MgaKDfGw-py3.11\Lib\sit

In [3]:
import os
import queue
import threading
import sounddevice as sd
from vosk import Model, KaldiRecognizer

# Load the Vosk model
model = Model("vosk-model-small-fr-0.22")  # Download French model from https://alphacephei.com/vosk/models

# Audio parameters
SAMPLE_RATE = 16000
CHUNK_SIZE = 4096

# Queue to hold audio chunks
audio_queue = queue.Queue()

# Function to capture audio in real-time
def capture_audio():
    def callback(indata, frames, time, status):
        if status:
            print(status)
        audio_queue.put(bytes(indata))

    with sd.RawInputStream(samplerate=SAMPLE_RATE, blocksize=CHUNK_SIZE, dtype="int16", channels=1, callback=callback):
        print("Recording...")
        while True:
            pass

# Function to transcribe audio in real-time
def transcribe_audio():
    recognizer = KaldiRecognizer(model, SAMPLE_RATE)

    while True:
        if not audio_queue.empty():
            data = audio_queue.get()
            if recognizer.AcceptWaveform(data):
                result = recognizer.Result()
                print(f"Transcribed Text: {result}")

# Start audio capture and transcription in separate threads
capture_thread = threading.Thread(target=capture_audio)
capture_thread.daemon = True
capture_thread.start()

transcribe_thread = threading.Thread(target=transcribe_audio)
transcribe_thread.daemon = True
transcribe_thread.start()

# Keep the main thread alive
while True:
    pass

Exception: Failed to create a model

In [1]:
import torch

In [2]:
torch.cuda.is_available()

False