In [1]:
import openai
import pandas as pd

import gradio as gr

import json
from IPython.display import display

from utils.load_reference_files_utils import *
from utils.search_engine_utils import *
from utils.general_utils import *
from utils.generated_answer_utils import *
from utils.classifier_utils import *

import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

openai.api_key  = os.getenv('OPENAI_API_KEY')
OPENAI_API_KEY  = openai.api_key

In [2]:
# Load the configuration for the "local" environment
with open('../config.json', 'r') as config_file:
    config = json.load(config_file)["local"]
    filepaths = config['filepaths']
    embedding_params = config['embedding_params']
    classifier_params = config['classifier_params']
    semantic_search_params = config['semantic_search_params']
    generated_answer_params = config['generated_answer_params']

In [3]:
# Load audio transcription libraries
from faster_whisper import WhisperModel
import speech_recognition as sr

## Using speech_recognition library

In [9]:
r = sr.Recognizer()

# # obtain audio from the microphone
# with sr.Microphone() as source:
#     print("Say something!")
#     audio = r.listen(source)

# obtain audio from file
audio_path = "../data/recordings/sample_fruit_audio_2.wav"
with sr.AudioFile(audio_path) as source:
    audio = r.record(source)


In [10]:
# recognize speech using Sphinx
try:
    print("Sphinx thinks you said " + r.recognize_sphinx(audio))
except sr.UnknownValueError:
    print("Sphinx could not understand audio")
except sr.RequestError as e:
    print("Sphinx error; {0}".format(e))

Sphinx thinks you said thank you for providing those information not start at me just gave me you are keen to answer that we know what they're going to do before publisher being we have an option to rest at the apple haiti astrid using the settings of your devices and that doesn't work we can resent that is refusing of borrowed apple of advice and asked me if that's still doesn't work will you still have to respect your password


In [11]:
# recognize speech using Google Speech Recognition
try:
    # for testing purposes, we're just using the default API key
    # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
    # instead of `r.recognize_google(audio)`
    print("Google Speech Recognition thinks you said " + r.recognize_google(audio))
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Google Speech Recognition service; {0}".format(e))


Google Speech Recognition thinks you said thank you for providing those information not start let me just give you our game plan so that we know what were going to do before troubleshooting we have an option to reset the apple id password using the settings of your device if that doesn't work we can reset the password using a borrowed apple device and last day if that still doesn't work we will use the web to reset your password


In [12]:
# recognize speech using whisper
try:
    print("Whisper thinks you said " + r.recognize_whisper(audio, language="english"))
except sr.UnknownValueError:
    print("Whisper could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Whisper")

Whisper thinks you said  Thank you for providing those information. Now to start, let me just give you our game plans so that we know what we're going to do before troubleshooting. We have an option to reset the Apple ID password using the settings of your device. If that doesn't work, we can reset the password using a borrowed Apple device. And lastly, if that still doesn't work, we will use the web to reset your password.


In [13]:
# recognize speech using Whisper API
# OPENAI_API_KEY = "INSERT OPENAI API KEY HERE"
try:
    print(f"Whisper API thinks you said {r.recognize_whisper_api(audio, api_key=OPENAI_API_KEY)}")
except sr.RequestError as e:
    print("Could not request results from Whisper API")

KeyboardInterrupt: 

In [34]:
def whisper_transcriber(audio=None, type="path", model="tiny.en"):
    r = sr.Recognizer()

    if type=="path":
        with sr.AudioFile(audio) as source:
            audio = r.record(source)
    elif type=="numpy":
        pass
    elif type=="microphone":
        with sr.Microphone() as source:
            print("Say something!")
            audio = r.listen(source, timeout=2, phrase_time_limit=3)

    try:
        text = r.recognize_whisper(audio, model=model, language="english")
        return text
    except sr.UnknownValueError:
        return "Sorry, could not understand audio"
    except sr.RequestError as e:
        return f"Error with the speech recognition service; {e}"
    except AssertionError:
        return "No audio to transcribe."

In [31]:
whisper_transcriber(type="microphone")

Say something!


' Hi, I want to describe this one.'

### Using faster-whisper

In [23]:
def faster_whisper_transcriber(audio, type="path", model="tiny.en"):
    # Run on GPU with FP16
    # model = WhisperModel(model, device="cuda", compute_type="float16")

    # or run on GPU with INT8
    # model = WhisperModel(model, device="cuda", compute_type="int8_float16")

    # or run on CPU with INT8
    model = WhisperModel(model, device="cpu", compute_type="int8")

    if type=="path":
        segments, _ = model.transcribe(audio, beam_size=5)
        for segment in segments:
            print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
            print(segment.text)
            # print(type(segment.text))
    elif type=="numpy":
        pass
    return segments

In [24]:
segments = faster_whisper_transcriber(audio_path)

 Thank you for providing those information, not start, let me just give you our game plan
 so that we know what we're going to do before troubleshooting.
 We have an option to reset the Apple ID password using the settings of your device.
 If that doesn't work, we can reset the password using a borrowed Apple device.
 And lastly, if that still doesn't work, we will use the web to reset your password.


### Gradio demo using Interface

In [32]:
demo = gr.Interface(
    whisper_transcriber,
    [gr.Audio(source="microphone", type="filepath", format="wav")],
    # [gr.Audio(source="microphone", type="numpy")],
    ["text"]
)

demo.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.






### Gradio demo using Blocks

In [35]:
demo = gr.Blocks(gr.themes.Glass())

with demo:
    with gr.Row():
        title = gr.Label("Real time audio transcription with OpenAI Whisper")
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(source="microphone", type="filepath", format="wav")
        with gr.Column():
            audio_transcription = gr.Textbox(label="Audio transcription", interactive=True)
    
    audio_input.change(whisper_transcriber, inputs=audio_input, outputs=audio_transcription)

demo.launch()

Running on local URL:  http://127.0.0.1:7863

Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\canepomuceno\AppData\Local\pypoetry\Cache\virtualenvs\fruit-kb-QON9TVu_-py3.11\Lib\site-packages\gradio\routes.py", line 516, in predict
    output = await route_utils.call_process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\canepomuceno\AppData\Local\pypoetry\Cache\virtualenvs\fruit-kb-QON9TVu_-py3.11\Lib\site-packages\gradio\route_utils.py", line 219, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\canepomuceno\AppData\Local\pypoetry\Cache\virtualenvs\fruit-kb-QON9TVu_-py3.11\Lib\site-packages\gradio\blocks.py", line 1437, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\canepomuceno\AppData\Local\pypoetry\Cache\virtualenvs\fruit-kb-QON9TVu_-py3.11\Lib\site-packages\gradio\blocks.py", line 1109, in call_function
    prediction = await anyio.to_thread.run_sync(
