In [1]:
from transformers.utils import logging
# Limit transformers log output to error-level messages so the notebook output stays readable.
logging.set_verbosity_error()

In [2]:
from datasets import load_dataset

In [3]:
# Open the "train.clean.100" split of LibriSpeech in streaming mode.
# NOTE(review): trust_remote_code=True permits the dataset's own loading
# script to execute on this machine -- confirm the source is trusted.
# NOTE(review): `dataset` is not referenced by any other cell visible in this
# notebook; confirm it is still needed.
dataset = load_dataset(
    "librispeech_asr",
    split="train.clean.100",
    streaming=True,
    trust_remote_code=True,
)

In [5]:
from transformers import pipeline

In [6]:
# Speech-to-text pipeline backed by the Whisper large-v3 checkpoint stored
# under ./models (path is relative to the notebook's working directory).
asr = pipeline(
    "automatic-speech-recognition",
    model="./models/openai/whisper-large-v3",
)




In [9]:
import os
import gradio as gr

In [10]:
# NOTE(review): this empty Blocks instance is superseded further down, where
# `with gr.Blocks() as demo:` rebinds `demo` to a new app.
demo = gr.Blocks()

In [11]:
def transcribe_speech(filepath):
    """Transcribe an audio file with the module-level ``asr`` pipeline.

    Args:
        filepath: Path to the audio file to transcribe, or ``None`` / ``""``
            when no audio was provided.

    Returns:
        The transcribed text. When no audio was provided, a Gradio warning is
        shown and an empty string is returned.
    """
    # Treat an empty path the same as a missing one so the pipeline is never
    # invoked without an actual file.
    if not filepath:
        gr.Warning("No audio found, please retry.")
        return ""
    # Whisper handles roughly 30 s of audio per forward pass. Splitting the
    # input into 30 s chunks lets the pipeline transcribe longer recordings
    # instead of truncating or rejecting them (behavior depends on the
    # installed transformers version).
    output = asr(filepath, chunk_length_s=30)
    return output["text"]

In [12]:
# Stand-alone interface: microphone recording in, transcription text out.
mic_audio_component = gr.Audio(sources="microphone", type="filepath")
mic_text_component = gr.Textbox(label="Transcription", lines=3)

mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=mic_audio_component,
    outputs=mic_text_component,
    allow_flagging="never",
)

In [13]:
# Stand-alone interface: uploaded audio file in, transcription text out.
upload_audio_component = gr.Audio(sources="upload", type="filepath")
upload_text_component = gr.Textbox(label="Transcription", lines=3)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=upload_audio_component,
    outputs=upload_text_component,
    allow_flagging="never",
)

In [19]:
def _add_transcription_tab(tab_title, audio_source):
    """Add one tab that transcribes its audio input into a textbox.

    Must be called inside an active ``gr.Blocks`` context.

    Args:
        tab_title: Label displayed on the tab.
        audio_source: Value forwarded to ``gr.Audio(sources=...)``
            (this notebook uses "microphone" and "upload").

    Returns:
        A ``(audio_input, text_output)`` tuple with the components created
        for the tab.
    """
    with gr.Tab(tab_title):
        with gr.Group():
            audio_input = gr.Audio(sources=audio_source, type="filepath")
        with gr.Group():
            text_output = gr.Textbox(label="Transcription", lines=3)
        # Transcribe whenever the audio component's value changes; there is
        # no separate submit button in this layout.
        audio_input.change(transcribe_speech, inputs=[audio_input], outputs=[text_output])
    return audio_input, text_output


# Create the Blocks interface: one tab per audio source, both backed by
# transcribe_speech.
with gr.Blocks() as demo:
    mic_input, mic_output = _add_transcription_tab("Transcribe Microphone", "microphone")
    file_input, file_output = _add_transcription_tab("Transcribe Audio File", "upload")


In [None]:
# Start serving the app. share=True additionally requests a public Gradio
# share link, so anyone with the URL can reach this demo while it is running.
demo.launch(share=True)

In [23]:
# Shut down the server started by demo.launch() and free its port.
demo.close()

Closing server running on port: 7860
