#### Automatic Speech Recognition

In [None]:
# !pip install torchaudio
# !pip install soundfile

In [1]:
import streamlit as st
import sounddevice as sd
import numpy as np
import torch
import torchaudio
from transformers import pipeline
import io
import time

In [None]:
@st.cache_resource
def _load_whisper_model():
    """Build the Whisper speech-recognition pipeline.

    Streamlit re-executes the script on every widget interaction, so the
    pipeline is cached as a resource instead of being rebuilt on each rerun
    (for example, on every click of the record button).
    """
    return pipeline("automatic-speech-recognition", model="openai/whisper-tiny")


# Load Whisper model from Hugging Face (constructed once, then served from cache)
whisper_model = _load_whisper_model()

# Configure audio settings
SAMPLERATE = 16000  # Whisper expects 16kHz sample rate
CHANNELS = 1

# Function to capture and transcribe audio
def transcribe_audio(indata):
    """Transcribe a recorded audio buffer with the module-level Whisper pipeline.

    Args:
        indata: Array of recorded samples. A multi-dimensional
            ``(frames, channels)`` buffer is reduced to its first channel;
            a 1-D buffer is used as is.

    Returns:
        The value stored under ``'text'`` in the pipeline's result.
    """
    samples = np.asarray(indata, dtype=np.float32)
    if samples.ndim > 1:
        # Keep only the first channel, as a contiguous 1-D array.
        samples = np.ascontiguousarray(samples[:, 0])

    # Hand the pipeline a NumPy array together with its sampling rate rather
    # than a torch tensor: the dict form states the rate explicitly instead of
    # leaving the pipeline to assume it.
    result = whisper_model({"raw": samples, "sampling_rate": SAMPLERATE})
    return result["text"]

# Streamlit UI setup
st.title("Voice Transcription App")
st.write("Record your voice, and the app will transcribe it using Whisper.")

# Length of each recording, in seconds (You can change this)
RECORD_SECONDS = 5

# Audio recording button
record_button = st.button("Start Recording")

# Placeholder that is filled with the transcription once it is available
transcription_text = st.empty()

if record_button:
    # Phase 1: capture audio. The spinner is scoped to the recording only, so
    # the user is not told they are still being recorded during transcription.
    with st.spinner("Recording..."):
        audio_data = sd.rec(
            int(SAMPLERATE * RECORD_SECONDS),
            samplerate=SAMPLERATE,
            channels=CHANNELS,
            dtype="float32",  # explicit, so a changed sd.default.dtype cannot alter the input
        )
        sd.wait()  # Wait for recording to finish

    # Phase 2: run the model on the captured buffer.
    with st.spinner("Transcribing..."):
        transcription = transcribe_audio(audio_data)

        # Display transcription
        transcription_text.write(f"**Transcription:** {transcription}")

        time.sleep(1)  # Pause for smooth UI transition