In [1]:
!apt-get install -y ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [2]:
!pip install streamlit
!pip install pydub
!pip install openai-whisper
!pip install pyngrok
!pip install PyAudio
!pip install SpeechRecognition

Collecting streamlit
  Downloading streamlit-1.40.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Downloading streamlit-1.40.1-py2.py3-none-any.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m72.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[

In [3]:
%%writefile app.py
import os
import tempfile

import speech_recognition as sr
import streamlit as st
import whisper
from pydub import AudioSegment

# Prerequisites: run these in a notebook cell (not in this script) before
# launching the app in Google Colab:
#   !apt-get install -y ffmpeg
#   !pip install openai-whisper pydub

# Location of the ffmpeg binary installed by apt (e.g. in Google Colab).
FFMPEG_PATH = "/usr/bin/ffmpeg"

# The environment variable is kept for any tool that honours it; pydub itself
# is configured through AudioSegment.converter, so point it at the same binary
# when that binary actually exists (otherwise pydub keeps its PATH lookup).
os.environ["FFMPEG_BINARY"] = FFMPEG_PATH
if os.path.isfile(FFMPEG_PATH):
    AudioSegment.converter = FFMPEG_PATH


@st.cache_resource
def load_whisper_model(name="base"):
    """Load a Whisper model once and reuse it across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction; caching the
    model as a resource avoids reloading the weights each time.

    Args:
        name: Whisper model name to load (for example "base" or "large").

    Returns:
        The model object returned by ``whisper.load_model``.
    """
    return whisper.load_model(name)


# Load Whisper model (use the base model; you can change this to "large" for better accuracy)
model = load_whisper_model("base")

# Function to extract text from uploaded audio file using Whisper
def transcribe_audio_file(uploaded_file):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    The upload is decoded with pydub, re-encoded as WAV into a temporary file,
    and that file is handed to ``model.transcribe``.

    Args:
        uploaded_file: A file-like object (or path) accepted by
            ``AudioSegment.from_file``, such as the value returned by
            ``st.file_uploader``.

    Returns:
        The transcribed text (the ``"text"`` entry of the Whisper result).
    """
    # Another consumer (for example an audio player widget) may already have
    # read from the stream; rewind so pydub decodes from the first byte.
    if hasattr(uploaded_file, "seek"):
        uploaded_file.seek(0)
    audio = AudioSegment.from_file(uploaded_file)

    # The .wav suffix makes the container explicit for tools that inspect the
    # file name; the file is removed automatically when the context exits.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio_file:
        audio.export(temp_audio_file.name, format="wav")

        # Use Whisper to transcribe the audio
        result = model.transcribe(temp_audio_file.name)
    return result["text"]

# Streamlit UI setup
st.title("Speech-to-Text Transcription using Whisper")

# Option to upload audio file
uploaded_file = st.file_uploader("Upload an Audio File", type=["wav", "mp3", "ogg"])

if uploaded_file is not None:
    # Play the upload back using its own MIME type: mp3 and ogg uploads are
    # not WAV, so only fall back to "audio/wav" when no type is reported.
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")
    result = transcribe_audio_file(uploaded_file)
    st.subheader("Extracted Text from Uploaded Audio")
    st.write(result)

# List the audio input devices visible to the machine running this script.
# Capture happens on that machine (e.g. the Colab VM), not in the visitor's
# browser, so a hosted server may legitimately report no devices.
try:
    mic_list = sr.Microphone.list_microphone_names()
except (AttributeError, OSError) as exc:
    # A missing PyAudio install or an unusable audio backend should not take
    # down the rest of the app (file upload still works without a microphone).
    st.warning(f"Could not query audio input devices: {exc}")
    mic_list = []

# Check if there are any available microphones
if mic_list:
    st.write("Available microphones:", mic_list)
    mic_index = 0  # You can adjust this to select a specific microphone if needed
else:
    st.error("No microphones detected!")
    mic_index = None  # Set mic_index to None if no microphones are available

# Initialize recognizer and microphone
recognizer = sr.Recognizer()

# Check if a valid microphone is available
if mic_index is not None:
    mic = sr.Microphone(device_index=mic_index)

    # Record from the microphone; there is only a Start button, and the
    # recording ends when recognizer.listen() returns.
    st.subheader("Record from Microphone")

    if st.button("Start Recording"):
        with mic as source:
            recognizer.adjust_for_ambient_noise(source)
            st.info("Recording... Please speak now.")
            audio_data = recognizer.listen(source)

        # Write the recording into the temporary file itself (with a .wav
        # suffix) so that it is deleted when the context exits, instead of
        # creating a second "<name>.wav" file that is never cleaned up.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_audio_file:
            temp_audio_file.write(audio_data.get_wav_data())
            temp_audio_file.flush()  # make sure the bytes are on disk before reading by name

            # Use Whisper to transcribe the audio
            result = model.transcribe(temp_audio_file.name)

        st.subheader("Extracted Text from Microphone")
        st.write(result["text"])

Writing app.py


In [None]:
# Import ngrok
from pyngrok import ngrok

# Set your authtoken
ngrok.set_auth_token("<NGROK_AUTH_TOKEN>") # Replace YOUR_AUTHTOKEN with your actual authtoken

# Kill any existing ngrok processes
ngrok.kill()

# Start Streamlit with nohup
!nohup streamlit run app.py &

# Create a public URL with ngrok to access the app
public_url = ngrok.connect(addr='8501')
print(f"Public URL: {public_url}")

nohup: appending output to 'nohup.out'
Public URL: NgrokTunnel: "https://070a-34-125-126-81.ngrok-free.app" -> "http://localhost:8501"


In [5]:
# Kill the running ngrok process, which tears down the public tunnel.
# NOTE(review): the Streamlit server started with nohup is presumably left
# running by this call — confirm and stop it separately if needed.
ngrok.kill()