In [8]:
# Install necessary libraries
!apt-get install -y swig
!pip install pocketsphinx
!pip install SpeechRecognition
!pip install pydub
!apt-get install ffmpeg


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  swig4.0
Suggested packages:
  swig-doc swig-examples swig4.0-examples swig4.0-doc
The following NEW packages will be installed:
  swig swig4.0
0 upgraded, 2 newly installed, 0 to remove and 45 not upgraded.
Need to get 1,116 kB of archives.
After this operation, 5,542 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig4.0 amd64 4.0.2-1ubuntu1 [1,110 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig all 4.0.2-1ubuntu1 [5,632 B]
Fetched 1,116 kB in 1s (1,001 kB/s)
Selecting previously unselected package swig4.0.
(Reading database ... 121926 files and directories currently installed.)
Preparing to unpack .../swig4.0_4.0.2-1ubuntu1_amd64.deb ...
Unpacking swig4.0 (4.0.2-1ubuntu1) ...
Selecting previously unselected package swig.
Preparing to unpack .../swig_4.0.2-1ubu

In [45]:
import os
import speech_recognition as sr
from pydub import AudioSegment
from pydub.utils import which
from google.colab import files

# Ensure ffmpeg is installed
if not os.path.exists(which("ffmpeg")):
    print("Installing ffmpeg...")
    !apt-get install -y ffmpeg

# Initialize the recognizer
r = sr.Recognizer()

# Function to convert audio file to WAV format if not already in WAV
def convert_to_wav(audio_file):
    """Return the path of a WAV version of ``audio_file``, converting if needed.

    A path that already ends in ``.wav`` (case-insensitive) is returned
    unchanged. Any other file is loaded with ``AudioSegment.from_file`` and
    exported alongside the original with its extension replaced by ``.wav``.

    Args:
        audio_file: Path of the audio file to convert.

    Returns:
        The path of the WAV file, or ``None`` if loading or exporting failed
        (the error is printed rather than raised).
    """
    if audio_file.lower().endswith('.wav'):
        return audio_file  # No need to convert if already in WAV format

    # splitext removes whatever extension is present (".mp3", ".flac",
    # ".webm", or none at all). Slicing off a fixed four characters only
    # works for three-letter extensions and corrupts every other name.
    wav_file = os.path.splitext(audio_file)[0] + '.wav'

    # Load the audio file and convert to WAV format
    try:
        audio = AudioSegment.from_file(audio_file)
        audio.export(wav_file, format='wav')
    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure
        # with None so the caller can decide how to proceed.
        print(f"Error converting file to WAV: {e}")
        return None

    print(f"File converted to WAV: {wav_file}")
    return wav_file

# Function to handle file upload and recognition
def recognize_from_file(audio_file):
    """Transcribe ``audio_file`` with the Google recognizer and print the text.

    The path is first passed through ``convert_to_wav``. If that yields a
    falsy result, a failure message is printed and nothing else happens.
    Otherwise the whole WAV file is recorded from ``sr.AudioFile`` and sent to
    ``r.recognize_google``; the transcript is lower-cased and printed.

    ``FileNotFoundError``, ``sr.RequestError`` and ``sr.UnknownValueError``
    are caught and reported with a printed message. The function always
    returns ``None``.

    Args:
        audio_file: Path of the audio file to transcribe.
    """
    try:
        wav_path = convert_to_wav(audio_file)
        if wav_path:
            # Read the entire file, then hand it to the recognizer.
            with sr.AudioFile(wav_path) as source:
                recording = r.record(source)
                transcript = r.recognize_google(recording).lower()
            print("Recognized text:", transcript)
        else:
            print("Conversion to WAV failed. Exiting recognition.")
    except sr.UnknownValueError:
        print("Error: Unable to recognize speech.")
    except sr.RequestError as err:
        print(f"Could not request results; {err}")
    except FileNotFoundError:
        print("Error: File not found.")

# Function to handle file upload in Colab
def upload_file():
    """Prompt for an upload and write the first uploaded file to disk.

    Calls ``files.upload()`` and takes the first key of the returned mapping
    as the file name. That file's content is written to a file of the same
    name in the current working directory. Any further uploaded entries are
    not processed by this function.

    Returns:
        The name of the first uploaded file, or ``None`` if the upload
        mapping is empty.
    """
    uploaded = files.upload()

    first_name = next(iter(uploaded.keys()), None)
    if first_name is None:
        return None

    print(f"Saving {first_name} to {first_name}")
    with open(first_name, 'wb') as out:
        out.write(uploaded[first_name])
    return first_name

# Main loop for user interaction
while True:
    # Show the menu on every pass and read the user's selection.
    print("Enter '1' to upload an audio file.")
    print("Enter 'q' to quit.")
    choice = input("Enter your choice: ")

    # 'q' or 'Q' ends the session.
    if choice.lower() == 'q':
        break

    # Anything other than '1' is rejected and the menu is shown again.
    if choice != '1':
        print("Invalid choice. Please enter '1' or 'q'.")
        continue

    # Upload a file and transcribe it; skip recognition if nothing came back.
    uploaded_name = upload_file()
    if uploaded_name:
        recognize_from_file(uploaded_name)

print("Exiting program.")


Enter '1' to upload an audio file.
Enter 'q' to quit.
Enter your choice: 1


Saving hi my name is walter hartwell white.mp3 to hi my name is walter hartwell white (5).mp3
Saving hi my name is walter hartwell white (5).mp3 to hi my name is walter hartwell white (5).mp3
File converted to WAV: hi my name is walter hartwell white (5).wav
Recognized text: my name is walter hartwell white i live at 308 negra arroyo lane albuquerque new mexico 87104
Enter '1' to upload an audio file.
Enter 'q' to quit.
Enter your choice: q
Exiting program.
