In [1]:
import speech_recognition as sr
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

In [2]:
nltk.download('punkt_tab')
nltk.download('wordnet')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

In [4]:
def transcribe_audio(file_path):
    recognizer = sr.Recognizer()
    # Load the audio file
    with sr.AudioFile(file_path) as source:
        audio_data = recognizer.record(source)
        # Transcribe audio to text
        try:
            transcription = recognizer.recognize_google(audio_data)
            return transcription
        except sr.UnknownValueError:
            return "Could not understand audio"
        except sr.RequestError:
            return "Error with transcription service" 

In [5]:
def preprocess_text(text):
    words = nltk.word_tokenize(text)
    lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
    stemmed_words = [stemmer.stem(word) for word in words]
    
    lemmatized_text = ' '.join(lemmatized_words)
    stemmed_text = ' '.join(stemmed_words)
    
    return lemmatized_text, stemmed_text

In [6]:
def main(audio_file_path):
    # Transcribe the audio file to text
    transcription = transcribe_audio(audio_file_path)
    print(f"Transcription: {transcription}")
    
    # Preprocess the transcribed text
    lemmatized_text, stemmed_text = preprocess_text(transcription)
    
    print(f"Lemmatized Text: {lemmatized_text}")
    print(f"Stemmed Text: {stemmed_text}")

In [7]:
audio_file_path = "C:/Users/HP/OneDrive/Desktop/Infosys Springboard/test_audio_converted.wav" 
main(audio_file_path)

Transcription: rabbit was playing in a garden
Lemmatized Text: rabbit wa playing in a garden
Stemmed Text: rabbit wa play in a garden
