In [8]:
pip install pyaudio

Note: you may need to restart the kernel to use updated packages.


In [9]:
import pyaudio
import wave

def record_audio(output_filename, duration, sample_rate=44100, chunk_size=1024):
    """
    Records live audio from the microphone and saves it to a .wav file.

    The recording is 16-bit mono. ``int(sample_rate * duration)`` frames are
    captured in total; the final read is shortened when that total is not a
    multiple of ``chunk_size``, so the file length matches ``duration``.

    Args:
        output_filename (str): The name of the output WAV file.
        duration (int | float): Duration of the recording in seconds.
        sample_rate (int): The sample rate of the recording (default is 44100 Hz).
        chunk_size (int): Number of frames requested per read (default is 1024).

    Raises:
        ValueError: If ``sample_rate`` or ``chunk_size`` is not positive.
    """
    if sample_rate <= 0 or chunk_size <= 0:
        raise ValueError("sample_rate and chunk_size must be positive")

    sample_format = pyaudio.paInt16  # 16-bit resolution
    channels = 1                     # Mono audio
    frames = []                      # Raw byte chunks returned by the stream

    # Initialize PyAudio
    audio = pyaudio.PyAudio()
    try:
        # Ask for the sample width before the PyAudio instance is terminated.
        sample_width = audio.get_sample_size(sample_format)

        stream = audio.open(
            format=sample_format,
            channels=channels,
            rate=sample_rate,             # Sampling rate
            input=True,                   # Use input device
            frames_per_buffer=chunk_size  # Buffer size
        )
        try:
            print(f"Recording for {duration} seconds...")

            # Read until the requested number of frames has been collected.
            remaining = int(sample_rate * duration)
            while remaining > 0:
                count = min(chunk_size, remaining)
                frames.append(stream.read(count))
                remaining -= count

            print("Recording finished.")
        finally:
            # Release the input stream even when a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded audio to a WAV file
    with wave.open(output_filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b"".join(frames))

    print(f"Audio saved to {output_filename}")

# Example usage: record a short clip from the default microphone into
# "output.wav" in the current working directory.
if __name__ == "__main__":
    output_file = "output.wav"
    record_duration = 6  # Record for 6 seconds
    record_audio(output_file, record_duration)

Recording for 6 seconds...
Recording finished.
Audio saved to output.wav


In [10]:
import pyaudio
import wave
import speech_recognition as sr

def record_audio(output_filename, duration, sample_rate=44100, chunk_size=1024):
    """
    Records live audio from the microphone and saves it to a .wav file.

    Captures 16-bit mono audio. A total of ``int(sample_rate * duration)``
    frames is read; the last read is shortened when that total is not a
    multiple of ``chunk_size``.

    Args:
        output_filename (str): The name of the output WAV file.
        duration (int | float): Duration of the recording in seconds.
        sample_rate (int): The sample rate of the recording (default is 44100 Hz).
        chunk_size (int): Number of frames requested per read (default is 1024).

    Raises:
        ValueError: If ``sample_rate`` or ``chunk_size`` is not positive.
    """
    if sample_rate <= 0 or chunk_size <= 0:
        raise ValueError("sample_rate and chunk_size must be positive")

    sample_format = pyaudio.paInt16  # 16-bit resolution
    channels = 1                     # Mono audio
    frames = []                      # Raw byte chunks returned by the stream

    # Initialize PyAudio
    audio = pyaudio.PyAudio()
    try:
        # Ask for the sample width before the PyAudio instance is terminated.
        sample_width = audio.get_sample_size(sample_format)

        stream = audio.open(
            format=sample_format,
            channels=channels,
            rate=sample_rate,             # Sampling rate
            input=True,                   # Use input device
            frames_per_buffer=chunk_size  # Buffer size
        )
        try:
            print(f"Recording for {duration} seconds...")

            # Read until the requested number of frames has been collected.
            remaining = int(sample_rate * duration)
            while remaining > 0:
                count = min(chunk_size, remaining)
                frames.append(stream.read(count))
                remaining -= count

            print("Recording finished.")
        finally:
            # Release the input stream even when a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded audio to a WAV file
    with wave.open(output_filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b"".join(frames))

    print(f"Audio saved to {output_filename}")


def audio_to_text(audio_filename):
    """
    Transcribes an audio file with the SpeechRecognition library.

    The whole file is read and passed to ``recognize_google``. Failures are
    reported through the return value rather than raised.

    Args:
        audio_filename (str): Path to the audio file.

    Returns:
        str: The transcription, or a fixed message when the speech could not
        be understood or the recognition request failed.
    """
    engine = sr.Recognizer()

    # Read the complete file into an audio-data object
    with sr.AudioFile(audio_filename) as wav_source:
        print("Processing audio...")
        captured = engine.record(wav_source)

    # Transcribe; each failure mode maps to its own message
    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        return "Speech not recognized."
    except sr.RequestError as api_error:
        return f"Could not request results from the API; {api_error}"
    else:
        return transcript


if __name__ == "__main__":
    # Destination file and how long (in seconds) to listen
    output_file = "output.wav"
    record_duration = 5

    # Capture from the microphone, then transcribe the saved file
    record_audio(output_file, record_duration)
    transcribed_text = audio_to_text(output_file)

    # Heading and result on separate lines
    print("Transcribed Text:", transcribed_text, sep="\n")

Recording for 5 seconds...
Recording finished.
Audio saved to output.wav
Processing audio...
Transcribed Text:
Speech not recognized.


In [16]:
import pyaudio
import wave
import speech_recognition as sr
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import nltk

# Ensure NLTK data is downloaded.
# Newer NLTK releases load the 'punkt_tab' resource in word_tokenize();
# 'punkt' is kept for older releases that still read the original resource.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

def record_audio(output_filename, duration, sample_rate=44100, chunk_size=1024):
    """
    Records live audio from the microphone and saves it to a .wav file.

    Captures 16-bit mono audio. A total of ``int(sample_rate * duration)``
    frames is read; the last read is shortened when that total is not a
    multiple of ``chunk_size``.

    Args:
        output_filename (str): The name of the output WAV file.
        duration (int | float): Duration of the recording in seconds.
        sample_rate (int): The sample rate of the recording (default is 44100 Hz).
        chunk_size (int): Number of frames requested per read (default is 1024).

    Raises:
        ValueError: If ``sample_rate`` or ``chunk_size`` is not positive.
    """
    if sample_rate <= 0 or chunk_size <= 0:
        raise ValueError("sample_rate and chunk_size must be positive")

    sample_format = pyaudio.paInt16  # 16-bit resolution
    channels = 1                     # Mono audio
    frames = []                      # Raw byte chunks returned by the stream

    # Initialize PyAudio
    audio = pyaudio.PyAudio()
    try:
        # Ask for the sample width before the PyAudio instance is terminated.
        sample_width = audio.get_sample_size(sample_format)

        stream = audio.open(
            format=sample_format,
            channels=channels,
            rate=sample_rate,             # Sampling rate
            input=True,                   # Use input device
            frames_per_buffer=chunk_size  # Buffer size
        )
        try:
            print(f"Recording for {duration} seconds...")

            # Read until the requested number of frames has been collected.
            remaining = int(sample_rate * duration)
            while remaining > 0:
                count = min(chunk_size, remaining)
                frames.append(stream.read(count))
                remaining -= count

            print("Recording finished.")
        finally:
            # Release the input stream even when a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded audio to a WAV file
    with wave.open(output_filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b"".join(frames))

    print(f"Audio saved to {output_filename}")


def audio_to_text(audio_filename):
    """
    Transcribes an audio file with the SpeechRecognition library.

    Args:
        audio_filename (str): Path to the audio file.

    Returns:
        str: The recognised text, or a fixed message when the speech could
        not be understood or the recognition request failed.
    """
    speech_engine = sr.Recognizer()

    # Pull the entire file into memory as audio data
    with sr.AudioFile(audio_filename) as clip_source:
        print("Processing audio...")
        clip = speech_engine.record(clip_source)

    # Single exit point: either the transcript or the matching failure message
    try:
        outcome = speech_engine.recognize_google(clip)
    except sr.UnknownValueError:
        outcome = "Speech not recognized."
    except sr.RequestError as request_problem:
        outcome = f"Could not request results from the API; {request_problem}"
    return outcome


def apply_nlp(text):
    """
    Runs a small NLP demonstration pipeline on ``text`` and prints each stage.

    Stages, in order: tokenization, English stop-word removal, Porter
    stemming, WordNet lemmatization, and bag-of-words vectorization of the
    stop-word-filtered tokens.

    Args:
        text (str): Text to analyse.

    Returns:
        None. Every intermediate result is printed.
    """
    print("\nOriginal Text:")
    print(text)

    # Tokenization
    tokens = word_tokenize(text)
    print("\nTokens:")
    print(tokens)

    # Stopword Removal (case-insensitive match against the English list)
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    print("\nFiltered Tokens (No Stopwords):")
    print(filtered_tokens)

    # Stemming
    stemmer = PorterStemmer()
    stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
    print("\nStemmed Tokens:")
    print(stemmed_tokens)

    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]
    print("\nLemmatized Tokens:")
    print(lemmatized_tokens)

    # Vectorization
    vectorizer = CountVectorizer()
    try:
        vectorized_data = vectorizer.fit_transform([" ".join(filtered_tokens)])
    except ValueError as error:
        # CountVectorizer raises ValueError when it ends up with an empty
        # vocabulary (e.g. the text held only stop words); report it instead
        # of aborting after the earlier stages already printed.
        print("\nVectorized Tokens (Feature Matrix):")
        print(f"Skipped: {error}")
        return

    print("\nVectorized Tokens (Feature Matrix):")
    print(vectorized_data.toarray())
    print("\nFeature Names:")
    print(vectorizer.get_feature_names_out())


if __name__ == "__main__":
    # Parameters
    output_file = "output.wav"
    record_duration = 10  # Record for 10 seconds

    # Record live audio
    record_audio(output_file, record_duration)

    # Convert recorded audio to text
    transcribed_text = audio_to_text(output_file)

    # audio_to_text() reports failures as ordinary non-empty strings, so a
    # plain truthiness check would feed the error message into the NLP
    # pipeline. These literals mirror the messages audio_to_text() returns.
    transcription_failed = (
        transcribed_text == "Speech not recognized."
        or transcribed_text.startswith("Could not request results from the API;")
    )

    # Apply NLP techniques only to a real transcription
    if transcription_failed:
        print(transcribed_text)
    elif transcribed_text:
        apply_nlp(transcribed_text)

Recording for 10 seconds...


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\arunv\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\arunv\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\arunv\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Recording finished.
Audio saved to output.wav
Processing audio...

Original Text:
it is Arun Vijay from IPCS currently and studying NLP

Tokens:
['it', 'is', 'Arun', 'Vijay', 'from', 'IPCS', 'currently', 'and', 'studying', 'NLP']

Filtered Tokens (No Stopwords):
['Arun', 'Vijay', 'IPCS', 'currently', 'studying', 'NLP']

Stemmed Tokens:
['arun', 'vijay', 'ipc', 'current', 'studi', 'nlp']

Lemmatized Tokens:
['Arun', 'Vijay', 'IPCS', 'currently', 'studying', 'NLP']

Vectorized Tokens (Feature Matrix):
[[1 1 1 1 1 1]]

Feature Names:
['arun' 'currently' 'ipcs' 'nlp' 'studying' 'vijay']
