In [1]:
!pip install faster-whisper torch pydub



In [2]:
import io
import os
import tempfile
from pydub import AudioSegment
from faster_whisper import WhisperModel

print("Libraries imported successfully.")

Libraries imported successfully.


In [3]:
!pip install sounddevice numpy scipy



In [17]:
import sounddevice as sd
from scipy.io.wavfile import write
import numpy as np
import time

def record_audio(filename="output.wav", duration_seconds=5, samplerate=44100, channels=1):
    print(f"Recording for {duration_seconds} seconds...")
    print(f"Sample rate: {samplerate} Hz, Channels: {channels}")

    try:
        # Record audio
        # dtype='float32' is common for sounddevice and can be converted later if needed
        myrecording = sd.rec(int(samplerate * duration_seconds),
                             samplerate=samplerate, channels=channels, dtype='float32')
        sd.wait()  # Wait until recording is finished

        # Convert to a format suitable for WAV saving (e.g., int16)
        # Scale to -32767 to +32767 for int16 range
        myrecording_int16 = np.int16(myrecording * 32767)

        # Save the recording as a WAV file
        write(filename, samplerate, myrecording_int16)
        print(f"Recording saved to {filename}")

    except Exception as e:
        print(f"An error occurred during recording: {e}")
        print("Please ensure you have a microphone connected and selected as the default input device.")
        print("You might also need to grant microphone permissions to your terminal/IDE.")

if __name__ == "__main__":
    # --- Configuration ---
    RECORDING_DURATION = 10  # seconds
    OUTPUT_FILENAME = "my_recorded_voice.wav"
    # -------------------

    print("Press Ctrl+C to stop recording early (if not using fixed duration).")
    record_audio(filename=OUTPUT_FILENAME, duration_seconds=RECORDING_DURATION)

    print("\nAttempting to play back the recorded audio...")
    try:
        from scipy.io import wavfile
        samplerate_read, data_read = wavfile.read(OUTPUT_FILENAME)
        sd.play(data_read, samplerate_read)
        sd.wait()
        print("Playback finished.")
    except Exception as e:
        print(f"Could not play back audio (often due to missing 'portaudio' or similar playback drivers): {e}")
        print("Playback is optional, recording should still be in the .wav file.")

Press Ctrl+C to stop recording early (if not using fixed duration).
Recording for 10 seconds...
Sample rate: 44100 Hz, Channels: 1
Recording saved to my_recorded_voice.wav

Attempting to play back the recorded audio...
Playback finished.


In [18]:
test_audio_input_path = "my_recorded_voice.wav"

In [19]:
if not os.path.exists(test_audio_input_path):
    print("Skipping audio conversion as input file was not found.")
    temp_wav_filepath = None
else:
    input_extension = os.path.splitext(test_audio_input_path)[1].lower()
    if input_extension == '.webm':
        try:
            # Read WebM audio
            with open(test_audio_input_path, 'rb') as f:
                webm_audio_bytes = io.BytesIO(f.read())

            # Convert to WAV and save to a temporary file
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav_file:
                audio = AudioSegment.from_file(webm_audio_bytes, format="webm")
                audio.export(tmp_wav_file.name, format="wav")
                temp_wav_filepath = tmp_wav_file.name
            print(f"WebM converted to WAV and saved to: {temp_wav_filepath}")

        except Exception as e:
            temp_wav_filepath = None
            print(f"ERROR: Failed to convert audio using pydub. Make sure FFmpeg is installed and accessible in your system's PATH. Error: {e}")
    elif input_extension == '.wav':
        temp_wav_filepath = test_audio_input_path # Already WAV, use directly
        print(f"Input is already WAV: {temp_wav_filepath}")
    else:
        temp_wav_filepath = None
        print(f"ERROR: Unsupported audio input format: {input_extension}. Please use .webm or .wav.")


Input is already WAV: my_recorded_voice.wav


In [20]:
# Cell 5: Load Whisper Model and Transcribe
whisper_model = None
transcript = "Transcription failed."

if temp_wav_filepath and os.path.exists(temp_wav_filepath):
    try:
        # Load the Whisper model. Choose size: "tiny", "base", "small", "medium"
        # "small" is a good balance for M3 8GB.
        # device="mps" uses your Apple Silicon GPU. compute_type="float16" is efficient.
        print(f"Loading Whisper model 'small' for MPS (Apple Silicon GPU)...")
        whisper_model = WhisperModel("small", device="mps", compute_type="float16")
        print("Whisper model loaded.")

        print(f"Starting transcription for: {temp_wav_filepath}")
        segments, info = whisper_model.transcribe(temp_wav_filepath, beam_size=5)

        # Combine segments into a single transcript
        transcript_parts = []
        for segment in segments:
            transcript_parts.append(segment.text)
        transcript = " ".join(transcript_parts)

        print("\n--- Transcription Result ---")
        print(transcript)
        print("--------------------------")

    except Exception as e:
        print(f"ERROR: An error occurred during Whisper transcription: {e}")
        print("Trying fallback to CPU if MPS failed...")
        try:
            # Fallback to CPU if MPS fails for some reason
            whisper_model = WhisperModel("small", device="cpu")
            segments, info = whisper_model.transcribe(temp_wav_filepath, beam_size=5)
            transcript_parts = [segment.text for segment in segments]
            transcript = " ".join(transcript_parts)
            print("\n--- Transcription Result (CPU) ---")
            print(transcript)
            print("--------------------------")
        except Exception as cpu_e:
            print(f"ERROR: Failed to transcribe on CPU either: {cpu_e}")
            transcript = "Transcription failed on both MPS and CPU."
else:
    print("No valid audio file to transcribe.")

Loading Whisper model 'small' for MPS (Apple Silicon GPU)...
ERROR: An error occurred during Whisper transcription: unsupported device mps
Trying fallback to CPU if MPS failed...





--- Transcription Result (CPU) ---
 So hello everyone my name is Harshad and I hope you all are good as well so yeah this  is my interview and I have been talking about several things for the past few days.
--------------------------


In [24]:
from dotenv import load_dotenv

In [28]:
load_dotenv()

True