In [None]:
import tempfile
import pyaudio
import openai
import os

from pydub import AudioSegment
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the environment
load_dotenv()

# Initialize OpenAI API; os.getenv returns None when OPENAI_API_KEY is unset
openai.api_key = os.getenv("OPENAI_API_KEY")

# Initialize PyAudio
chunk_size = 1024               # frames requested per stream.read() call
audio_format = pyaudio.paInt16  # sample format constant passed to PortAudio
channels = 1                    # single (mono) input channel
# Use the standard 44.1 kHz rate. The previous value (44000) is not a
# standard audio rate and may be rejected by host APIs that do not resample.
sample_rate = 44100
audio = pyaudio.PyAudio()

In [None]:
# Create the audio stream and record until the kernel is interrupted
stream = audio.open(format=audio_format,
                    channels=channels,
                    rate=sample_rate,
                    input=True,
                    frames_per_buffer=chunk_size)

# Initialize frames to store audio (one bytes object per chunk read)
frames = []

# Collect the audio until stopped. The whole loop sits inside a single
# try block so that an interrupt arriving at any point is handled, and the
# finally clause releases the device even if stream.read() raises
# something other than KeyboardInterrupt.
try:
    while True:
        # Read audio data from the stream
        frames.append(stream.read(chunk_size))
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to end the recording
    pass
finally:
    # Stop the stream, close it, and terminate the PyAudio instance.
    # NOTE: `audio` is terminated here, so the setup cell must be re-run
    # before this cell can open another stream.
    stream.stop_stream()
    stream.close()
    audio.terminate()

In [None]:
# Store the audio in a temporary file and transcribe it
if not frames:
    # Fail early with a clear message instead of uploading an empty WAV
    raise ValueError("No audio was recorded; run the recording cell first.")

with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as tmp:
    # Wrap the raw PCM chunks in an AudioSegment described by the same
    # parameters that were used to open the input stream.
    audio_segment = AudioSegment(
        b''.join(frames),
        frame_rate=sample_rate,
        # Module-level helper: avoids calling a method on the PyAudio
        # instance that the recording cell has already terminated.
        sample_width=pyaudio.get_sample_size(audio_format),
        channels=channels
    )
    audio_segment.export(tmp, format="wav")

    # Make sure everything is written and the handle is positioned at the
    # start before it is read for upload, rather than relying on export()
    # to leave it there.
    tmp.flush()
    tmp.seek(0)

    # The '.wav' suffix on the temp file name is kept so the file has a
    # recognizable audio extension when handed to the API client.
    response = openai.Audio.transcribe("whisper-1", tmp)
    print(response)
