In [None]:
import whisper
import numpy as np
import sounddevice as sd
import queue
import time
import os

In [None]:
# Queue that carries captured audio blocks from the stream callback
# to the recording loop.
q = queue.Queue()

# Speech-to-text model, loaded once up front.
model = whisper.load_model("base")  # we can also use 'tiny'

In [None]:
def callback(indata, frames, time, status):
    """Hand each captured audio block to the module-level queue ``q``.

    Passed as the ``callback`` of ``sd.InputStream``. A truthy ``status`` is
    printed so it is not silently lost. ``frames`` and ``time`` are accepted
    but unused; note that ``time`` shadows the ``time`` module inside this
    function only.
    """
    if status:
        print(status)
    # Enqueue a copy rather than the array object handed to the callback.
    chunk = indata.copy()
    q.put(chunk)

In [None]:
# --- Recording configuration ---
samplerate = 16_000        # Whisper expects 16kHz audio
blocksize = 4_000          # frames per audio block
max_recording_time = 30    # Max recording duration in seconds

# --- Output / working state ---
output_file = "transcription.txt"
buffer = list()            # collects the recorded audio chunks

In [None]:
# Determine the last used ID from the transcription file, if there is one.
# The last line of the form "ID: <n>" that parses as an integer wins.
# last_id stays 0 when the file is missing or contains no valid ID line;
# it is incremented before the next entry is written.
last_id = 0
try:
    # Only the ASCII "ID:" lines matter here, so decode leniently: bytes in
    # the transcript text that are not valid UTF-8 must not abort the scan.
    with open(output_file, "r", encoding="utf-8", errors="replace") as f:
        # Iterate the file lazily instead of loading every line into memory.
        for line in f:
            stripped = line.strip()
            if not stripped.startswith("ID:"):
                continue
            try:
                last_id = int(stripped.split(":")[1])
            except ValueError:
                # Malformed ID line: keep the previously found value.
                continue
except FileNotFoundError:
    # First run: no transcription file has been written yet.
    pass

In [None]:
# Tell the user recording is about to begin. The duration comes from
# max_recording_time so the message cannot drift from the actual limit.
print(f"🎙️ Start Speaking... (Recording for {max_recording_time} seconds)")



In [None]:
# Start from a clean slate so that re-running this cell does not prepend
# audio left over from an earlier recording.
buffer.clear()
try:
    while True:
        q.get_nowait()
except queue.Empty:
    pass

# Record from the default input device until max_recording_time has elapsed.
start_time = time.time()
with sd.InputStream(
    samplerate=samplerate,
    blocksize=blocksize,  # use the configured block size
    channels=1,
    dtype="int16",
    callback=callback,
):
    while time.time() - start_time < max_recording_time:
        try:
            # Bounded wait: if no audio arrives, fall through and re-check
            # the time limit instead of blocking forever.
            audio_data = q.get(timeout=1.0)
        except queue.Empty:
            continue
        buffer.append(audio_data)

In [None]:
# Join the recorded chunks into one array along the frame axis.
stacked = np.concatenate(buffer, axis=0)
# Reinterpret the samples as a flat run of 16-bit integers ...
pcm = np.frombuffer(stacked, dtype=np.int16)
# ... and scale them to float32 by dividing by 32768.
audio_data = pcm.astype(np.float32) / 32768.0

In [None]:
# Transcribe the recorded audio with the loaded Whisper model.
# NOTE(review): fp16=False presumably requests full-precision inference
# (e.g. when running on CPU) — confirm against the Whisper documentation.
result = model.transcribe(audio_data, fp16=False)
# Keep only the transcript string from the result mapping.
transcript_text = result["text"]

In [None]:
# Advance to the next free ID and format the record as it is stored:
# an "ID: <n>" header line, the transcript, then a blank separator line.
last_id = last_id + 1
entry = (
    f"ID: {last_id}\n{transcript_text}\n\n"
)

In [None]:
# Show the new entry to the user; sep="" keeps the heading and the entry
# joined exactly as a plain string concatenation would.
print("\n📝 Transcription:\n", entry, sep="")

In [None]:
# Append the new entry to the transcription file. UTF-8 is set explicitly
# because the transcript may contain characters that the platform's default
# encoding cannot represent, which would make the write fail.
with open(output_file, "a", encoding="utf-8") as f:
    f.write(entry)

# Report the actual target path rather than a hard-coded file name.
print(f"✅ Transcription saved to '{output_file}'. Program terminated.")

A text file named transcription.txt will be generated.
Each run appends a new entry, so the file contains the IDs and texts of all transcriptions generated from speech so far.
To give input to the model, extract the text that belongs to the last ID.
This text dataset will also be useful later if we need to train our own model.