In [1]:
import whisper
import pyaudio
import wave
from docx import Document

# --- Recording settings -------------------------------------------------
RATE = 16000                 # sample rate handed to PyAudio and the WAV header
CHANNELS = 1                 # single (mono) input channel
FORMAT = pyaudio.paInt16     # PyAudio sample-format constant
CHUNK = 1024                 # frames requested per stream.read() call
RECORD_SECONDS = 5           # length of the capture, in seconds
OUTPUT_FILE = "output.wav"   # where the raw recording is written

# --- Speech model -------------------------------------------------------
# Loaded once up front; every later step reuses this instance.
model = whisper.load_model("base")

# Capture roughly RECORD_SECONDS of microphone input as a list of raw
# byte chunks in `frames`.
#
# The stream and the PyAudio instance are released in `finally` blocks so
# that a failure while opening the device or reading from it does not
# leave the audio device held open for the rest of the kernel session.
audio = pyaudio.PyAudio()
frames = []

try:
    stream = audio.open(format=FORMAT, channels=CHANNELS,
                        rate=RATE, input=True,
                        frames_per_buffer=CHUNK)
    try:
        print("Recording...")

        # int() truncates, so the capture can be up to one chunk shorter
        # than RECORD_SECONDS.
        for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            frames.append(stream.read(CHUNK))

        print("Finished recording.")
    finally:
        # Always stop and close the stream, even if a read failed midway.
        stream.stop_stream()
        stream.close()
finally:
    # Release PortAudio whether or not the stream was ever opened.
    audio.terminate()

# Write the captured chunks to OUTPUT_FILE as a WAV file.
with wave.open(OUTPUT_FILE, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    # Use the module-level helper rather than a method on the PyAudio
    # instance: that instance has already been terminated by this point,
    # and the sample width depends only on FORMAT.
    wf.setsampwidth(pyaudio.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

# Load the recording and fit it to the fixed-length window the model
# expects (the Whisper README describes this as padding/trimming to 30 s).
# A dedicated name is used for the samples so the `audio` PyAudio handle
# is not rebound to an unrelated kind of object.
waveform = whisper.pad_or_trim(whisper.load_audio(OUTPUT_FILE))
mel = whisper.log_mel_spectrogram(waveform).to(model.device)

# Detect the spoken language and report the most probable one.
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

# Decode the audio. DecodingOptions defaults to fp16=True, and
# whisper.decode() does not fall back to fp32 on its own, so half
# precision is only requested when the model actually lives on a CUDA
# device; on CPU this avoids half-precision ops that may be unsupported.
options = whisper.DecodingOptions(fp16=(model.device.type == "cuda"))
result = whisper.decode(model, mel, options)

# Show the recognised text.
print(result.text)

# Export the transcript to a Word file: one heading followed by a single
# paragraph holding the recognised text.
transcript_doc = Document()
transcript_doc.add_heading('Transcrição de Áudio', level=0)
transcript_doc.add_paragraph(result.text)
transcript_doc.save("transcricao.docx")

print("Transcription saved to transcricao.docx")


Recording...
Finished recording.
Detected language: pt
em texto para 5 segundos.
Transcription saved to transcricao.docx
