In [None]:
import whisper
from openai import OpenAI
from pydub import AudioSegment
import os

# Module-level OpenAI client reused by the post-processing loop further down.
# Constructed with no arguments, so credentials/config come from the SDK's
# defaults (presumably the OPENAI_API_KEY environment variable — confirm).
client = OpenAI()

In [None]:
def split_audio(file_path, chunk_length_ms=30000, overlap_ms=1000):
    """Split an audio file into fixed-length chunks that overlap slightly.

    Consecutive chunks start ``chunk_length_ms - overlap_ms`` apart, so each
    chunk repeats the last ``overlap_ms`` of the previous one. The final chunk
    may be shorter than ``chunk_length_ms``.

    Args:
        file_path: Path of the audio file, passed to ``AudioSegment.from_file``.
        chunk_length_ms: Maximum length of each chunk, in milliseconds.
        overlap_ms: Amount of audio shared by consecutive chunks, in
            milliseconds. Must be smaller than ``chunk_length_ms``.

    Returns:
        A list of audio slices in playback order; empty if the audio is empty.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive, or if
            ``overlap_ms`` is not smaller than ``chunk_length_ms`` (the window
            would never advance and the loop would not terminate).
    """
    # Validate before touching the file so bad arguments fail fast.
    if chunk_length_ms <= 0:
        raise ValueError("chunk_length_ms must be positive")
    step_ms = chunk_length_ms - overlap_ms
    if step_ms <= 0:
        raise ValueError("overlap_ms must be smaller than chunk_length_ms")

    audio = AudioSegment.from_file(file_path)
    total_ms = len(audio)  # compared against millisecond offsets below
    chunks = []
    start = 0
    while start < total_ms:
        end = start + chunk_length_ms
        chunks.append(audio[start:end])
        # Once a chunk reaches the end of the audio, stop: another iteration
        # would only emit the overlap tail, which this chunk already contains.
        if end >= total_ms:
            break
        start += step_ms
    return chunks

In [None]:
# Loaded Whisper models keyed by model name, so repeated calls (one per batch
# in the driver loop) reuse a single instance instead of reloading the weights.
_WHISPER_MODELS = {}


def transcribe_chunks(chunks, model_name='base'):
    """Transcribe each audio chunk with a local Whisper model.

    Every chunk is exported to ``temp_chunk_<i>.wav`` in the current working
    directory (``i`` is the chunk's index within ``chunks``) and that file is
    handed to the model. The temporary files are left on disk; the caller is
    responsible for removing them.

    Args:
        chunks: Sequence of audio segments exposing ``export(path, format=...)``.
        model_name: Name passed to ``whisper.load_model``. The loaded model is
            cached per name for the lifetime of the kernel.

    Returns:
        A list with the ``"text"`` field of each transcription result, in the
        same order as ``chunks``.
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    transcriptions = []
    for i, chunk in enumerate(chunks):
        chunk_file = f"temp_chunk_{i}.wav"
        chunk.export(chunk_file, format="wav")
        result = model.transcribe(chunk_file)
        transcriptions.append(result["text"])
    return transcriptions

In [None]:
def clean_up_temp_files(chunks):
    """Delete the ``temp_chunk_<i>.wav`` files for indices ``0..len(chunks)-1``.

    Files are looked up in the current working directory. A file that does not
    exist is skipped, so the function is safe to call more than once or with a
    list longer than the set of files actually written.

    Args:
        chunks: Sized collection; only its length is used.
    """
    for i in range(len(chunks)):
        try:
            os.remove(f"temp_chunk_{i}.wav")
        except FileNotFoundError:
            # Nothing to delete for this index; keep cleaning the rest.
            pass

In [None]:
# Number of audio chunks transcribed and sent to the chat model per request.
BATCH_SIZE = 4

audio_chunks = split_audio("Akhundzade3.mp3")
for i in range(0, len(audio_chunks), BATCH_SIZE):
    batch = audio_chunks[i:i + BATCH_SIZE]
    transcriptions = transcribe_chunks(batch)
    # transcribe_chunks numbers its temp files from 0 within each batch, so
    # cleanup must receive the same batch: passing the full chunk list would
    # ask for files (temp_chunk_4.wav, ...) that were never written.
    clean_up_temp_files(batch)

    partial_transcription = ' '.join(transcriptions)
    print(f"Raw partial transcription: {partial_transcription}")

    # Ask the chat model to rewrite the raw Whisper output as formal Farsi.
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a kind assistant, skilled in transforming poorly written farsi into proper formal language, without mistakes and misunderstandings. You avoid adding comments, you just rewrite the user input and do not add any comment."},
            {"role": "user", "content": partial_transcription}
        ]
    )
    processed_partial_transcription = completion.choices[0].message.content
    print(f"Processed partial transcription: {processed_partial_transcription}")