In [None]:
import whisper
from openai import OpenAI
from pydub import AudioSegment
import os

In [None]:
def split_audio(file_path, chunk_length_ms=30000, overlap_ms=1000):
    """Load an audio file and cut it into fixed-length, overlapping chunks.

    Consecutive chunks start ``chunk_length_ms - overlap_ms`` apart, so each
    chunk repeats the last ``overlap_ms`` of the one before it. The final
    chunk may be shorter than ``chunk_length_ms``.

    Args:
        file_path: Path handed to ``AudioSegment.from_file``.
        chunk_length_ms: Length of every chunk; must be positive.
        overlap_ms: Amount shared between neighbouring chunks; must be
            smaller than ``chunk_length_ms``.

    Returns:
        A list of slices of the loaded audio, in playback order. The list is
        empty when the loaded audio has length zero.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive, or if
            ``overlap_ms`` is not smaller than ``chunk_length_ms``.
    """
    # Validate before touching the file: a step of zero or less would keep
    # `start` from ever advancing and the loop below would never finish.
    if chunk_length_ms <= 0:
        raise ValueError("chunk_length_ms must be positive")
    if overlap_ms >= chunk_length_ms:
        raise ValueError("overlap_ms must be smaller than chunk_length_ms")

    audio = AudioSegment.from_file(file_path)
    total_ms = len(audio)
    step_ms = chunk_length_ms - overlap_ms

    chunks = []
    start = 0
    while start < total_ms:
        end = start + chunk_length_ms
        chunks.append(audio[start:end])

        # This chunk already reaches the end of the audio. Another iteration
        # would only produce a sliver fully contained in it, whose text would
        # then show up twice in the joined transcription.
        if end >= total_ms:
            break

        start += step_ms

    return chunks


In [None]:
def transcribe_chunks(chunks, model_name='base'):
    """Transcribe each audio chunk with a Whisper model.

    The model named ``model_name`` is loaded once. Every chunk is exported to
    ``temp_chunk_<index>.wav`` in the current working directory and that file
    is passed to the model. The files are not deleted by this function.

    Args:
        chunks: Iterable of objects exposing ``export(path, format=...)``.
        model_name: Name passed to ``whisper.load_model``.

    Returns:
        A list holding the ``"text"`` entry of each transcription result, in
        the same order as ``chunks``.
    """
    whisper_model = whisper.load_model(model_name)

    def _transcribe_one(index, segment):
        # Whisper is given a file path here, so the chunk is written to disk first.
        wav_path = f"temp_chunk_{index}.wav"
        segment.export(wav_path, format="wav")
        return whisper_model.transcribe(wav_path)["text"]

    return [_transcribe_one(index, segment) for index, segment in enumerate(chunks)]

In [None]:
def clean_up_temp_files(chunks):
    """Delete the ``temp_chunk_<i>.wav`` files for ``i`` in ``range(len(chunks))``.

    Files are looked up relative to the current working directory. A file that
    does not exist is skipped, so the function can be called again, or after a
    run that wrote only some of the files, without raising.

    Args:
        chunks: Sized collection; only its length is used.
    """
    for i in range(len(chunks)):
        try:
            os.remove(f"temp_chunk_{i}.wav")
        except FileNotFoundError:
            # Already gone or never written: keep going so the remaining
            # files are still removed.
            continue

# Example usage


In [None]:
audio_chunks = split_audio("Akhundzade3.mp3")
# Only the first two chunks are kept (presumably to keep this example run short).
audio_chunks = audio_chunks[0:2]

# transcribe_chunks writes a temp_chunk_<i>.wav file per chunk and leaves it on
# disk; the finally clause runs the cleanup even when transcription raises, so
# the working directory is not left littered with WAV files.
try:
    transcriptions = transcribe_chunks(audio_chunks)
finally:
    clean_up_temp_files(audio_chunks)

full_transcription = ' '.join(transcriptions)
print(full_transcription)

In [None]:
# Send the raw transcription to a chat model and print its rewritten version.
# No credentials are passed explicitly to the client here.
client = OpenAI()

system_prompt = "You are a kind assistant, skilled in transforming poorly written farsi into proper formal language, without mistakes and misunderstandings. You avoid adding comments, you just rewrite the user input and do not add any comment."
chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": f"{full_transcription}"},
]

completion = client.chat.completions.create(
    model="gpt-4o",
    messages=chat_messages,
)

# Only the first returned choice is shown.
rewritten_text = completion.choices[0].message.content
print(rewritten_text)