Start by transcribing something small; a 5-minute audio file took 6.5 minutes to transcribe for me.

In [None]:
import whisper

def audioToText(audioPath, outputPath, initialPrompt, modelName="base"):
    """Transcribe a Swedish audio file with Whisper and save the text to a file.

    Args:
        audioPath: Path of the audio file passed to ``model.transcribe``.
        outputPath: Path of the text file to write; it is opened in "w" mode,
            so an existing file is overwritten.
        initialPrompt: Text forwarded to Whisper as ``initial_prompt``.
        modelName: Name of the Whisper model to load (tiny, base, small,
            medium, large). Defaults to "base", the previously hard-coded value.
    """
    model = whisper.load_model(modelName)

    # transcribe audio
    result = model.transcribe(
        audioPath,
        fp16=False,
        language="sv",
        task="transcribe",
        temperature=0.4,
        best_of=2,  # number of candidates to consider
        initial_prompt=initialPrompt,
    )

    # write transcribed text to file (no explicit encoding is passed, so the
    # platform default is used, unchanged from before)
    with open(outputPath, "w") as file:
        file.write(result["text"])

# Where the raw transcription is stored, and the audio file it is produced from.
textFilepath = "data/TRANSCRIPTION.txt"
audioFilepath = "data/AUDIOFILE.mp3"

# The prompt is Swedish for "This audio file is about...".
audioToText(
    audioFilepath,
    textFilepath,
    initialPrompt="Den här ljudfilen handlar om...",
)

Now that we have our transcription, let's use a GPT model to improve and/or summarize it. More features can be added later.

In [1]:
from openai import OpenAI
import os 

# Shared API client used by manageText.
client = OpenAI() # defaults to getting the key using os.environ.get("OPENAI_API_KEY")
# Model name passed as `model` in the chat completion request made by manageText.
MODEL = "gpt-3.5-turbo"

In [2]:
def readText(filePath):
    """Return the full contents of a text file as a string.

    The file is decoded as UTF-8 first so that Swedish characters are read
    correctly regardless of the platform's default encoding. If the bytes are
    not valid UTF-8, the file is re-read with the platform default encoding,
    which is what this function used before.

    Args:
        filePath: Path of the file to read.

    Returns:
        The decoded file contents.
    """
    try:
        with open(filePath, 'r', encoding='utf-8') as file:
            return file.read()
    except UnicodeDecodeError:
        # Not UTF-8 (e.g. written earlier with a legacy code page): fall back
        # to the default-encoding read so such files keep working.
        with open(filePath, 'r') as file:
            return file.read()

def writeText(filePath, content):
    """Write a string to a text file, replacing any existing contents.

    The file is encoded as UTF-8 so that any character in ``content`` can be
    written, independent of the platform's default encoding.

    Args:
        filePath: Path of the file to create or overwrite.
        content: Text to write.
    """
    with open(filePath, 'w', encoding='utf-8') as file:
        file.write(content)

def manageText(text, todo):
    """Ask the chat model to summarize or improve a Swedish transcription.

    Args:
        text: Transcribed text; it is appended to the end of the user prompt.
        todo: The task to perform, either 'summarize' or 'improve'.

    Returns:
        The message content of the first choice in the completion response.

    Raises:
        ValueError: If ``todo`` is neither 'summarize' nor 'improve'.
    """
    supportedTasks = ['summarize', 'improve']
    if todo not in supportedTasks:
        raise ValueError(f"Invalid input value: {todo}. Must be 'summarize' or 'improve'")

    # Pick the system/user prompt pair for the requested task.
    if todo == 'summarize':
        systemPrompt = "You are a helpful assistant who specializes in summarizing transcribed text in Swedish."
        userPrompt = f"The following text is a transcription from spoken Swedish. Can you summarize this text:\n\n{text}"
    else:
        systemPrompt = "You are a helpful assistant who specializes in improving transcribed text in Swedish."
        userPrompt = f"The following text is a transcription from spoken Swedish. Can you improve this text:\n\n{text}"

    completion = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": systemPrompt},
            {"role": "user", "content": userPrompt},
        ],
        max_tokens=1024,
        temperature=0.7,
    )
    firstChoice = completion.choices[0]
    return firstChoice.message.content

In [None]:
improvedFile = 'data/TRANSCRIPTIONimproved.txt'
summarizedFile = 'data/TRANSCRIPTIONsummarized.txt'

# Read the transcription once and reuse it for both requests.
transcription = readText(textFilepath)

# Each result is written as soon as it is available, so a failure in the
# second request does not discard the output of the first.

# write the improved text
improvedText = manageText(transcription, 'improve')
writeText(improvedFile, improvedText)
print(f"Improved text written to {improvedFile}.")

# write the summarized text
summarizedText = manageText(transcription, 'summarize')
writeText(summarizedFile, summarizedText)
print(f"Summarized text written to {summarizedFile}.")