# Transcribe and summarize

Use the OpenAI API to transcribe and summarize audio files.

In [1]:
import os
from openai import OpenAI
# Create the API client used by the transcription and summary calls below;
# the key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

## Input audio files

In [2]:
# Folder that is searched for audio recordings
directory_path = '/Users/arno.klein/Documents/steinberg-videos/audio'

# Only file names ending with this suffix are treated as audio input
audio_append = '.m4a'

# Full path of every matching entry found directly inside the folder
audio_files = [
    os.path.join(directory_path, entry)
    for entry in os.listdir(directory_path)
    if entry.endswith(audio_append)
]

## Output transcript and summary files

In [3]:
# Output transcript and summary file paths.
# Both lists are derived from audio_files (instead of listing the directory
# again) so that index i always refers to the same recording in all three
# lists, even if the directory contents change between cell runs.
# Each output path is the audio path with a suffix appended, e.g.
# '<name>.m4a_transcript.txt' and '<name>.m4a_summary.txt'.
transcript_files = [audio_path + '_transcript.txt' for audio_path in audio_files]
summary_files = [audio_path + '_summary.txt' for audio_path in audio_files]

## Transcribe and summarize functions

In [4]:
def transcribe(audio_file):
    """Transcribe one audio file with the "whisper-1" model.

    Args:
        audio_file: Path to the audio file to upload.

    Returns:
        The object returned by ``client.audio.transcriptions.create``;
        ``summarize`` reads the transcript from its ``text`` attribute.
    """
    # Open in binary mode and close the handle as soon as the request
    # finishes, so handles are not leaked while looping over many files.
    with open(audio_file, 'rb') as file_handle:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=file_handle,
        )
    return transcript

In [5]:
def summarize(transcript):
    """Request a plain-language summary of a transcript from the chat model.

    Args:
        transcript: Object whose ``text`` attribute holds the transcript,
            as returned by ``transcribe``.

    Returns:
        The object returned by ``client.chat.completions.create``; the
        calling loop reads the summary from ``choices[0].message.content``.
    """
    system_prompt = """Please provide a clear summary of the provided transcript, understandable for someone with a high-school education. \n\nTranscript:"""

    # The instructions go in the system message, the transcript in the user message.
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": transcript.text},
    ]
    return client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=conversation,
    )

## Loop through, transcribe, and summarize audio files

In [None]:
# For each audio file: transcribe it, save the transcript, summarize the
# transcript, and save the summary. Output paths are looked up by index in
# transcript_files / summary_files.
for ifile, audio_file in enumerate(audio_files):
    transcript = transcribe(audio_file)
    # The transcription result carries its text in `.text` (the attribute
    # summarize() reads); `.choices` exists only on chat completions.
    # Write as UTF-8 so non-ASCII characters do not depend on the platform
    # default encoding.
    with open(transcript_files[ifile], "w", encoding="utf-8") as file_buffer:
        file_buffer.write(transcript.text)
    summary = summarize(transcript)
    with open(summary_files[ifile], "w", encoding="utf-8") as file_buffer:
        file_buffer.write(summary.choices[0].message.content)
    print(f'Summary written to {summary_files[ifile]}')