In [1]:
import json
import os
from pathlib import Path

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

True

In [2]:
# Read the API key from the environment; the lookup yields None when the
# variable is unset.
# NOTE(review): confirm how the OpenAI client behaves when api_key is None.
API_KEY = os.environ.get("OPENAI_API_KEY_VOICE")

# Module-level client shared by the transcription helpers in this notebook.
client = OpenAI(api_key=API_KEY)

# Input clip for the plain transcription run (relative path).
audio_file_path = "data/caseoh burger king.mp3"

In [3]:
def transcribe_audio(audio_path, model="gpt-4o-transcribe"):
    """
    Send an audio file to the transcription endpoint of the module-level
    ``client`` and hand back whatever it returns.

    Args:
        audio_path (str): Path to the audio file
        model (str): Model to use for transcription (default: gpt-4o-transcribe)

    Returns:
        The object returned by ``client.audio.transcriptions.create`` (the
        notebook reads ``.text`` and calls ``.to_dict()`` on it), or ``None``
        when anything goes wrong. Errors are printed, never raised.
    """
    try:
        # A missing file is reported through the same handler as API errors,
        # so the caller only ever has to check for None.
        file_missing = not os.path.exists(audio_path)
        if file_missing:
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        # The request is issued while the handle is still open; the handle is
        # closed as soon as the call returns.
        with open(audio_path, "rb") as handle:
            return client.audio.transcriptions.create(model=model, file=handle)
    except Exception as err:
        print(f"Error during transcription: {err}")
        return None


In [4]:
# Transcribe the audio file.
result = transcribe_audio(audio_file_path)
# transcribe_audio returns None after printing an error, so guard the
# conversion instead of raising AttributeError on top of that message.
result.to_dict() if result is not None else None

{'text': "I mean I drive to a Burger King, can I please get a Waffle Junior with onion rings, make it a meal so I can get a drink, no I'm not finished that's not everything, can I please get a double waffle with no cheese, can I please get a number two with an off drink, I got money so I don't care how much it costs me, so just throw in some extra fries don't make them salty, all that cheese gonna make my booty drip drip, I'm lactose intolerant I don't sip look, if I see a side of cheese I'm a trip trip, I'm a sit on your toilet seat and doodoo the nip, so you got my lil waffle junior have to decorate that, and you got my double waffle have to decorate that, what about my onion ring, it was all you could sit back, Burger King ain't on me now, cheese I don't want that, and I'm getting hungry now, I know you heard that, waiting for my onion rings so I don't have to turn back, Burger King don't play with me, yeah that shit's a soul trap, seconds and I ain't rapping nickels, but yeah you d

In [5]:
# Use the file stem so names containing extra dots (e.g. "clip.v2.mp3") or
# non-"/" separators are not truncated.
source_input_filename = Path(audio_file_path).stem
if result:
    # The output directory may not exist on a fresh checkout.
    os.makedirs("output", exist_ok=True)
    output_file = f"output/{source_input_filename}_transcription.txt"
    # Explicit UTF-8 so non-ASCII transcript text does not depend on the
    # platform's default encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(result.text)
    print(f"Transcription saved to: {output_file}")


Transcription saved to: output/caseoh burger king_transcription.txt


In [12]:
def transcribe_audio_diarize(
    audio_path,
    model="gpt-4o-transcribe-diarize",
    response_format="diarized_json",
    chunking_strategy="auto",
):
    """
    Transcribe an audio file with speaker diarization, using the
    module-level ``client``.

    Args:
        audio_path (str): Path to the audio file
        model (str): Model to use (default: gpt-4o-transcribe-diarize)
        response_format (str): Response format requested from the API
            (default: diarized_json)
        chunking_strategy: Chunking strategy forwarded to the API
            (default: "auto")

    Returns:
        The object returned by ``client.audio.transcriptions.create``, or
        ``None`` when anything goes wrong. Errors are printed, never raised.
        With the defaults, the notebook's ``to_dict()`` output is a dict with
        a ``segments`` list whose entries carry ``speaker``, ``start``,
        ``end`` and ``text``.
    """
    try:
        # A missing file is routed through the same handler as API errors so
        # callers only need to check for None.
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        # The request must be issued while the file handle is still open.
        with open(audio_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model=model,
                file=audio_file,
                response_format=response_format,
                chunking_strategy=chunking_strategy,
            )

        return transcription

    except Exception as e:
        # Deliberately best-effort: report the problem and signal failure
        # with None rather than interrupting the notebook.
        print(f"Error during transcription: {e}")
        return None

In [13]:
# Input clip for the diarized transcription run (relative path).
audio_file_path_diarize = "data/cnbc oxychem sales.mp3"

In [14]:
# Transcribe the audio file
result = transcribe_audio_diarize(audio_file_path_diarize)
result.to_dict()

{'segments': [{'id': 'seg_0',
   'end': 0.8,
   'speaker': 'A',
   'start': 0.0,
   'text': " Well, it's official.",
   'type': 'transcript.text.segment'},
  {'id': 'seg_1',
   'end': 3.95,
   'speaker': 'A',
   'start': 0.95,
   'text': " Berkshire Hathaway is acquiring Occidental's chemical business,",
   'type': 'transcript.text.segment'},
  {'id': 'seg_2',
   'end': 5.050000000000001,
   'speaker': 'A',
   'start': 4.15,
   'text': ' OxyChem.',
   'type': 'transcript.text.segment'},
  {'id': 'seg_3',
   'end': 6.900000000000001,
   'speaker': 'A',
   'start': 5.300000000000001,
   'text': " It's an all-cash transaction.",
   'type': 'transcript.text.segment'},
  {'id': 'seg_4',
   'end': 9.250000000000002,
   'speaker': 'A',
   'start': 7.250000000000001,
   'text': " It's valued at $9.7 billion,",
   'type': 'transcript.text.segment'},
  {'id': 'seg_5',
   'end': 12.400000000000002,
   'speaker': 'A',
   'start': 9.300000000000002,
   'text': " and this is Berkshire's biggest purc

In [15]:
# Use the file stem so names containing extra dots or non-"/" separators are
# not truncated.
source_input_filename_diarize = Path(audio_file_path_diarize).stem

# result is None when the transcription failed; skip the export in that case.
output_dict = result.to_dict() if result else None
if output_dict:
    # The output directory may not exist on a fresh checkout.
    os.makedirs("output", exist_ok=True)
    output_json_file = f"output/{source_input_filename_diarize}_transcription.json"
    # Explicit UTF-8 keeps the written file independent of the platform's
    # default encoding.
    with open(output_json_file, "w", encoding="utf-8") as f:
        json.dump(output_dict, f, indent=2)
    print(f"Full diarized transcription (dict) saved to: {output_json_file}")

Full diarized transcription (dict) saved to: output/cnbc oxychem sales_transcription.json
