In [1]:
import os 
from dotenv import load_dotenv
from urllib.request import urlretrieve
from urllib.error import URLError

In [2]:
# Load key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this provides the OpenAI / Langfuse credentials the
# later cells rely on — confirm against the project's .env template.
load_dotenv()

True

### Get local data

In [5]:
# Fetch the sample media files from the repository into a local folder.
REPO_URL = "https://github.com/langfuse/langfuse-python"
download_path = "static"
os.makedirs(download_path, exist_ok=True)

test_files = ["puton.jpg", "joke_prompt.wav", "bitcoin.pdf"]

# The local folder name is reused as the folder name inside the repository.
raw_url = f"{REPO_URL}/raw/main/{download_path}"

for file in test_files:
    source_url = f"{raw_url}/{file}"
    target_path = f"{download_path}/{file}"
    try:
        urlretrieve(source_url, target_path)
        print(f"Successfully downloaded: {file}")
    # URLError is a subclass of OSError, so it has to be handled first to keep
    # the "download" and "save" failure messages distinct.
    except URLError as download_error:
        print(f"Failed to download {file}: {download_error}")
    except OSError as save_error:
        print(f"Failed to save {file}: {save_error}")

Successfully downloaded: puton.jpg
Successfully downloaded: joke_prompt.wav
Successfully downloaded: bitcoin.pdf


### Langfuse Multimodal logging

#### Image Data

In [6]:
from langfuse.openai import openai
import base64

# Build the client from the `openai` module re-exported by `langfuse.openai`
# (see the import above), not from the plain `openai` package.
# NOTE(review): presumably this is what routes the requests below through
# Langfuse tracing — confirm against the Langfuse OpenAI integration docs.
client = openai.OpenAI()

def encode_file(image_path):
    """Return the content of a file as a base64 text string.

    Args:
        image_path: Path of the file to read. Despite the parameter name, the
            file is read as raw bytes, so any file type works.

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

In [24]:
content_path = "static/puton.jpg"
content_type = "image/jpeg"

# The image is embedded in the request as a base64 data URL.
base64_image = encode_file(content_path)
image_data_url = f"data:{content_type};base64,{base64_image}"

# One user turn: the text question followed by the image it refers to.
image_question = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What’s in this image?"},
        {"type": "image_url", "image_url": {"url": image_data_url}},
    ],
}

response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[image_question],
    max_tokens=300,
)

# Print the response object's attribute dictionary for inspection.
print(vars(response))

# NOTE(review): presumably sends any buffered Langfuse events — confirm.
openai.flush_langfuse()

{'id': 'chatcmpl-BCpI7SJtTx2zNENjpoK2jE1267Iwy', 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="The image features a dog with curly fur, sitting with its front paws resting on a person's knee. The dog appears happy, with its tongue out. In the background, there are a few people standing, and the setting seems to be a cozy indoor space with wooden floors and a colorful rug.", refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], 'created': 1742396799, 'model': 'gpt-4o-mini-2024-07-18', 'object': 'chat.completion', 'service_tier': 'default', 'system_fingerprint': 'fp_3267753c5d', 'usage': CompletionUsage(completion_tokens=60, prompt_tokens=25514, total_tokens=25574, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=24704

#### Audio Data

In [22]:
content_path = "static/joke_prompt.wav"

base64_string = encode_file(content_path)

# The two parts of the user turn: a text instruction and the recording itself,
# passed inline as base64-encoded WAV data.
instruction_part = {"type": "text", "text": "Do what this recording says."}
recording_part = {
    "type": "input_audio",
    "input_audio": {"data": base64_string, "format": "wav"},
}

# Request both a text and an audio modality in the reply.
response = client.chat.completions.create(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},
    messages=[
        {"role": "user", "content": [instruction_part, recording_part]},
    ],
)

# Print the response object's attribute dictionary for inspection.
print(vars(response))

# NOTE(review): presumably sends any buffered Langfuse events — confirm.
openai.flush_langfuse()

{'id': 'chatcmpl-BCpEqboDIXVQocT4w89nQWq6bb7uP', 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=[], audio=ChatCompletionAudio(id='audio_67dadcb667e88190b781cebbc1ef8cb1', data=<langfuse.media.LangfuseMedia object at 0x1281d3770>, expires_at=1742400198, transcript="Why don't they play hide and seek in Berlin? Because no matter how good you are at hiding, the Berlin Wall always seems to give people away!"), function_call=None, tool_calls=None))], 'created': 1742396596, 'model': 'gpt-4o-audio-preview-2024-12-17', 'object': 'chat.completion', 'service_tier': 'default', 'system_fingerprint': 'fp_31e26c9138', 'usage': CompletionUsage(completion_tokens=240, prompt_tokens=66, total_tokens=306, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=196, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=44), prompt_tokens_details=PromptTokens