# Get API Keys

In [1]:
from dotenv import load_dotenv

load_dotenv()

True

# Set up Tracing

In [2]:
# from openai import AsyncOpenAI
from langfuse.openai import AsyncOpenAI

# Set up the client

In [3]:
client = AsyncOpenAI()

In [4]:
GPT4O_MINI = "o4-mini-2025-04-16"

# Chat Helpers

In [5]:
def _msg(role, content):
    return {'role': role, 'content': content}

def system(content):
    return _msg('system', content)

def user(content):
    return _msg('user', content)

def assistant(content):
    return _msg('assistant', content)

# Cache to disk

In [6]:
from diskcache import Cache

In [7]:
# cache = Cache() # temporary cache
cache = Cache(directory=".cache_course")

In [8]:
#cache.set("Hello", "blabla")
#cache.get("Hello")

In [9]:
import asyncio

In [10]:
async def set_async(key, val, **kwargs):
    return await asyncio.to_thread(cache.set, key, val, **kwargs)

async def get_async(key, default=None, **kwargs):
    return await asyncio.to_thread(cache.get, key, default, **kwargs)

In [11]:
# sanity checks
(
    await get_async("key_that_does_not_exist", default="NOT FOUND"),
    await get_async("Hello"),
    await set_async("key_exist", "yes"),
    await get_async("key_exist"),
)

('NOT FOUND', 'blabla', True, 'yes')

# LLMs calls are cached and as well traced

In [12]:
from hashlib import md5

In [13]:
import json

def make_cache_key(key_name, **kwargs):
    kwargs_string = json.dumps(kwargs, sort_keys=True)
    kwargs_hash = md5(kwargs_string.encode('utf-8')).hexdigest()
    cache_key = f"{key_name}__{kwargs_hash}"
    
    return cache_key

In [14]:
def _make_cache_key_for_chat_completion(
    *,
    model,
    messages,
    **kwargs,
):
    return make_cache_key(
        "openai_chat_completion",
        model=model,
        messages=messages,
        **kwargs
    )

In [34]:
from random import randint

class FakeRateLimitError(RateLimitError):
    def __init__(self):
        pass

try:
    if (randint(0, 10) % 2 == 0):
        raise FakeRateLimitError()
    else:
        print('all good')
except RateLimitError as e:
    print("caught")

caught


In [40]:
from openai.types.chat import ChatCompletion
from functools import update_wrapper, wraps
from openai import APITimeoutError, RateLimitError
import backoff

CACHE_MISS_SENTINEL = object()

@wraps(client.chat.completions.create)
async def cached_chat_completion(
    *,
    model,
    messages,
    **kwargs,
) -> ChatCompletion:
    # CREATE CAHE KEY
    cache_key = _make_cache_key_for_chat_completion(
        model=model,
        messages=messages,
        **kwargs
    )
    cached_value = await get_async(cache_key, default=CACHE_MISS_SENTINEL)
    # CACHE MISS
    if cached_value is CACHE_MISS_SENTINEL:
        @backoff.on_exception(
            backoff.expo,
            (APITimeoutError, RateLimitError)
        )
        async def do_call():
            print("trying")
            if (randint(0, 10) % 2 == 0):
                print("rate limit error")
                raise FakeRateLimitError()
            return await client.chat.completions.create(
                model=model,
                messages=messages,
                **kwargs
            )
        completion = await do_call()
        await set_async(cache_key, completion.json())
        return completion
    # CACHE HIT
    else:
        # TODO: Tracing code (next section)
        return ChatCompletion.validate(json.loads(cached_value))

#cached_chat_completion = update_wrapper(
#    cached_chat_completion,
#    wrapped=client.chat.completions.create
#)
    

In [41]:
completion = await cached_chat_completion(
    messages=[user("What is caching in software engineering??????????!!")],
    model=GPT4O_MINI,
    max_completion_tokens=1000,
    temperature=1.0
)

trying
rate limit error
trying
rate limit error
trying
rate limit error
trying


/tmp/ipykernel_809027/1437833088.py:39: PydanticDeprecatedSince20: The `json` method is deprecated; use `model_dump_json` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  await set_async(cache_key, completion.json())
