# Setup

In [1]:
import os
from pprint import pprint
from IPython.display import Markdown, display
from langfuse.openai import AsyncOpenAI
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment
# (presumably the API credentials the client needs — verify).
load_dotenv()
# Async client used by the cached chat-completion helper further down.
# Note: this is the AsyncOpenAI class exported by langfuse.openai, not the plain openai one.
client = AsyncOpenAI()

# from langfuse.openai import openai
# from openai import OpenAI

In [2]:
def _msg(role, content): #returns a dict
    return {'role':role, 'content':content}

def user(content):
    """Wrap ``content`` in a chat message carrying the 'user' role."""
    return _msg(role='user', content=content)

def system(content):
    """Wrap ``content`` in a chat message carrying the 'system' role."""
    return _msg(role='system', content=content)

def assistant(content):
    """Wrap ``content`` in a chat message carrying the 'assistant' role."""
    return _msg(role='assistant', content=content)

# Caching

In [3]:
from diskcache import Cache
# cache = Cache() --> a temporary cache: its contents are not kept when the program restarts
cache = Cache(directory = '.cache_coures_dana') # A persistent cache: entries are saved in this directory
                                                # and can be read back at any time, including after a restart


In [4]:
# Make the cache's get and set operations async.
# These are thin async wrappers around the synchronous cache calls.
# Define an async function with the `async` keyword; call it with the `await` keyword.
import asyncio
async def set_async_cache(key, val, **kwargs):
    """Store ``val`` under ``key`` in the disk cache from a worker thread.

    ``cache.set`` is executed through ``asyncio.to_thread`` and the coroutine
    resolves to whatever ``cache.set`` returns. Any extra keyword arguments
    are forwarded to ``cache.set`` unchanged.
    """
    pending_write = asyncio.to_thread(cache.set, key, val, **kwargs)
    return await pending_write

async def get_async_cache(key, **kwargs):
    """Look up ``key`` in the disk cache from a worker thread.

    ``cache.get`` is executed through ``asyncio.to_thread``. Extra keyword
    arguments (for example ``default``) are forwarded to ``cache.get``.
    """
    pending_read = asyncio.to_thread(cache.get, key, **kwargs)
    return await pending_read

# An async function must be awaited when it is called.

### Make the cache key with hashing

In [5]:
from hashlib import md5
import json
# print(md5(b'dana the one').hexdigest())
# md5 only accepts bytes, so a str must be encoded first

def cache_key(key, **kwargs):
    """Return ``key`` suffixed with an MD5 digest of the keyword arguments.

    The keyword arguments are serialised to JSON with sorted keys, so the
    result does not depend on the order in which they were passed. The
    final key has the form ``<key>__<32 hex chars>``.
    """
    serialized = json.dumps(kwargs, sort_keys=True).encode('utf-8')
    digest = md5(serialized).hexdigest()
    return f'{key}__{digest}'

def _chat_completion_cache_key(*, model, messages, **kwargs):
    """Build the cache key for one chat-completion request.

    Keyword-only (enforced by the leading ``*``). ``model``, ``messages`` and
    every extra keyword argument all take part in the key, under the fixed
    'openai_chat_completion' prefix.
    """
    request_params = {'model': model, 'messages': messages, **kwargs}
    return cache_key('openai_chat_completion', **request_params)

In [6]:
from openai.types.chat import ChatCompletion
from functools import update_wrapper
# Unique marker passed as the `default` of a cache lookup. `object()` creates a brand-new object
# that cannot be produced by accident, so an identity (`is`) check against it reliably means "key not found".
CACHE_MISS_SENTINEL = object() # created exactly once, at module level

async def cached_chat_completion(*, model, messages, **kwargs) -> ChatCompletion:
    """Return a chat completion, reusing a cached response for identical requests.

    The cache key is derived from ``model``, ``messages`` and every extra
    keyword argument, so any change to the request yields a different entry.
    On a miss the request is sent with ``client.chat.completions.create`` and
    the response is stored as JSON; on a hit the stored JSON is parsed back
    into a ``ChatCompletion``.

    Streaming requests (a truthy ``stream`` argument) are never cached: the
    call is forwarded to the client and its result is returned as-is.

    Args:
        model: Model name forwarded to the API.
        messages: List of chat message dicts forwarded to the API.
        **kwargs: Any further arguments for ``client.chat.completions.create``.
            They must be JSON-serialisable, because they are part of the key.

    Returns:
        The ``ChatCompletion`` for the request, fresh or restored from cache.
    """
    if kwargs.get('stream'):
        # A streamed response is consumed chunk by chunk and is not a
        # ChatCompletion, so it has no model_dump_json() to store. Bypass the
        # cache instead of failing after the request has already been sent.
        return await client.chat.completions.create(model=model, messages=messages, **kwargs)

    # Named `request_key` (not `cache_key`) so the module-level cache_key()
    # helper is not shadowed inside this function.
    request_key = _chat_completion_cache_key(model=model, messages=messages, **kwargs)
    cached_json = await get_async_cache(request_key, default=CACHE_MISS_SENTINEL)

    # CACHE HIT: the stored value is the JSON dump written on an earlier miss.
    if cached_json is not CACHE_MISS_SENTINEL:
        return ChatCompletion.model_validate_json(cached_json)

    # CACHE MISS: a) call the API, b) store the JSON dump, c) return the live object.
    completion = await client.chat.completions.create(model=model, messages=messages, **kwargs)
    await set_async_cache(request_key, completion.model_dump_json())
    return completion

# Copy the metadata of the real `create` method (name, docstring, ...) onto the cached wrapper and
# record it as `__wrapped__`, so help / auto-completion tooling can show `create`'s documentation for it.
cached_chat_completion = update_wrapper(cached_chat_completion, client.chat.completions.create) # for auto completion

In [7]:
# Send the question through the cached wrapper; the trailing bare expression displays the result.
# NOTE(review): whether this call actually reaches the API depends on what the on-disk cache already holds.
completion = await cached_chat_completion(
    messages = [user('What is "Hello" in German?')],
    model = 'gpt-4o-mini'
)
completion

ChatCompletion(id='chatcmpl-BX2E6DWKucjXIdxDDjabcf6rrY97C', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='"Hello" in German is "Hallo."', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1747213082, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_dbaca60df0', usage=CompletionUsage(completion_tokens=10, prompt_tokens=15, total_tokens=25, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))

In [8]:
# Repeat the exact same request: identical arguments produce the same cache key, so this call is
# expected to be served from the cache (the output shows the same completion id and `created`
# timestamp as the previous cell's output).
completion = await cached_chat_completion(
    messages = [user('What is "Hello" in German?')],
    model = 'gpt-4o-mini'
)
completion

ChatCompletion(id='chatcmpl-BX2E6DWKucjXIdxDDjabcf6rrY97C', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='"Hello" in German is "Hallo."', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1747213082, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_dbaca60df0', usage=CompletionUsage(completion_tokens=10, prompt_tokens=15, total_tokens=25, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))