# Setup

## Load API Keys

In [1]:
from dotenv import load_dotenv

# Read a .env file (if one is found) into the process environment; per the
# section header this is where the API keys come from.
load_dotenv()

True

## Tracing

In [2]:
from langfuse.openai import AsyncOpenAI  # autoinstrumentation

## Setup OpenAI

In [3]:
# Shared async client used by every embedding / chat helper below. The class
# is the langfuse.openai import, so calls made through it are auto-instrumented.
# No arguments are passed — presumably credentials come from the environment
# loaded earlier; confirm.
client = AsyncOpenAI()

In [4]:
# Embedding model name; passed as `model=` in the embedding sanity check.
EMBED_MODEL = "text-embedding-3-large"

In [5]:
# Dated model identifier for chat calls. Not referenced in the cells visible
# here — presumably used further down; confirm.
GPT4O_MINI = "gpt-4o-mini-2024-07-18"

## LLM Call Helpers

In [6]:
def _msg(role, content):
    return {'role': role, 'content': content}

def system(content):
    """Return a chat message whose role is 'system' carrying `content`."""
    return _msg(role='system', content=content)

def user(content):
    """Return a chat message whose role is 'user' carrying `content`."""
    return _msg(role='user', content=content)

def assistant(content):
    """Return a chat message whose role is 'assistant' carrying `content`."""
    return _msg(role='assistant', content=content)

## Embedding Call Helpers

In [7]:
def get_embedding(e) -> list[float]:
    """Return the `embedding` attribute of the first item in `e.data`.

    Only the first entry is read; any further entries in `e.data` are ignored.
    """
    first_item = e.data[0]
    return first_item.embedding

## Compute Cosine Similarity

In [8]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from openai.types.create_embedding_response import CreateEmbeddingResponse

In [9]:
def embedding_cosine_sim(e1: CreateEmbeddingResponse, e2: CreateEmbeddingResponse) -> float:
    """Cosine similarity between the first embedding of two embedding responses.

    Each response is reduced to its first embedding vector via `get_embedding`,
    reshaped into a single-row 2-D array (the layout `cosine_similarity` takes),
    and the only cell of the resulting 1x1 matrix is returned.
    """
    row_a, row_b = (
        np.array(get_embedding(response)).reshape(1, -1)
        for response in (e1, e2)
    )
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]

## Cache System

In [10]:
from diskcache import Cache

In [11]:
# On-disk cache shared by the async get/set wrappers below; entries live under
# the relative directory ".cache_course".
cache = Cache(directory=".cache_course")

In [12]:
import asyncio

In [13]:
async def set_async(key, val, **kwargs):
    """Store `val` under `key` in the module-level `cache` without blocking the event loop.

    The `cache.set` call is handed to `asyncio.to_thread`; extra keyword
    arguments are forwarded to it unchanged and its return value is returned.
    """
    outcome = await asyncio.to_thread(cache.set, key, val, **kwargs)
    return outcome

async def get_async(key, default=None, **kwargs):
    """Fetch `key` from the module-level `cache` without blocking the event loop.

    The `cache.get` call is handed to `asyncio.to_thread` with `default` as its
    second positional argument; extra keyword arguments are forwarded unchanged.
    """
    fetched = await asyncio.to_thread(cache.get, key, default, **kwargs)
    return fetched

In [14]:
import json
from hashlib import md5

def make_cache_key(key_name, **kwargs):
    """Derive a cache key of the form ``<key_name>__<md5 hex digest>``.

    The keyword arguments are serialised to JSON with sorted keys, so the
    digest does not depend on the order they were passed in. All values must
    be JSON-serialisable, otherwise `json.dumps` raises.
    """
    payload = json.dumps(kwargs, sort_keys=True).encode('utf-8')
    digest = md5(payload).hexdigest()
    return f"{key_name}__{digest}"

## [EMBEDDING] Cached and Retried Calls

In [15]:
from pydantic import BaseModel

def _make_key_for_cached_embedding_with_retry(
    *,
    model,
    input,
    **kwargs,
):
    """Build the cache key for an embedding request.

    The key covers `model`, `input` and every extra keyword argument, so any
    change to the request produces a different key.
    """
    # NOTE(review): the namespace literal is the same "openai_parsed_chat" used
    # by the chat-completion key builder, which looks like a copy-paste. It is
    # left as is because renaming it would orphan already-cached embeddings.
    key_fields = {"model": model, "input": input, **kwargs}
    return make_cache_key("openai_parsed_chat", **key_fields)

In [16]:
from openai.types.create_embedding_response import CreateEmbeddingResponse
from functools import wraps
from openai import APITimeoutError, RateLimitError
from pydantic import BaseModel
import backoff


# Unique marker passed as the `default` of cache lookups: an identity check
# against it distinguishes "no entry" from any value that could be stored.
CACHE_MISS_SENTINEL = object()


@wraps(client.embeddings.create)
async def cached_embedding_with_retry(
    *,
    model,
    input,
    **kwargs,
) -> CreateEmbeddingResponse:
    """Create an embedding, served from the disk cache when possible.

    Note: `@wraps` replaces this docstring at runtime with the one from
    `client.embeddings.create`; it is kept here for readers of the source.

    Args:
        model: Embedding model name, forwarded to the API and part of the cache key.
        input: Text (or other accepted input) to embed; part of the cache key.
        **kwargs: Extra arguments forwarded to `client.embeddings.create` and
            included in the cache key.

    Returns:
        The `CreateEmbeddingResponse`, either fresh from the API (cache miss)
        or rebuilt from the cached JSON string (cache hit).

    Raises:
        Whatever `client.embeddings.create` raises, except `APITimeoutError`
        and `RateLimitError`, which are retried with exponential backoff.
    """
    # CREATE CACHE KEY
    cache_key = _make_key_for_cached_embedding_with_retry(
        model=model,
        input=input,
        **kwargs
    )

    cached_value = await get_async(cache_key, default=CACHE_MISS_SENTINEL)

    # CACHE HIT
    if cached_value is not CACHE_MISS_SENTINEL:
        # Same (de)serialisation API as the chat-completion helper:
        # `model_validate` instead of the deprecated `validate`.
        return CreateEmbeddingResponse.model_validate(json.loads(cached_value))

    # CACHE MISS
    @backoff.on_exception(
        backoff.expo,
        (APITimeoutError, RateLimitError)
    )
    async def do_call():
        return await client.embeddings.create(
            model=model,
            input=input,
            **kwargs
        )

    embedding = await do_call()
    # `model_dump_json` replaces the deprecated `.json()`; the response is
    # stored as a JSON string, which is what the cache-hit path expects.
    await set_async(cache_key, embedding.model_dump_json())
    return embedding
        
        

## [LLM] Cached, Retried, and Traced Calls

In [17]:
from pydantic import BaseModel

def _make_key_for_cached_chat_completion_parsed_with_retry(
    *,
    model,
    messages,
    response_format: BaseModel,
    **kwargs,
):
    """Build the cache key for a parsed chat-completion request.

    The key covers `model`, `messages`, the JSON schema produced by
    `response_format.model_json_schema()` and every extra keyword argument,
    so changing the response model's schema also changes the key.
    """
    schema = response_format.model_json_schema()
    key_fields = {
        "model": model,
        "messages": messages,
        "response_format": schema,
        **kwargs,
    }
    return make_cache_key("openai_parsed_chat", **key_fields)

In [18]:
from openai.types.chat import ParsedChatCompletion
from functools import wraps
from openai import APITimeoutError, RateLimitError
from pydantic import BaseModel
from typing_extensions import TypeVar
import backoff

# Type variable for the pydantic model used as the structured response format.
ResponseFormatT = TypeVar("ResponseFormatT", bound=BaseModel)

# NOTE(review): this rebinds the sentinel already created in the embedding
# cell. Both helpers look the name up at call time, so they keep agreeing with
# each other, but the second definition is redundant.
CACHE_MISS_SENTINEL = object()


@wraps(client.chat.completions.parse)
async def cached_chat_completion_parsed_with_retry(
    *,
    model,
    messages,
    response_format: ResponseFormatT,
    **kwargs,
) -> ParsedChatCompletion[ResponseFormatT]:
    """Run a structured-output chat completion, served from the disk cache when possible.

    Note: `@wraps` replaces this docstring at runtime with the one from
    `client.chat.completions.parse`; it is kept here for readers of the source.

    Args:
        model: Chat model name, forwarded to the API and part of the cache key.
        messages: Chat messages, forwarded to the API and part of the cache key.
        response_format: Pydantic model class describing the expected output;
            its JSON schema is part of the cache key and it is used to rebuild
            `message.parsed` on a cache hit.
        **kwargs: Extra arguments forwarded to `client.chat.completions.parse`
            and included in the cache key.

    Returns:
        The parsed completion, either fresh from the API (cache miss) or
        rebuilt from the cached JSON string (cache hit).

    Raises:
        Whatever `client.chat.completions.parse` raises, except
        `APITimeoutError` and `RateLimitError`, which are retried with
        exponential backoff.
    """
    # CREATE CACHE KEY
    cache_key = _make_key_for_cached_chat_completion_parsed_with_retry(
        model=model,
        messages=messages,
        response_format=response_format,
        **kwargs
    )

    cached_value = await get_async(cache_key, default=CACHE_MISS_SENTINEL)

    # CACHE MISS
    if cached_value is CACHE_MISS_SENTINEL:
        @backoff.on_exception(
            backoff.expo,
            (APITimeoutError, RateLimitError)
        )
        async def do_call():
            return await client.chat.completions.parse(
                model=model,
                messages=messages,
                response_format=response_format,
                **kwargs
            )

        completion = await do_call()
        await set_async(cache_key, completion.model_dump_json())
        return completion

    # CACHE HIT
    # TODO: Tracing Code (next section)
    completion = ParsedChatCompletion.model_validate(json.loads(cached_value))
    for choice in completion.choices:
        message = choice.message
        # Nothing to rebuild when the model refused or produced no parsed
        # payload (e.g. a message without content); validating None against
        # the response model would raise.
        if message.refusal or message.parsed is None:
            continue
        message.parsed = response_format.model_validate(message.parsed)
    return completion
        
        

## Sanity Checks

In [None]:
# Sanity check: embed one string through the cached helper and compare the
# result with itself — the displayed similarity should be (approximately) 1.0.
embedding = await cached_embedding_with_retry(
    input="input: 'Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]'",
    model=EMBED_MODEL
)
embedding_cosine_sim(embedding, embedding)

In [None]:
# sanity check
from pydantic import BaseModel

# Structured-output model for the sanity check below, where it is passed as
# `response_format`. Documented with comments rather than a docstring so the
# class definition itself stays unchanged.
class CalendarEvent(BaseModel):
    name: str  # event name
    date: str  # kept as free text, not parsed into a date type
    participants: list[str]  # one entry per participant

# Run one structured-output request through the cached helper; a second run of
# this cell with the same arguments is served from the disk cache.
completion = await cached_chat_completion_parsed_with_retry(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "Extract the event information."},
        {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
    ],
    response_format=CalendarEvent,
)

# Display the parsed payload of the first choice.
event = completion.choices[0].message.parsed
event

---

In [19]:
# Load the rewritten questions. The encoding is given explicitly so the JSON
# text is decoded as UTF-8 regardless of the platform's default locale encoding.
with open('rewritten_questions.json', 'r', encoding='utf-8') as f:
    questions = json.load(f)

In [20]:
import pandas as pd

# Load the sent-email table and discard its 'questions' column in one step.
emails = (
    pd.read_csv('paul_allen_sent_email_with_questions_v1.csv')
    .drop(columns=['questions'])
)

In [23]:
from types import SimpleNamespace

# String constants exposed under both `CONSTANTS` and the short alias `c_`.
# MESSAGE_ID is used as a column name of `emails` in the next cell; the others
# are presumably column names as well — confirm against their users.
c_ = CONSTANTS = SimpleNamespace(
    MESSAGE_ID='Message-ID',
    USELESS_TO_RECALL='useless_to_recall',
    CONTENT='content',
    DATE='Date',
    CORRESPONDANTS='Correspondants',
)

In [24]:
# Display the Message-ID column of the loaded emails.
emails[c_.MESSAGE_ID]

0      <13537630.1075855669909.JavaMail.evans@thyme
1      <27903020.1075855669931.JavaMail.evans@thyme
2      <12929996.1075855668941.JavaMail.evans@thyme
3      <29770699.1075855669609.JavaMail.evans@thyme
4      <17449361.1075855672476.JavaMail.evans@thyme
                           ...                     
391     <2313514.1075855693911.JavaMail.evans@thyme
392    <10598636.1075855693867.JavaMail.evans@thyme
393     <5195408.1075855693846.JavaMail.evans@thyme
394     <7510478.1075855693794.JavaMail.evans@thyme
395    <20840552.1075855693485.JavaMail.evans@thyme
Name: Message-ID, Length: 396, dtype: object