# Load API Keys

In [1]:
from dotenv import load_dotenv

load_dotenv()

True

# Tracing

In [2]:
from langfuse.openai import AsyncOpenAI  # autoinstrmenttion

# Setup LLM Call Helpers

In [3]:
client = AsyncOpenAI()

In [4]:
GPT4O_MINI = "gpt-4o-mini-2024-07-18"

In [5]:
def _msg(role, content):
    return {'role': role, 'content': content}

def system(content):
    return _msg('system', content)

def user(content):
    return _msg('user', content)

def assistant(content):
    return _msg('assistant', content)

# Cache System 

In [6]:
from diskcache import Cache

In [7]:
cache = Cache(directory=".cache_course")

In [8]:
import asyncio

In [9]:
async def set_async(key, val, **kwargs):
    return await asyncio.to_thread(cache.set, key, val, **kwargs)

async def get_async(key, default=None, **kwargs):
    return await asyncio.to_thread(cache.get, key, default, **kwargs)

# Implementing Cached, Retried and Traced Structured Ouputs completion

In [10]:
import json
from hashlib import md5

def make_cache_key(key_name, **kwargs):
    kwargs_string = json.dumps(kwargs, sort_keys=True)
    kwargs_hash = md5(kwargs_string.encode('utf-8')).hexdigest()
    cache_key = f"{key_name}__{kwargs_hash}"
    return cache_key

In [11]:
from pydantic import BaseModel

def _make_key_for_cached_chat_completion_parsed_with_retry(
    *,
    model,
    messages,
    response_format: BaseModel,
    **kwargs,
):
    return make_cache_key(
        "openai_parsed_chat",
        model=model,
        messages=messages,
        response_format=response_format.model_json_schema(),
        **kwargs
    )

In [12]:
from openai.types.chat import ParsedChatCompletion
from functools import wraps
from openai import APITimeoutError, RateLimitError
from typing_extensions import TypeVar
import backoff

ResponseFormatT = TypeVar("ResponseFormatT", bound=BaseModel)

CACHE_MISS_SENTINEL = object()


@wraps(client.chat.completions.parse)
async def cached_chat_completion_parsed_with_retry(
    *,
    model,
    messages,
    response_format: ResponseFormatT,
    **kwargs,
) -> ParsedChatCompletion[ResponseFormatT]:
    # CREATE CACHE KEY
    cache_key = _make_key_for_cached_chat_completion_parsed_with_retry(
        model=model,
        messages=messages,
        response_format=response_format,
        **kwargs
    )

    cached_value = await get_async(cache_key, default=CACHE_MISS_SENTINEL)
    # CACHE MISS
    if cached_value is CACHE_MISS_SENTINEL:
        @backoff.on_exception(
            backoff.expo,
            (APITimeoutError, RateLimitError)
        )
        async def do_call():
            return await client.chat.completions.parse(
                model=model,
                messages=messages,
                response_format=response_format,
                **kwargs
            )
        completion = await do_call()
        await set_async(cache_key, completion.model_dump_json())
        return completion
    # CACHE HIT
    else:
        # TODO: Tracing Code (next section)
        # return 
        completion = ParsedChatCompletion.model_validate(json.loads(cached_value))
        for choice in completion.choices:
            if not choice.message.refusal:
                choice.message.parsed = response_format.model_validate(
                    choice.message.parsed
                )
        return completion
        
        

# DATA_GENERATION

## Load the dataset

In [13]:
import pandas as pd

In [14]:
emails = pd.read_csv('paul_allen_sent_emails.csv')

In [15]:
emails.head(n=3)

Unnamed: 0,content,Message-ID,Date,Correspondants
0,"jeff, i want to bid $2.8 for sagewood with a r...",<13537630.1075855669909.JavaMail.evans@thyme,"Mon, 31 Dec 1979 16:00:00 -0800 (PST",Jeff Smith
1,attached is the systems wish list for the gas ...,<27903020.1075855669931.JavaMail.evans@thyme,"Mon, 31 Dec 1979 16:00:00 -0800 (PST","John J Lavorato, Beth Perlman, Hunter S Shivel..."
2,how is your racing going? what category are yo...,<12929996.1075855668941.JavaMail.evans@thyme,"Mon, 31 Dec 1979 16:00:00 -0800 (PST",muller@thedoghousemail.co


In [16]:
print(f"{emails.shape[0]} lines x {emails.shape[0]} rows")

396 lines x 396 rows


## Prompt Engineering

In [45]:
from jinja2 import Template # To take better into account the spcecial characters that print(f"{user_name} {prompt}\n\nThis is a test for dedent...")
from textwrap import dedent # To remove all the characters that we don't want

## Test jinja2 and textwrap.dedent

In [46]:
# Without dedent
prompt_template = Template(
    """\
    {{ user_name }} {{ prompt }}

    This is a test for dedent.

    I want to check.
    """
)

In [47]:
print(prompt_template.render(user_name="Paul Allen", prompt="Generate questions from emails"))

    Paul Allen Generate questions from emails

    This is a test for dedent.

    I want to check.
    


In [48]:
prompt_template = Template(
    dedent(
        """\
        {{ user_name }} {{ prompt }}

        This is a test for dedent.

        I want to check.
        """
    )
)

In [49]:
print(prompt_template.render(user_name="Paul Allen", prompt="Generate questions from emails"))

Paul Allen Generate questions from emails

This is a test for dedent.

I want to check.


## Apply the prompt to real data

In [50]:
from pydantic import BaseModel

class GeneratedQuestions(BaseModel):
    useless_to_recall: bool
    questions: list[str]

In [51]:
prompt_template = Template(
    dedent(
        """\
        You are Paul Allen, also known as Phillip Allen, and you sent this email.
        
        <email_content>
        {{ email_content }}
        </email_content>
        
        You will read one email and decide if generating retrieval questions is useful.
        If it is useful, generate high-signal, content-bearing questions that are directly
        answerable from the email text itself (no outside knowledge).
        
        
        DECISION FIRST (think silently, then output JSON only)
        Classify the email using this rubric:
        
        MARK AS USELESS (set "useless_to_recall": true) **IF ANY** of these are true:
        - ATTACHMENT-ONLY/EXTERNAL-ONLY: The body only references external material
          (e.g., "attached...", "see link...") **without** summarizing key contents.
        - CHITCHAT/PHATIC: Greetings or personal small talk without concrete facts
          (e.g., “How’s your training going?”, “Hope you’re well.”).
        - ADMIN/BOILERPLATE: Autoresponder, unsubscribe, disclaimers, or purely
          mechanical notices with no content-bearing facts.
        - EMPTY/NEAR-EMPTY: Too short or vague to form answerable questions; no specific
          entities, numbers, dates, terms, or commitments.
        - NON-ANSWERABLE PROMPTS: The text is only requests to the recipient about their
          status/opinion (not facts stated in the email itself).
        
        MARK AS USEFUL (set "useless_to_recall": false) only if the email contains
        **content-bearing facts** that can be turned into self-contained questions.
        Content-bearing facts include (any of):
        - Named entities (projects/places/organizations/products)
        - Specific quantities (numbers, currencies, percentages, counts)
        - Dates/durations/terms (e.g., 30-year term, due Friday, Q3)
        - Explicit actions/commitments/constraints (propose, approve, ship, capped at…)
        - Conditions that bind facts (e.g., “contingent on X”, “dependent on Y”)
        
        MINIMUM SIGNAL RULE
        - If the email does **not** contain at least one concrete, answerable fact span,
          mark as useless.
        
        IF USEFUL, GENERATE QUESTIONS
        - Questions must be self-contained and directly answerable from the email text.
        - Avoid clarifying/meta questions (e.g., “Am I certain…”, “What type of deal is this?”),
          and avoid formatting/recipient/greeting topics.
        - Keep lexical hooks (proper nouns, exact numerals/units).
        
        OUTPUT SCHEMA (return JSON only; no extra text)
        ```json
        {
          "useless_to_recall": boolean,
          "useless_reason": "attachment_only | chitchat | boilerplate | empty | non_answerable | n/a",
          "questions": ["string", "..."]
        }
        """
    )
)

## Iterate on the prompt

In [52]:
email = emails.iloc[0]

In [53]:
email.content

'jeff, i want to bid $2.8 for sagewood with a rate 8.5% or less and dependent on 30 year term'

In [54]:
prompt = prompt_template.render(
    email_content=email.content
)

In [55]:
print(prompt)

You are Paul Allen, also known as Phillip Allen, and you sent this email.

<email_content>
jeff, i want to bid $2.8 for sagewood with a rate 8.5% or less and dependent on 30 year term
</email_content>

You will read one email and decide if generating retrieval questions is useful.
If it is useful, generate high-signal, content-bearing questions that are directly
answerable from the email text itself (no outside knowledge).


DECISION FIRST (think silently, then output JSON only)
Classify the email using this rubric:

MARK AS USELESS (set "useless_to_recall": true) **IF ANY** of these are true:
- ATTACHMENT-ONLY/EXTERNAL-ONLY: The body only references external material
  (e.g., "attached...", "see link...") **without** summarizing key contents.
- CHITCHAT/PHATIC: Greetings or personal small talk without concrete facts
  (e.g., “How’s your training going?”, “Hope you’re well.”).
- ADMIN/BOILERPLATE: Autoresponder, unsubscribe, disclaimers, or purely
  mechanical notices with no content-b

In [56]:
MODEL = GPT4O_MINI

In [57]:
completion = await cached_chat_completion_parsed_with_retry(
    model=MODEL,
    messages=[user(prompt)],
    response_format=GeneratedQuestions
)

In [58]:
completion.choices[0].message.parsed.questions

['What is the bid amount for Sagewood?',
 'What interest rate is being proposed for the bid?',
 'What is the term length associated with the bid for Sagewood?']

## Bad questions category

'Am I certain that the rate should be 8.5% or less?' --> clarifying - not useful to retrieve the email
'What type of deal or project is Sagewood related to?' --> clarifying question - does not help to retrieve the email
'Did I address the recipient by name in the email?' --> does not relate to the content
'What type of deal or project is Sagewood related to?' --> clarifying question. Does not help to retrieve the email

In [59]:
async def try_the_prompt(i):
    email = emails.iloc[i]
    prompt = prompt_template.render(
        email_content=email.content
    )
    completion = await cached_chat_completion_parsed_with_retry(
        model=MODEL,
        messages=[user(prompt)],
        response_format=GeneratedQuestions
    )
    return completion.choices[0].message.parsed, email.content

In [66]:
r, e = await try_the_prompt(2)

In [67]:
e

'how is your racing going? what category are you up to? i'

In [68]:
print(f"List of questions: {r.questions}\n\nUseless to recall? {r.useless_to_recall}")

List of questions: []

Useless to recall? True


### notes on what we don't like in the inputs

'attached is the systems wish list for the gas basis and physical trading' -> useless because we don't have access to the attachment
'how is your racing going? what category are you up to? i' -> generated ['How is your racing going?', 'What category are you up to in racing?'] : bad questions (it is useless to recall this)

## Batching Calls

The goal is to iterate much faster to identify the useless to recall emails.