# Load API Keys

In [1]:
from dotenv import load_dotenv

load_dotenv()

True

# Tracing

In [2]:
from langfuse.openai import AsyncOpenAI  # autoinstrmenttion

# Setup LLM Call Helpers

In [3]:
client = AsyncOpenAI()

In [4]:
GPT4O_MINI = "gpt-4o-mini-2024-07-18"

In [5]:
def _msg(role, content):
    return {'role': role, 'content': content}

def system(content):
    return _msg('system', content)

def user(content):
    return _msg('user', content)

def assistant(content):
    return _msg('assistant', content)

# Cache System 

In [6]:
from diskcache import Cache

In [7]:
cache = Cache(directory=".cache_course")

In [8]:
import asyncio

In [9]:
async def set_async(key, val, **kwargs):
    return await asyncio.to_thread(cache.set, key, val, **kwargs)

async def get_async(key, default=None, **kwargs):
    return await asyncio.to_thread(cache.get, key, default, **kwargs)

# Implementing Cached, Retried and Traced Structured Ouputs completion

In [10]:
import json
from hashlib import md5

def make_cache_key(key_name, **kwargs):
    kwargs_string = json.dumps(kwargs, sort_keys=True)
    kwargs_hash = md5(kwargs_string.encode('utf-8')).hexdigest()
    cache_key = f"{key_name}__{kwargs_hash}"
    return cache_key

In [11]:
from pydantic import BaseModel

def _make_key_for_cached_chat_completion_parsed_with_retry(
    *,
    model,
    messages,
    response_format: BaseModel,
    **kwargs,
):
    return make_cache_key(
        "openai_parsed_chat",
        model=model,
        messages=messages,
        response_format=response_format.model_json_schema(),
        **kwargs
    )

In [12]:
from openai.types.chat import ParsedChatCompletion
from functools import wraps
from openai import APITimeoutError, RateLimitError
from pydantic import BaseModel
from typing_extensions import TypeVar
import backoff

ResponseFormatT = TypeVar("ResponseFormatT", bound=BaseModel)

CACHE_MISS_SENTINEL = object()


@wraps(client.chat.completions.parse)
async def cached_chat_completion_parsed_with_retry(
    *,
    model,
    messages,
    response_format: ResponseFormatT,
    **kwargs,
) -> ParsedChatCompletion[ResponseFormatT]:
    # CREATE CACHE KEY
    cache_key = _make_key_for_cached_chat_completion_parsed_with_retry(
        model=model,
        messages=messages,
        response_format=response_format,
        **kwargs
    )

    cached_value = await get_async(cache_key, default=CACHE_MISS_SENTINEL)
    # CACHE MISS
    if cached_value is CACHE_MISS_SENTINEL:
        @backoff.on_exception(
            backoff.expo,
            (APITimeoutError, RateLimitError)
        )
        async def do_call():
            return await client.chat.completions.parse(
                model=model,
                messages=messages,
                response_format=response_format,
                **kwargs
            )
        completion = await do_call()
        await set_async(cache_key, completion.model_dump_json())
        return completion
    # CACHE HIT
    else:
        # TODO: Tracing Code (next section)
        # return 
        completion = ParsedChatCompletion.model_validate(json.loads(cached_value))
        for choice in completion.choices:
            if not choice.message.refusal:
                choice.message.parsed = response_format.model_validate(
                    choice.message.parsed
                )
        return completion
        
        

# DATA_GENERATION

## Load the Dataset

In [13]:
import pandas as pd

In [14]:
emails = pd.read_csv("paul_allen_sent_emails.csv")

In [15]:
emails

Unnamed: 0,content,Message-ID,Date,Correspondants
0,"jeff, i want to bid $2.8 for sagewood with a r...",<13537630.1075855669909.JavaMail.evans@thyme,"Mon, 31 Dec 1979 16:00:00 -0800 (PST",Jeff Smith
1,attached is the systems wish list for the gas ...,<27903020.1075855669931.JavaMail.evans@thyme,"Mon, 31 Dec 1979 16:00:00 -0800 (PST","John J Lavorato, Beth Perlman, Hunter S Shivel..."
2,how is your racing going? what category are yo...,<12929996.1075855668941.JavaMail.evans@thyme,"Mon, 31 Dec 1979 16:00:00 -0800 (PST",muller@thedoghousemail.co
3,eol report for tv in conference on 33 cash -he...,<29770699.1075855669609.JavaMail.evans@thyme,"Mon, 31 Dec 1979 16:00:00 -0800 (PST","Stephen Harrington, Mar"
4,"mary, it is ok to buy a carpet shampooer. abou...",<17449361.1075855672476.JavaMail.evans@thyme,"Mon, 31 Dec 1979 16:00:00 -0800 (PST",mary richards
...,...,...,...,...
391,can you guys coordinate to make sure someone l...,<2313514.1075855693911.JavaMail.evans@thyme,"Mon, 7 May 2001 02:28:00 -0700 (PDT","Matthew Lenhart, Jay Reitmeyer, Matt Smit"
392,the west desk would like 2 analysts.,<10598636.1075855693867.JavaMail.evans@thyme,"Wed, 9 May 2001 05:13:00 -0700 (PDT",John J Lavorat
393,"jeff, jacques craig will draw up a release. wh...",<5195408.1075855693846.JavaMail.evans@thyme,"Thu, 10 May 2001 00:50:00 -0700 (PDT",jsmith@austintx.co
394,let me know when you get the quotes from pauli...,<7510478.1075855693794.JavaMail.evans@thyme,"Thu, 10 May 2001 23:26:00 -0700 (PDT",jsmith@austintx.co


# Prompt Engineering

In [16]:
from jinja2 import Template

In [17]:
from textwrap import dedent

In [18]:
from pydantic import BaseModel

class GenerateQuestions(BaseModel):
    useless_to_recall: bool
    questions: list[str]

In [19]:
prompt_template = Template(dedent("""\
    You are Paul Allen, also known as Phillip Allen, and you sent this email:
    
    <email>
    {{ email_content }}
    </email>
    
   Task:
   Generate questions to recall the details of the email. Only generate questions if the email content is useful to recall.

   Guidelines:
   - "Useful to recall" means the email contains specific details, facts, or information that are important for later retrieval. Exeamples include figures, dates, instructions, terms or critical decisions.
   - An email is "useless to recall" if:
     - It references external content (e.g., attachments) without providing details within the email itself.
     - It is vague or incomplete, lacking sufficient information to form meaningful questions.
     - The content is trivial, casual, or irrelevant, such as personal inquiries, greetings, or non-substantive commentary.
     - The email discusses routine or obvious information that doesn't need to be remembered for future reference.
   - If the email is "useless to recall", set "useless_to_recall": true and do not generate any questions.

   Output in JSON:
   ```json
   {
       "useless_to_recall": boolean,
       "questions": ["string 1", "string 2", ...]
   }
   ```
"""))

# Iterate on the prompt

In [20]:
email = emails.iloc[0]

In [21]:
email.content

'jeff, i want to bid $2.8 for sagewood with a rate 8.5% or less and dependent on 30 year term'

In [22]:
prompt = prompt_template.render(
    email_content=email.content
)

In [23]:
prompt

' You are Paul Allen, also known as Phillip Allen, and you sent this email:\n\n <email>\n jeff, i want to bid $2.8 for sagewood with a rate 8.5% or less and dependent on 30 year term\n </email>\n\nTask:\nGenerate questions to recall the details of the email. Only generate questions if the email content is useful to recall.\n\nGuidelines:\n- "Useful to recall" means the email contains specific details, facts, or information that are important for later retrieval. Exeamples include figures, dates, instructions, terms or critical decisions.\n- An email is "useless to recall" if:\n  - It references external content (e.g., attachments) without providing details within the email itself.\n  - It is vague or incomplete, lacking sufficient information to form meaningful questions.\n  - The content is trivial, casual, or irrelevant, such as personal inquiries, greetings, or non-substantive commentary.\n  - The email discusses routine or obvious information that doesn\'t need to be remembered for

In [24]:
MODEL = GPT4O_MINI

In [25]:
completion = await cached_chat_completion_parsed_with_retry(
    model=MODEL,
    messages=[user(prompt)],
    response_format=GenerateQuestions
)

In [26]:
completion.choices[0].message.parsed.questions

['What is the bid amount Paul Allen is proposing for Sagewood?',
 'What interest rate is Paul Allen requesting for the bid?',
 'What is the term length specified in the email regarding the bid?']

In [27]:
async def try_the_prompt(i):
    email = emails.iloc[i]
    prompt = prompt_template.render(
        email_content=email.content
    )
    
    completion = await cached_chat_completion_parsed_with_retry(
        model=MODEL,
        messages=[user(prompt)],
        response_format=GenerateQuestions
    )
    
    return completion.choices[0].message.parsed, email.content

In [28]:
r, e = await try_the_prompt(1)

In [29]:
e

'attached is the systems wish list for the gas basis and physical trading'

In [30]:
r.questions

[]

In [31]:
r.useless_to_recall

True

### notes on what we don't like in the inputs

'attached is the systems wish list for the gas basis and physical trading' -> useless because we don't have access to the attachment
'how is your racing going? what category are you up to? i' -> generated ['How is your racing going?', 'What category are you up to?']: bad questions (it is useless to recall this)

# Batching calls

## Create the task function

In [32]:
import traceback

async def try_the_prompt(i):
    email = emails.iloc[i]
    prompt = prompt_template.render(
        email_content=email.content
    )
    
    completion = await cached_chat_completion_parsed_with_retry(
        model=MODEL,
        messages=[user(prompt)],
        response_format=GenerateQuestions
    )
    
    return completion.choices[0].message.parsed, email.content

async def maybe_try_the_prompt(i):
    try:
        return await try_the_prompt(i)
    except Exception as e:
        return(i, e, traceback.format_exc())

In [33]:
tasks = [maybe_try_the_prompt(i) for i in range(10)]

results = await asyncio.gather(*tasks)

In [34]:
failed = [r for r in results if isinstance(r[0], int)]
f"{len(failed)=}, {len(results)=}"

'len(failed)=0, len(results)=10'

In [35]:
results

[(GenerateQuestions(useless_to_recall=False, questions=['What is the bid amount Paul Allen is proposing for Sagewood?', 'What interest rate is Paul Allen requesting for the bid?', 'What is the term length specified in the email regarding the bid?']),
  'jeff, i want to bid $2.8 for sagewood with a rate 8.5% or less and dependent on 30 year term'),
 (GenerateQuestions(useless_to_recall=True, questions=[]),
  'attached is the systems wish list for the gas basis and physical trading'),
 (GenerateQuestions(useless_to_recall=True, questions=[]),
  'how is your racing going? what category are you up to? i'),
 (GenerateQuestions(useless_to_recall=False, questions=['What is the significance of the EOL report mentioned in the email?', "What does 'tv in conference' refer to in this context?", "What does '33 cash' indicate regarding the report?", 'Can you elaborate on the locations mentioned: hehub, Chicago, pepl, Katy, and waha?', "What time frame does 'prompt month' refer to in relation to 'nym

In [36]:
from pprint import pprint

In [37]:
pprint([(r[1], r[0].model_dump()) for r in results])

[('jeff, i want to bid $2.8 for sagewood with a rate 8.5% or less and '
  'dependent on 30 year term',
  {'questions': ['What is the bid amount Paul Allen is proposing for Sagewood?',
                 'What interest rate is Paul Allen requesting for the bid?',
                 'What is the term length specified in the email regarding the '
                 'bid?'],
   'useless_to_recall': False}),
 ('attached is the systems wish list for the gas basis and physical trading',
  {'questions': [], 'useless_to_recall': True}),
 ('how is your racing going? what category are you up to? i',
  {'questions': [], 'useless_to_recall': True}),
 ('eol report for tv in conference on 33 cash -hehub -chicago -pepl -katy '
  '-waha prompt month nymex',
  {'questions': ['What is the significance of the EOL report mentioned in the '
                 'email?',
                 "What does 'tv in conference' refer to in this context?",
                 "What does '33 cash' indicate regarding the report?",
  

# Run against the full dataset

In [38]:
tasks = [maybe_try_the_prompt(i) for i in range(20)]

results = await asyncio.gather(*tasks)

In [39]:
from tqdm.notebook import tqdm
from tqdm.asyncio import tqdm_asyncio

In [40]:
tasks = [maybe_try_the_prompt(i) for i in range(len(emails))]

results = await tqdm_asyncio.gather(*tasks)

100%|██████████| 396/396 [00:00<00:00, 943.48it/s] 


In [41]:
failed = [r for r in results if isinstance(r[0], int)]
f"{len(failed)=}, {len(results)=}"

'len(failed)=0, len(results)=396'

In [42]:
results[0]

(GenerateQuestions(useless_to_recall=False, questions=['What is the bid amount Paul Allen is proposing for Sagewood?', 'What interest rate is Paul Allen requesting for the bid?', 'What is the term length specified in the email regarding the bid?']),
 'jeff, i want to bid $2.8 for sagewood with a rate 8.5% or less and dependent on 30 year term')

## join with email and save

In [43]:
col_useless_to_recall = []
col_questions = []

for (generated_questions, _) in results:
    col_useless_to_recall.append(generated_questions.useless_to_recall)
    col_questions.append(generated_questions.questions)

In [44]:
len(col_questions)

396

In [45]:
len(col_useless_to_recall)

396

In [46]:
len(emails)

396

In [47]:
emails['useless_to_recall'] = col_useless_to_recall

In [48]:
emails['questions'] = col_questions

In [49]:
emails.to_csv('paul_allen_sent_email_with_questions_v1.csv', index=False)