 ## Example of how to use the ChatModel wrapper

In [1]:
import pandas as pd

from automated_llm_eval.chat_model import ChatModel, Message
from automated_llm_eval.utils import ProgressBar, sidethread_event_loop_async_runner


# Instantiate the wrapper around OpenAI's API
model = ChatModel(model="gpt-3.5-turbo-1106")
# Alternative model; uncomment the next line (and comment out the one above) to use it:
# model = ChatModel(model="gpt-4-1106-preview")
model

ModuleNotFoundError: No module named 'automated_llm_eval'

In [2]:
# Other model settings can be set once here; they then apply to all API calls
model2 = ChatModel(
    model="gpt-3.5-turbo-1106",
    temperature=0.5,
    top_p=0.5,
    max_tokens=300,
    seed=42,
)
model2

ChatModel(sync_client=<openai.OpenAI object at 0x11ca88c90>, async_client=<openai.AsyncOpenAI object at 0x11caac150>, model='gpt-3.5-turbo-1106', temperature=0.5, top_p=0.5, max_tokens=300, n=1, seed=42)

In [3]:
# `max_tokens=None` means there is no max_tokens limit (this is the default)
model2 = ChatModel(
    model="gpt-3.5-turbo-1106",
    temperature=0.5,
    top_p=0.5,
    max_tokens=None,
    seed=42,
)
model2

ChatModel(sync_client=<openai.OpenAI object at 0x11ca88c90>, async_client=<openai.AsyncOpenAI object at 0x11caac150>, model='gpt-3.5-turbo-1106', temperature=0.5, top_p=0.5, max_tokens=None, n=1, seed=42)

 ### Making API calls using synchronous (blocking) client

In [4]:
# Make an API call and get back only the response message.
# `output_format="simple"` returns the reply in the "simple" string format.
response_message = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="simple",
)
print(response_message)

Sure! Did you hear about the apple who went to the doctor? It said, "I'm feeling a little rotten."


In [5]:
# Make an API call and get back the original ChatCompletion object.
# `output_format=None` returns OpenAI's ChatCompletion response itself.
response = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format=None,
)
print(response)

ChatCompletion(id='chatcmpl-8LIEizorIQmTB8O3C8YwAlafmnGGl', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content="Why did the apple go to the doctor? Because it wasn't peeling well!", role='assistant', function_call=None, tool_calls=None))], created=1700085680, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage=CompletionUsage(completion_tokens=17, prompt_tokens=24, total_tokens=41))


In [6]:
# Make an API call and get the response packaged together with its input and metadata.
# `output_format="bundle"` returns a `Bundle` object.
bundle = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle",
)
print(bundle)

Bundle(id='chatcmpl-8LIEj0bm89tEdBtdqm2gHzS5Ng8kg', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', metadata=None, response_message='Why did the apple stop in the middle of the road? Because it ran out of juice!', created_time=1700085681, model='gpt-3.5-turbo-1106', total_tokens=43, prompt_tokens=24, completion_tokens=19, seed=None, temperature=0.9, top_p=0.9, max_tokens=None)


In [7]:
# Make an API call and get the bundle (input + response + metadata) as a plain dict.
# `output_format="bundle_dict"` returns the same fields as "bundle", keyed by field name.
bundle_dict = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle_dict",
)
print(bundle_dict)

{'id': 'chatcmpl-8LIElldN2tGoBkLDl2dBdVrWWPWfJ', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'metadata': None, 'response_message': 'Why did the apple go to therapy? Because it had too many core issues!', 'created_time': 1700085683, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 40, 'prompt_tokens': 24, 'completion_tokens': 16, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


In [8]:
# A bundle dict can be converted directly into a pandas Series
# (each dict key becomes an index label).
s = pd.Series(bundle_dict)
s

id                              chatcmpl-8LIElldN2tGoBkLDl2dBdVrWWPWfJ
system_message                         You are a joke telling machine.
user_message                           Tell me something about apples.
metadata                                                          None
response_message     Why did the apple go to therapy? Because it ha...
created_time                                                1700085683
model                                               gpt-3.5-turbo-1106
total_tokens                                                        40
prompt_tokens                                                       24
completion_tokens                                                   16
seed                                                              None
temperature                                                        0.9
top_p                                                              0.9
max_tokens                                                        None
dtype:

In [9]:
# Multiple bundle dicts can be converted into a pandas DataFrame (one row per API call).
# NOTE: if an API call fails, then `None` is returned. `None` items cannot
# be directly converted into pd.DataFrame, so they are dropped before the
# DataFrame is built. `responses` itself still holds every result, including
# any `None`, so failures remain inspectable.
responses = []
with ProgressBar() as p:
    for _ in p.track(range(5)):
        response = model.create_chat_completion(
            system_message="You are a joke telling machine.",
            user_message="Tell me something about apples.",
            output_format="bundle_dict",
            temperature=0.4,
            seed=None,
        )
        responses.append(response)

df = pd.DataFrame([r for r in responses if r is not None])
df

Output()

Unnamed: 0,id,system_message,user_message,metadata,response_message,created_time,model,total_tokens,prompt_tokens,completion_tokens,seed,temperature,top_p,max_tokens
0,chatcmpl-8LIEvZuvS3EzxVgGWtDQAeSt0v7gy,You are a joke telling machine.,Tell me something about apples.,,Why did the apple go to the doctor? Because it...,1700085693,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
1,chatcmpl-8LIEwQDOYw4kAjsTy6sybftU3lcJt,You are a joke telling machine.,Tell me something about apples.,,Why did the apple go to the doctor? Because it...,1700085694,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
2,chatcmpl-8LIEwO36JC3epmP7r3nUKzTuJeU2d,You are a joke telling machine.,Tell me something about apples.,,Sure! Did you hear about the apple who went to...,1700085694,gpt-3.5-turbo-1106,60,24,36,,0.4,0.9,
3,chatcmpl-8LIExaDW5XR6XQUcQfc6HE0lFGYmb,You are a joke telling machine.,Tell me something about apples.,,Why did the apple go to the doctor? Because it...,1700085695,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
4,chatcmpl-8LIEyDt4t3SfCUdkEDnBYVJLFsamz,You are a joke telling machine.,Tell me something about apples.,,Why did the apple go to the doctor? Because it...,1700085696,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,


In [10]:
# If an API call fails, this method automatically retries by making another API call.
# By default it will retry 5 times.  Here `num_retries=2` changes that value to 2.
bundle_dict = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle_dict",
    num_retries=2,
)
print(bundle_dict)

{'id': 'chatcmpl-8LIEzFIdVzBSsJJ21hng3AZI8PRLc', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'metadata': None, 'response_message': "Sure, here's a joke about apples:\n\nWhy did the apple stop in the middle of the road?\n\nBecause it ran out of juice!", 'created_time': 1700085697, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 52, 'prompt_tokens': 24, 'completion_tokens': 28, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


In [11]:
# The `create_chat_completion` method is syntactic sugar for `chat_completion`:
# it simply formats the system and user messages into a `messages` list for us.
# Below we build that list ourselves and call `chat_completion` directly.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

bundle_dict = model.chat_completion(
    messages=messages,
    output_format="bundle_dict",
    num_retries=2,
)
print(bundle_dict)

{'id': 'chatcmpl-8LIF1fVbUbsko55IgU36Eb1EstOiz', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'metadata': None, 'response_message': "Why did the apple go to the doctor? Because it wasn't peeling well!", 'created_time': 1700085699, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 41, 'prompt_tokens': 24, 'completion_tokens': 17, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


 ### Making API calls using asynchronous (non-blocking) client

 This enables concurrent API calls.  We can control the max concurrency.

 Async uses the asyncio paradigm.  We need to run an asyncio event loop to
 use these functions.
 NOTE: a Jupyter notebook has an asyncio event loop running by default,
 but you need to create your own asyncio event loop in a Python script.

In [2]:
# Chat messages: one system prompt followed by one user prompt.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

# Top-level `await` is used here; the notebook already has an asyncio event loop running.
response = await model.async_chat_completion(messages=messages, num_retries=1)  # noqa: F704
response


ChatCompletion(id='chatcmpl-8LIL18DbBB8s5U3msQG6anMD6DQPJ', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='Why did the apple stop in the middle of the road?\n\nBecause it ran out of juice!', role='assistant', function_call=None, tool_calls=None))], created=1700086071, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage=CompletionUsage(completion_tokens=19, prompt_tokens=24, total_tokens=43))

In [3]:
# Repeat the same messages 5 times so that we can make 5 API calls.
# NOTE: `[messages] * 5` holds 5 references to the same list object, not 5 copies.
messages_list = [messages] * 5
messages_list

[[{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}]]

In [4]:
# Use Async Chat Completions, limit to 2 concurrent API calls at any given time
responses_list = await model.async_chat_completions(  # noqa: F704
    messages_list=messages_list,
    num_concurrent=2,
    num_retries=1,
    output_format="bundle_dict",
)

df = pd.DataFrame(responses_list)
df

Output()

Unnamed: 0,id,system_message,user_message,metadata,response_message,created_time,model,total_tokens,prompt_tokens,completion_tokens,seed,temperature,top_p,max_tokens
0,chatcmpl-8LIL3Z0WuNEHO6x6A3vjEU5qHYEya,You are a joke telling machine.,Tell me something about apples.,,Sure! Did you hear about the apple that joined...,1700086073,gpt-3.5-turbo-1106,47,24,23,,0.9,0.9,
1,chatcmpl-8LIL3kuZ296LHjrzvV2k250r1VjkX,You are a joke telling machine.,Tell me something about apples.,,Sure! Did you hear about the apple that went o...,1700086073,gpt-3.5-turbo-1106,48,24,24,,0.9,0.9,
2,chatcmpl-8LIL4ERAgZT72ybOqtqsyqSr1DC6s,You are a joke telling machine.,Tell me something about apples.,,Sure! Did you hear about the apple who went to...,1700086074,gpt-3.5-turbo-1106,68,24,44,,0.9,0.9,
3,chatcmpl-8LIL4q1BuM549bQSQ1hxc3p0kPXVE,You are a joke telling machine.,Tell me something about apples.,,Why did the apple go to the doctor? Because it...,1700086074,gpt-3.5-turbo-1106,41,24,17,,0.9,0.9,
4,chatcmpl-8LIL48ILhKaXSpRBpM94aav2uC551,You are a joke telling machine.,Tell me something about apples.,,Sure! Did you hear about the apple that went o...,1700086074,gpt-3.5-turbo-1106,53,24,29,,0.9,0.9,


 ### Example of using `Message` and `validation_callback`

 The `Message` wrapper allows packaging arbitrary user-defined metadata along with each message
 which is a good place to put labels, notes, etc.

 The `validation_callback` argument enables the user to define
 specific logic to validate the response from each API call to OpenAI
 for each message.  The callback function receives the original
 `messages` and the `response`.  If `messages` is a `Message` object,
 that same object is passed to `validation_callback`, giving access to all of its metadata.
 `response` is the LLM response after being parsed and formatted as specified
 in `output_format`.

In [5]:
# Chat messages: one system prompt followed by one user prompt.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]
# Wrap the messages together with user-defined metadata.
m = Message(messages=messages, metadata={"a": 1})


def validation_callback_fn(messages, response) -> bool:
    """Accept a response only when the input's metadata has ``"a"`` equal to 1.

    Both arguments are printed so that every invocation of the callback is
    visible in the notebook output.

    Args:
        messages: The original input given to the chat completion call. Expected
            to expose a `metadata` mapping (as a `Message` does). Inputs without
            a `metadata` attribute (e.g. a plain list of message dicts) or with
            `metadata=None` are rejected instead of raising.
        response: The response after being parsed into the requested
            `output_format`. Only printed; not used for the decision.

    Returns:
        `True` to accept the response, `False` to reject it.
    """
    print(f"In Callback. Messages: {messages}")
    print(f"In Callback. Response: {response}")
    print("\n")
    # `messages` may lack `.metadata` or carry `metadata=None`; treat both as "no metadata".
    metadata = getattr(messages, "metadata", None)
    if not metadata or "a" not in metadata:
        return False
    return metadata["a"] == 1


# Instantiate wrapper around OpenAI's API
model = ChatModel(model="gpt-3.5-turbo-1106")
# Make a chat completion that...
# - uses the Message wrapper and includes metadata (ChatModel automatically unpacks Message.messages)
# - parses the raw OpenAI response into the "bundle_dict" format
# - then calls the `validation_callback_fn` that we defined.  ChatModel always passes in the
#   original messages input and the parsed response as the 1st and 2nd arguments.  The
#   `validation_callback_fn` can contain any logic, but ultimately needs to return `True` vs `False`
#   to accept or reject the response.  If the response is rejected, ChatModel automatically retries.
# - allows up to 1 retry.  If it still fails or is rejected after 1 retry, `None` is returned.
response = model.chat_completion(
    m,
    output_format="bundle_dict",
    validation_callback=validation_callback_fn,
    num_retries=1,
)
response


In Callback. Messages: Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1})
In Callback. Response: {'id': 'chatcmpl-8LILBkfH7yk4FZO1Q8EStTOU2xQMI', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'metadata': {'a': 1}, 'response_message': 'Why did the apple go to school?\n\nBecause it wanted to be a "smart" apple!', 'created_time': 1700086081, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 43, 'prompt_tokens': 24, 'completion_tokens': 19, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}




{'id': 'chatcmpl-8LILBkfH7yk4FZO1Q8EStTOU2xQMI',
 'system_message': 'You are a joke telling machine.',
 'user_message': 'Tell me something about apples.',
 'metadata': {'a': 1},
 'response_message': 'Why did the apple go to school?\n\nBecause it wanted to be a "smart" apple!',
 'created_time': 1700086081,
 'model': 'gpt-3.5-turbo-1106',
 'total_tokens': 43,
 'prompt_tokens': 24,
 'completion_tokens': 19,
 'seed': None,
 'temperature': 0.9,
 'top_p': 0.9,
 'max_tokens': None}

In [6]:
# Multiple concurrent async chat completions using Message.
# NOTE: the 3rd Message is created with different metadata (it has no "a" key).
# This should cause `validation_callback_fn` to reject the response for the
# 3rd Message only, so only the 3rd Message is retried.
msg_list = [m] * 2 + [Message(messages=messages, metadata={"b": 2})]
msg_list

[Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1}),
 Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1}),
 Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'b': 2})]

In [7]:
# Use Async Chat Completions: at most 2 concurrent API calls at any given time,
# 1 retry, and `validation_callback_fn` deciding whether each response is accepted.
responses_list = await model.async_chat_completions(  # noqa: F704
    messages_list=msg_list,
    num_concurrent=2,
    num_retries=1,
    validation_callback=validation_callback_fn,
    output_format="bundle_dict",
)

Output()

In [8]:
# Examine responses.
# - The first 2 items should be valid responses.
# - The 3rd item should always be `None`, because its metadata can never pass
#   `validation_callback_fn`.
responses_list


[{'id': 'chatcmpl-8LILIcbSRuWmhzdoe9PSICAtjv80c',
  'system_message': 'You are a joke telling machine.',
  'user_message': 'Tell me something about apples.',
  'metadata': {'a': 1},
  'response_message': "Why did the apple break up with the orange? Because it couldn't handle the pithy comments!",
  'created_time': 1700086088,
  'model': 'gpt-3.5-turbo-1106',
  'total_tokens': 45,
  'prompt_tokens': 24,
  'completion_tokens': 21,
  'seed': None,
  'temperature': 0.9,
  'top_p': 0.9,
  'max_tokens': None},
 {'id': 'chatcmpl-8LILIcDJyzGrKomK2TVnDzWwooqkX',
  'system_message': 'You are a joke telling machine.',
  'user_message': 'Tell me something about apples.',
  'metadata': {'a': 1},
  'response_message': 'Why did the apple go to therapy?\n\nBecause it had too many core issues!',
  'created_time': 1700086088,
  'model': 'gpt-3.5-turbo-1106',
  'total_tokens': 40,
  'prompt_tokens': 24,
  'completion_tokens': 16,
  'seed': None,
  'temperature': 0.9,
  'top_p': 0.9,
  'max_tokens': None}

 ### Calling Async function from Sync code

In [9]:
# Instantiate wrapper around OpenAI's API
model = ChatModel(model="gpt-3.5-turbo-1106")

# Chat messages: one system prompt followed by one user prompt.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]
# Wrap the messages with metadata, then repeat the same Message 3 times
# so that 3 API calls are made.
m = Message(messages=messages, metadata={"a": 1})
msg_list = [m] * 3


In [10]:
# Up until now, we have used `await` to call async functions and wait for their completion.
# However, `await` can only be used within async functions (top-level notebook
# cells, as used here, being the exception).
# We are not allowed to use `await` inside a function that is not defined with `async def`.
responses = await model.async_chat_completions(  # noqa: F704
    messages_list=msg_list, num_concurrent=2, output_format="bundle"
)
responses


Output()

[Bundle(id='chatcmpl-8LILXRTjxlnIu0lk6yvEvZZHNGynL', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', metadata={'a': 1}, response_message='Why did the apple go to school?\n\nBecause it wanted to be a "smart" apple!', created_time=1700086103, model='gpt-3.5-turbo-1106', total_tokens=43, prompt_tokens=24, completion_tokens=19, seed=None, temperature=0.9, top_p=0.9, max_tokens=None),
 Bundle(id='chatcmpl-8LILSe9zlppqjSFLLppyJKvx9PK4q', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', metadata={'a': 1}, response_message="Sure! Did you hear about the apple who went to a party? He was the apple of everyone's eye!", created_time=1700086098, model='gpt-3.5-turbo-1106', total_tokens=47, prompt_tokens=24, completion_tokens=23, seed=None, temperature=0.9, top_p=0.9, max_tokens=None),
 Bundle(id='chatcmpl-8LILTqQPgygaqYYw4qKBFzwIQNERc', system_message='You are a joke telling machine.', user_message='T

In [11]:
# The helper function `sidethread_event_loop_async_runner` addresses this issue.
#
# It calls an async method from sync code without using the `await` keyword.
# This involves creating an event loop on another thread, then waiting for the
# result on the main thread and shutting down the event loop on the other thread.
# NOTE: the async method is called here without `await`, so the value passed as
# `async_function` is the object returned by that un-awaited call.

result = sidethread_event_loop_async_runner(
    async_function=model.async_chat_completions(
        messages_list=msg_list, num_concurrent=2, output_format="bundle"
    )
)
result


Output()

[Bundle(id='chatcmpl-8LILaCN5zBKFT2f1NVO8KVfdWOHKF', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', metadata={'a': 1}, response_message='Sure! Did you hear about the apple who went to school? He wanted to be a "smart" apple!', created_time=1700086106, model='gpt-3.5-turbo-1106', total_tokens=47, prompt_tokens=24, completion_tokens=23, seed=None, temperature=0.9, top_p=0.9, max_tokens=None),
 Bundle(id='chatcmpl-8LILZ5oC2cegD0t6n7tAGYDB8dgwI', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', metadata={'a': 1}, response_message="Why did the apple go to the doctor? Because it wasn't peeling well!", created_time=1700086105, model='gpt-3.5-turbo-1106', total_tokens=41, prompt_tokens=24, completion_tokens=17, seed=None, temperature=0.9, top_p=0.9, max_tokens=None),
 Bundle(id='chatcmpl-8LILZXMugSlaP2R2JGWiWF0tKmK4J', system_message='You are a joke telling machine.', user_message='Tell me somet