 ## Example of how to use the ChatModel wrapper

In [1]:
import pandas as pd

from automated_llm_eval.chat_model import ChatModel, Message
from automated_llm_eval.utils import ProgressBar, sidethread_event_loop_async_runner

# Instantiate the wrapper around OpenAI's API.  Only the model name is given,
# so every other setting (temperature, top_p, max_tokens, n, seed) keeps its
# default value; the repr displayed by this cell lists them.
model = ChatModel(model="gpt-3.5-turbo-1106")
# Alternative: pick a different model by changing the name, e.g.
# model = ChatModel(model="gpt-4-1106-preview")
model

ChatModel(sync_client=<openai.OpenAI object at 0x11ca88d90>, async_client=<openai.AsyncOpenAI object at 0x11caac1d0>, model='gpt-3.5-turbo-1106', temperature=0.9, top_p=0.9, max_tokens=None, n=1, seed=None)

In [2]:
# Settings passed to the constructor (here temperature, top_p, max_tokens and
# seed) apply globally to all API calls made through this instance.
model2 = ChatModel(model="gpt-3.5-turbo-1106", temperature=0.5, top_p=0.5, max_tokens=300, seed=42)
model2

ChatModel(sync_client=<openai.OpenAI object at 0x11f488ed0>, async_client=<openai.AsyncOpenAI object at 0x11f4ac390>, model='gpt-3.5-turbo-1106', temperature=0.5, top_p=0.5, max_tokens=300, n=1, seed=42)

In [3]:
# `max_tokens=None` means no max-token limit (this is the default).
# NOTE: this rebinds `model2`, replacing the instance created in the previous cell.
model2 = ChatModel(model="gpt-3.5-turbo-1106", temperature=0.5, top_p=0.5, max_tokens=None, seed=42)
model2

ChatModel(sync_client=<openai.OpenAI object at 0x11f488ed0>, async_client=<openai.AsyncOpenAI object at 0x11f4ac390>, model='gpt-3.5-turbo-1106', temperature=0.5, top_p=0.5, max_tokens=None, n=1, seed=42)

 ### Making API calls using synchronous (blocking) client

In [4]:
# Make a blocking API call and get back only the response message.
# Note: `output_format="simple"` parses the raw OpenAI response into a plain string.
response_message = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="simple",
)
print(response_message)

Sure! Did you hear about the apple who went to the doctor? It wasn't feeling well, so the doctor said, "You're just a little too core-ny!"


In [5]:
# Make an API call and get the original OpenAI `ChatCompletion` object.
# Note: `output_format=None` skips the wrapper's parsing, so the full object
# (choices, usage, system fingerprint, ...) is available.
response = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format=None,
)
print(response)

ChatCompletion(id='chatcmpl-8LHFVTIKmcNyrmyoBng465aJUzPmJ', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='Sure! Did you hear about the apple that went to a comedy club? It was a real "core" comedian!', role='assistant', function_call=None, tool_calls=None))], created=1700081885, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage=CompletionUsage(completion_tokens=24, prompt_tokens=24, total_tokens=48))


In [6]:
# Make an API call and get the response packaged with its input + metadata.
# Note: `output_format="bundle"` yields a `Bundle` holding the system/user
# messages, the response text, token counts and the sampling settings used.
bundle = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle",
)
print(bundle)

Bundle(id='chatcmpl-8LHFWxY85RSdhc9Jur0LEwdcuhVIP', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', response_message='Sure! Did you hear about the apple that joined a band? It was a real "fruit" musician!', created_time=1700081886, model='gpt-3.5-turbo-1106', total_tokens=46, prompt_tokens=24, completion_tokens=22, seed=None, temperature=0.9, top_p=0.9, max_tokens=None)


In [7]:
# Make an API call and get the same `Bundle` fields as a plain dict.
# Note: `output_format="bundle_dict"`
bundle_dict = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle_dict",
)
print(bundle_dict)

{'id': 'chatcmpl-8LHFX3WIvSFJwf6tdnjoQ9H4Ix6XO', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': "Why did the apple go to the doctor? Because it wasn't peeling well!", 'created_time': 1700081887, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 41, 'prompt_tokens': 24, 'completion_tokens': 17, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


In [8]:
# A bundle dict converts directly into a pandas Series: each dict key becomes
# an index label and each value the corresponding entry.
s = pd.Series(bundle_dict)
s

id                              chatcmpl-8LHFX3WIvSFJwf6tdnjoQ9H4Ix6XO
system_message                         You are a joke telling machine.
user_message                           Tell me something about apples.
response_message     Why did the apple go to the doctor? Because it...
created_time                                                1700081887
model                                               gpt-3.5-turbo-1106
total_tokens                                                        41
prompt_tokens                                                       24
completion_tokens                                                   17
seed                                                              None
temperature                                                        0.9
top_p                                                              0.9
max_tokens                                                        None
dtype: object

In [4]:
# Multiple message bundle dicts can be converted into a pandas DataFrame.
# NOTE: if an API call fails, `None` is returned instead of a dict, and a list
# containing `None` cannot be converted directly into a pd.DataFrame.  The
# failed entries are therefore dropped when the DataFrame is built.
responses = []
with ProgressBar() as p:
    for _ in p.track(range(5)):
        response = model.create_chat_completion(
            system_message="You are a joke telling machine.",
            user_message="Tell me something about apples.",
            output_format="bundle_dict",
            temperature=0.4,  # per-call value instead of the model-level setting
            seed=None,
        )
        # Keep the raw result (possibly `None`) so `responses` still has one
        # entry per attempted call.
        responses.append(response)

# Build the frame only from the calls that succeeded.
df = pd.DataFrame([r for r in responses if r is not None])
df

Working... 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5/5 • 0:00:13 • 0:00:00


Unnamed: 0,id,system_message,user_message,response_message,created_time,model,total_tokens,prompt_tokens,completion_tokens,seed,temperature,top_p,max_tokens
0,chatcmpl-8LHTZwe8mtAmyl3T06VXpBGnexLfT,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple who won the...,1700082757,gpt-3.5-turbo-1106,48,24,24,,0.4,0.9,
1,chatcmpl-8LHTbLMzUcU92YIXRXcxyV1Qrg9n6,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700082759,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
2,chatcmpl-8LHTd1NGz4ddsbrOLAeivECQ6BfDV,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700082761,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
3,chatcmpl-8LHTln8gLfOQoWeE2BVcbpsq7TMJm,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple who went to...,1700082769,gpt-3.5-turbo-1106,59,24,35,,0.4,0.9,
4,chatcmpl-8LHTndPlf4u989oUvyPp6y9CPPaa9,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple that went t...,1700082771,gpt-3.5-turbo-1106,45,24,21,,0.4,0.9,


In [10]:
# If an API call fails, this method automatically retries by making another API call.
# By default it will retry 5 times.  Here `num_retries=2` lowers that limit to 2.
bundle_dict = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle_dict",
    num_retries=2,
)
print(bundle_dict)

{'id': 'chatcmpl-8LHFe3TwPLPVF5apK6rbRGC34iLR9', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': "Why did the apple go to the doctor? Because it wasn't peeling well!", 'created_time': 1700081894, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 41, 'prompt_tokens': 24, 'completion_tokens': 17, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


In [11]:
# The `create_chat_completion` method is syntactic sugar for `chat_completion`:
# it simply formats the system/user strings into the messages list for us.
# Below, the same list is built by hand (one dict per message, with "role" and
# "content" keys) and passed to `chat_completion` directly.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

bundle_dict = model.chat_completion(
    messages=messages,
    output_format="bundle_dict",
    num_retries=2,
)
print(bundle_dict)

{'id': 'chatcmpl-8LHFfL1BpS9A4Z2dk6EIpQHnIXVP4', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': "Why did the apple go to the doctor? Because it wasn't peeling well!", 'created_time': 1700081895, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 41, 'prompt_tokens': 24, 'completion_tokens': 17, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


 ### Making API calls using asynchronous (non-blocking) client

 This enables concurrent API calls.  We can control the max concurrency.

 Async uses the asyncio paradigm.  We need to run an asyncio event loop to
 use these functions.
 NOTE: a Jupyter notebook already has an asyncio event loop running by default,
 but in a plain Python script you need to create your own event loop (e.g. with `asyncio.run`).

In [12]:
# Build the messages list by hand, then make a single non-blocking API call.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

# Top-level `await` works in a notebook cell because an event loop is already
# running.  No `output_format` is passed here; the displayed result is the
# original `ChatCompletion` object.
response = await model.async_chat_completion(messages=messages, num_retries=1)  # noqa: F704
response


ChatCompletion(id='chatcmpl-8LHFiXgCVD8lu3ncjwKD0ULlgi5Rx', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='Sure! Did you hear about the apple who went to the doctor? The doctor said, "You\'re not looking so hot, you should probably see a cider-ist!"', role='assistant', function_call=None, tool_calls=None))], created=1700081898, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage=CompletionUsage(completion_tokens=35, prompt_tokens=24, total_tokens=59))

In [13]:
# Repeat the messages 5 times so that we can make 5 API calls.
# NOTE: list repetition does not copy -- all 5 entries refer to the same
# `messages` list object, which is fine as long as nothing mutates it.
messages_list = [messages] * 5
messages_list

[[{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}]]

In [14]:
# Use Async Chat Completions, limit to 2 concurrent API calls at any given time
responses_list = await model.async_chat_completions(  # noqa: F704
    messages_list=messages_list,
    num_concurrent=2,
    num_retries=1,
    output_format="bundle_dict",
)

df = pd.DataFrame(responses_list)
df

Output()

Unnamed: 0,id,system_message,user_message,response_message,created_time,model,total_tokens,prompt_tokens,completion_tokens,seed,temperature,top_p,max_tokens
0,chatcmpl-8LHFkpQFlOy3Mm1kRU5ztKUBrGSvn,You are a joke telling machine.,Tell me something about apples.,Why did the apple stop in the middle of the ro...,1700081900,gpt-3.5-turbo-1106,43,24,19,,0.9,0.9,
1,chatcmpl-8LHFkcxmbRkorvjQ0rnqMdaA7jekx,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700081900,gpt-3.5-turbo-1106,41,24,17,,0.9,0.9,
2,chatcmpl-8LHFl5eCUj6urcXpbH1QdD3p2zZ7U,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple that went o...,1700081901,gpt-3.5-turbo-1106,52,24,28,,0.9,0.9,
3,chatcmpl-8LHFm4YFHXxzZS8kGPvYPQDqahpXM,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple that won th...,1700081902,gpt-3.5-turbo-1106,51,24,27,,0.9,0.9,
4,chatcmpl-8LHFnBoOMlN4WsVJfctUsrjUJMRYC,You are a joke telling machine.,Tell me something about apples.,Why did the apple break up with the banana?\n\...,1700081903,gpt-3.5-turbo-1106,43,24,19,,0.9,0.9,


 ### Example of using `Message` and `validation_callback`

 The `Message` wrapper allows packaging arbitrary user-defined metadata along with each message
 which is a good place to put labels, notes, etc.

 The `validation_callback` argument enables the user to define
 specific logic to validate the response from each API call to OpenAI
 for each message.  Passed into the callback function is the original
 `messages` and the `response`.  If `messages` is a `Message` object, that same
 object is passed to `validation_callback`, giving access to all of its metadata.
 `response` is the LLM response after being parsed and formatted as specified
 by `output_format`.

In [15]:
# Build the raw messages list, then wrap it in a `Message` so that arbitrary
# user-defined metadata travels with it.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]
# The metadata {"a": 1} is what `validation_callback_fn` checks for.
m = Message(messages=messages, metadata={"a": 1})


def validation_callback_fn(messages, response) -> bool:
    """Accept a response only when the input carries ``metadata["a"] == 1``.

    Args:
        messages: The original input of the API call.  When it is a `Message`
            (or any object exposing a `.metadata` container), the metadata is
            inspected.  Inputs without a `.metadata` attribute, such as a plain
            list of role/content dicts, are rejected instead of raising.
        response: The parsed LLM response.  It is only printed, not inspected.

    Returns:
        True to accept the response, False to reject it.
    """
    print(f"In Callback. Messages: {messages}")
    print(f"In Callback. Response: {response}")
    print("\n")
    # A plain messages list has no `.metadata`; treat missing (or `None`)
    # metadata as empty so the callback rejects cleanly rather than raising
    # AttributeError.
    metadata = getattr(messages, "metadata", None) or {}
    return "a" in metadata and metadata["a"] == 1


# Instantiate wrapper around OpenAI's API.
# NOTE: this rebinds `model` to a fresh instance built with the same model name.
model = ChatModel(model="gpt-3.5-turbo-1106")
# Make ChatCompletion with...
# - using Message wrapper and include metadata (ChatModel automatically unpacks Message.messages)
# - parse raw OpenAI response into "simple" string format
# - then call the `validation_callback_fn` that we defined.  ChatModel always passes in
#   original messages input and parsed response as the 1st and 2nd arguments.  The
#   `validation_callback_fn` can contain any logic, but ultimately needs to return `True` vs `False`
#   to accept or reject the response.  If the response is rejected, ChatModel automatically retries.
# - allow up to 1 retry.  If still fails/rejected after 1 retry, then will return `None`.
# Here the metadata is {"a": 1}, so the callback accepts the response.
response = model.chat_completion(
    m,
    output_format="simple",
    validation_callback=validation_callback_fn,
    num_retries=1,
)
response


In Callback. Messages: Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1})
In Callback. Response: Why did the apple stop in the middle of the road?

Because it ran out of juice!




'Why did the apple stop in the middle of the road?\n\nBecause it ran out of juice!'

In [16]:
# Multiple concurrent async chat completions using Message.
# NOTE: we make the 3rd Message with different metadata ({"b": 2} has no "a" key).
# This should cause the `validation_callback_fn` to reject the response for only
# the 3rd Message in the list and retry only the 3rd Message.
# The first two entries are the same `m` object repeated, not copies.
msg_list = [m] * 2 + [Message(messages=messages, metadata={"b": 2})]
msg_list

[Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1}),
 Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1}),
 Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'b': 2})]

In [17]:
# Use async chat completions, limited to 2 concurrent API calls at any given time
# and 1 retry per message.  Each parsed ("simple" string) response is checked
# with `validation_callback_fn`.
responses_list = await model.async_chat_completions(  # noqa: F704
    messages_list=msg_list,
    num_concurrent=2,
    num_retries=1,
    validation_callback=validation_callback_fn,
    output_format="simple",
)

Output()

In [18]:
# Examine responses.
# - We should get valid responses for the first 2 messages.
# - The 3rd response should always be `None`: its metadata has no "a" key, so
#   `validation_callback_fn` rejects every attempt, including the retry.
responses_list


["Why did the apple go to the doctor? Because it wasn't peeling well!",
 'Why did the apple stop in the middle of the road? Because it ran out of juice!',
 None]

 ### Calling Async function from Sync code

In [19]:
# Fresh model and inputs for the examples below: the same `Message` repeated 3 times.
model = ChatModel(model="gpt-3.5-turbo-1106")

system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]
m = Message(messages=messages, metadata={"a": 1})
# All 3 entries refer to the same `m` object.
msg_list = [m] * 3


In [20]:
# Up until now, we have used `await` to call async functions and wait for their completion.
# However, `await` can only be used inside a function defined with `async def`
# (or at the top level of a notebook cell, as here).  It is not allowed inside
# a regular function defined with plain `def`.
responses = await model.async_chat_completions(  # noqa: F704
    messages_list=msg_list, num_concurrent=2, output_format="bundle"
)
responses


Output()

[Bundle(id='chatcmpl-8LHG0xLSG28e74jLvsIE7t7R9b1mm', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', response_message='Why did the apple stop in the middle of the road? Because it ran out of juice!', created_time=1700081916, model='gpt-3.5-turbo-1106', total_tokens=43, prompt_tokens=24, completion_tokens=19, seed=None, temperature=0.9, top_p=0.9, max_tokens=None),
 Bundle(id='chatcmpl-8LHG02RSYvxNGW0IfApWHnwaJv29n', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', response_message='Why did the apple go to therapy? Because it had a core issue!', created_time=1700081916, model='gpt-3.5-turbo-1106', total_tokens=39, prompt_tokens=24, completion_tokens=15, seed=None, temperature=0.9, top_p=0.9, max_tokens=None),
 Bundle(id='chatcmpl-8LHG1HlqTAMrReI3wB66y2CvKZLRY', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', response_message='Why did the ap

In [21]:
# We have created a helper function to address this issue.
#
# Call an async method from sync code without using the `await` keyword.
# This involves creating an event loop on another thread, then
# waiting for the result on the main thread and shutting down the event loop
# on the other thread.
#
# NOTE: `model.async_chat_completions(...)` is called here WITHOUT `await`, so
# what is passed as `async_function` is the not-yet-awaited result of that call
# (presumably a coroutine object -- confirm against the helper's signature).

result = sidethread_event_loop_async_runner(
    async_function=model.async_chat_completions(
        messages_list=msg_list, num_concurrent=2, output_format="bundle"
    )
)
result


Output()

[Bundle(id='chatcmpl-8LHG5m7piUbUsBWqFS2nwuaQuOhVu', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', response_message="Why did the apple break up with the orange? Because it couldn't find a core connection!", created_time=1700081921, model='gpt-3.5-turbo-1106', total_tokens=43, prompt_tokens=24, completion_tokens=19, seed=None, temperature=0.9, top_p=0.9, max_tokens=None),
 Bundle(id='chatcmpl-8LHG5cvTHsyd6QBL33g6hTKqtv6SP', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', response_message='Sure! Did you hear about the apple that went on a date with a banana? It was a fruit match made in heaven!', created_time=1700081921, model='gpt-3.5-turbo-1106', total_tokens=50, prompt_tokens=24, completion_tokens=26, seed=None, temperature=0.9, top_p=0.9, max_tokens=None),
 Bundle(id='chatcmpl-8LHG6HJ0Zs3AfIDArMRRoyzp1Cq6X', system_message='You are a joke telling machine.', user_message='Tell me some