 ## Example of how to use the ChatModel wrapper

In [1]:
import pandas as pd

from automated_llm_eval.chat_model import ChatModel, Message
from automated_llm_eval.utils import ProgressBar

# Instantiate the wrapper around OpenAI's API.  Only the model name is given here,
# so every other setting keeps its default value (visible in the repr below).
model = ChatModel(model="gpt-3.5-turbo-1106")
# Alternative: pass model="gpt-4-1106-preview" to use a GPT-4 model instead.
model

ChatModel(sync_client=<openai.OpenAI object at 0x11ca1ce90>, async_client=<openai.AsyncOpenAI object at 0x11ca40190>, model='gpt-3.5-turbo-1106', temperature=0.9, top_p=0.9, max_tokens=None, n=1, seed=None)

In [2]:
# Other model settings can be adjusted at construction time; they are stored on the
# wrapper (see the repr below) and apply globally to all of its API calls.
model2 = ChatModel(model="gpt-3.5-turbo-1106", temperature=0.5, top_p=0.5, max_tokens=300, seed=42)
model2

ChatModel(sync_client=<openai.OpenAI object at 0x11f489150>, async_client=<openai.AsyncOpenAI object at 0x11f4ac490>, model='gpt-3.5-turbo-1106', temperature=0.5, top_p=0.5, max_tokens=300, n=1, seed=42)

In [3]:
# `max_tokens=None` means no max_tokens limit (this is the default).
# NOTE: this rebinds `model2`, replacing the instance created in the previous cell.
model2 = ChatModel(model="gpt-3.5-turbo-1106", temperature=0.5, top_p=0.5, max_tokens=None, seed=42)
model2

ChatModel(sync_client=<openai.OpenAI object at 0x11f489150>, async_client=<openai.AsyncOpenAI object at 0x11f4ac490>, model='gpt-3.5-turbo-1106', temperature=0.5, top_p=0.5, max_tokens=None, n=1, seed=42)

 ### Making API calls using synchronous (blocking) client

In [4]:
# Make an API call and get back only the text of the response message (a plain string).
# Note: `output_format = "simple"`
response_message = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="simple",
)
print(response_message)

Sure! Did you hear about the apple that went to school? It got voted "most popular" in the lunchbox!


In [5]:
# Make an API call and get back the original, unparsed `ChatCompletion` object.
# Note: `output_format = None`
response = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format=None,
)
print(response)

ChatCompletion(id='chatcmpl-8L3gRrlWtthxN4656Dhys0OtR1uit', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='Sure! Did you hear about the apple who joined a rock band? He was a real "core" musician!', role='assistant', function_call=None, tool_calls=None))], created=1700029739, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage=CompletionUsage(completion_tokens=23, prompt_tokens=24, total_tokens=47))


In [6]:
# Make an API call and get the response packaged together with its inputs and metadata
# (completion id, token counts, sampling settings) as a `Bundle` object.
# Note: `output_format = "bundle"`
bundle = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle",
)
print(bundle)

Bundle(id='chatcmpl-8L3gTSri0AarIL6H5H3uAx3u3S4WX', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', response_message="Why did the apple go to the doctor?\n\nBecause it wasn't peeling well!", created_time=1700029741, model='gpt-3.5-turbo-1106', total_tokens=41, prompt_tokens=24, completion_tokens=17, seed=None, temperature=0.9, top_p=0.9, max_tokens=None)


In [7]:
# Make an API call and get the same fields as a `Bundle`, but as a plain dict.
# Note: `output_format = "bundle_dict"`
bundle_dict = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle_dict",
)
print(bundle_dict)

{'id': 'chatcmpl-8L3gV11hvyxjjo3wJeKAEwLrukE96', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': 'Sure! Did you know that apples are great at making jokes? They always know how to "core" up a good laugh!', 'created_time': 1700029743, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 50, 'prompt_tokens': 24, 'completion_tokens': 26, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


In [8]:
# A bundle dict can be converted into a pandas Series directly:
# each dict key becomes an index label of the Series.
s = pd.Series(bundle_dict)
s

id                              chatcmpl-8L3gV11hvyxjjo3wJeKAEwLrukE96
system_message                         You are a joke telling machine.
user_message                           Tell me something about apples.
response_message     Sure! Did you know that apples are great at ma...
created_time                                                1700029743
model                                               gpt-3.5-turbo-1106
total_tokens                                                        50
prompt_tokens                                                       24
completion_tokens                                                   26
seed                                                              None
temperature                                                        0.9
top_p                                                              0.9
max_tokens                                                        None
dtype: object

In [9]:
# Multiple message bundle dicts can be converted into a pandas DataFrame.
# NOTE: if an API call fails, then `None` is returned instead of a bundle dict.
# `None` items cannot be directly converted into DataFrame rows, so they are
# skipped while collecting the responses.
responses = []
with ProgressBar() as p:
    for _ in p.track(range(5)):
        response = model.create_chat_completion(
            system_message="You are a joke telling machine.",
            user_message="Tell me something about apples.",
            output_format="bundle_dict",
            temperature=0.4,  # per-call value; shows up in the `temperature` column
            seed=None,
        )
        # Keep only successful calls so that every collected item is a dict.
        if response is not None:
            responses.append(response)

df = pd.DataFrame(responses)
df

Output()

Unnamed: 0,id,system_message,user_message,response_message,created_time,model,total_tokens,prompt_tokens,completion_tokens,seed,temperature,top_p,max_tokens
0,chatcmpl-8L3gbJgmQkv7MAp1k9g7PWMFVf2fv,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700029749,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
1,chatcmpl-8L3gbjIEI67yx86MuT8AxJUcvTUZl,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700029749,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
2,chatcmpl-8L3gc07NrWpn4pit6ENZI9kg669at,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700029750,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
3,chatcmpl-8L3gdnQ4r7PYeYyzMwK0US99ergnk,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700029751,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
4,chatcmpl-8L3gegHtJNu4R2agu2qe4fa6WJCPF,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700029752,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,


In [10]:
# If an API call fails, this method will automatically retry and make another API call.
# By default it will retry 5 times.  Here `num_retries=2` changes that limit to 2
# for this call.
bundle_dict = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="bundle_dict",
    num_retries=2,
)
print(bundle_dict)

{'id': 'chatcmpl-8L3gh1rg2KkLdMJ69kbXWUChXSSZW', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': 'Sure! Did you hear about the apple who won the marathon? He took the lead because he knew how to stay core-geous!', 'created_time': 1700029755, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 52, 'prompt_tokens': 24, 'completion_tokens': 28, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


In [11]:
# The `create_chat_completion` method is syntactic sugar for `chat_completion`:
# it simply formats the system/user messages for us.  Here the `messages` list is
# built by hand and passed to `chat_completion` directly.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

bundle_dict = model.chat_completion(
    messages=messages,
    output_format="bundle_dict",
    num_retries=2,
)
print(bundle_dict)

{'id': 'chatcmpl-8L3giY5jqb9h5ym4lIk5tcqSjbkt7', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': 'Why did the apple go to therapy? Because it had too many core issues!', 'created_time': 1700029756, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 40, 'prompt_tokens': 24, 'completion_tokens': 16, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


 ### Making API calls using asynchronous (non-blocking) client

 This enables concurrent API calls.  We can control the max concurrency.

 Async uses the asyncio paradigm.  We need to run an asyncio event loop to
 use these functions.
 NOTE: a Jupyter notebook has an asyncio event loop running by default,
 but in a Python script you need to create your own asyncio event loop
 (for example with `asyncio.run(...)`).

In [4]:
# Same prompt as in the synchronous examples, in the raw `messages` list format.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

# Single asynchronous call.  No `output_format` is passed here and the result
# displayed below is the raw `ChatCompletion` object.
response = await model.async_chat_completion(messages=messages, num_retries=1)  # noqa: F704
response


ChatCompletion(id='chatcmpl-8L3lE2R2kHYUr9pLLEjAi25JYdgEv', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content="Why did the apple break up with the orange? Because it couldn't peel the connection anymore!", role='assistant', function_call=None, tool_calls=None))], created=1700030036, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage=CompletionUsage(completion_tokens=19, prompt_tokens=24, total_tokens=43))

In [3]:
# Duplicate `messages` 5 times so that we can make 5 API calls.
# NOTE: `[messages] * 5` repeats a reference to the *same* list object five times.
# That is fine as long as the entries are only read; make copies if they ever
# need to be modified independently.
messages_list = [messages] * 5
messages_list

[[{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}]]

In [5]:
# Use Async Chat Completions, limit to 2 concurrent API calls at any given time
responses_list = await model.async_chat_completions(  # noqa: F704
    messages_list=messages_list,
    num_concurrent=2,
    num_retries=1,
    output_format="bundle_dict",
)

df = pd.DataFrame(responses_list)
df

Output()

Unnamed: 0,id,system_message,user_message,response_message,created_time,model,total_tokens,prompt_tokens,completion_tokens,seed,temperature,top_p,max_tokens
0,chatcmpl-8L3lH6tXF0YpSS2tD7Qb61ueRNAS3,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to therapy? Because it ha...,1700030039,gpt-3.5-turbo-1106,40,24,16,,0.9,0.9,
1,chatcmpl-8L3lH5r91VytJLTG90igzFjLsy8W7,You are a joke telling machine.,Tell me something about apples.,Why did the apple break up with the banana? Be...,1700030039,gpt-3.5-turbo-1106,45,24,21,,0.9,0.9,
2,chatcmpl-8L3lIibNjMiOWHj5uXh8zbwSdZeAP,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple who went to...,1700030040,gpt-3.5-turbo-1106,54,24,30,,0.9,0.9,
3,chatcmpl-8L3lIqWJnB1E30PvtHr18pNLa63AP,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple who won the...,1700030040,gpt-3.5-turbo-1106,47,24,23,,0.9,0.9,
4,chatcmpl-8L3lIGl1VGjvywbjHXiHx1xjPD01e,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700030040,gpt-3.5-turbo-1106,41,24,17,,0.9,0.9,


 ### Example of using `Message` and `validation_callback`

 The `Message` wrapper allows packaging arbitrary user-defined metadata along with each message,
 which is a good place to put labels, notes, etc.

 The `validation_callback` argument enables the user to define
 specific logic to validate the response from each API call to OpenAI
 for each message.  The callback function receives the original
 `messages` and the `response`.  If `messages` is a `Message` object,
 that same object is passed to `validation_callback`, giving access to all of its metadata.
 `response` is the LLM response after being parsed and formatted as specified
 in `output_format`.

In [6]:
# Same prompt as in the earlier examples.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]
# Wrap the raw message list in a `Message` so that user-defined metadata travels with it.
m = Message(messages=messages, metadata={"a": 1})


def validation_callback_fn(messages, response) -> bool:
    """Accept a response only when the input's metadata has ``"a"`` equal to 1.

    Args:
        messages: The original input of the API call.  Expected to expose a
            ``metadata`` mapping (as a ``Message`` does); an input without
            metadata, such as a plain list of message dicts, is rejected.
        response: The response to validate.  It is only printed here; the
            decision depends solely on the metadata.

    Returns:
        ``True`` to accept the response, ``False`` to reject it.
    """
    print(f"In Callback. Messages: {messages}")
    print(f"In Callback. Response: {response}")
    print("\n")
    # A plain list has no `.metadata` and metadata may be `None`; treat both as
    # "no metadata" instead of raising inside the callback.
    metadata = getattr(messages, "metadata", None) or {}
    # `.get` yields `None` for a missing key, which never equals 1.
    return metadata.get("a") == 1


# Instantiate wrapper around OpenAI's API
# (this rebinds `model`, using the same arguments as the first cell).
model = ChatModel(model="gpt-3.5-turbo-1106")
# Make ChatCompletion with...
# - using Message wrapper and include metadata (ChatModel automatically unpacks Message.messages)
# - parse raw OpenAI response into "simple" string format
# - then call the `validation_callback_fn` that we defined.  ChatModel always passes in
#   original messages input and parsed response as the 1st and 2nd arguments.  The
#   `validation_callback_fn` can contain any logic, but ultimately needs to return `True` or
#   `False` to accept or reject the response.  If the response is rejected, ChatModel
#   automatically retries.
# - allow up to 1 retry.  If still fails/rejected after 1 retry, then will return `None`.
response = model.chat_completion(
    m,
    output_format="simple",
    validation_callback=validation_callback_fn,
    num_retries=1,
)
response


In Callback. Messages: Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1})
In Callback. Response: Why did the apple go to the doctor? Because it wasn't peeling well!




"Why did the apple go to the doctor? Because it wasn't peeling well!"

In [7]:
# Multiple concurrent async chat completions using Message
# NOTE: we make the 3rd Message with different metadata (it has no "a" key).  This should
# cause the `validation_callback_fn` to reject the response for only the 3rd Message in list
# and retry only the 3rd Message.
# (`[m] * 2` places the same `Message` object in the list twice.)
m_list = [m] * 2 + [Message(messages=messages, metadata={"b": 2})]
m_list

[Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1}),
 Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1}),
 Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'b': 2})]

In [10]:
# Use async chat completions, limited to 2 concurrent API calls at any given time & 1 retry.
# Every response is checked by `validation_callback_fn` and returned in "simple" string format.
responses_list = await model.async_chat_completions(  # noqa: F704
    messages_list=m_list,
    num_concurrent=2,
    num_retries=1,
    validation_callback=validation_callback_fn,
    output_format="simple",
)

Output()

In [11]:
# Examine responses.
# - The first 2 entries should be valid response strings.
# - The 3rd entry should always be `None`, because its metadata ({"b": 2}) can never
#   pass `validation_callback_fn`, so the response is rejected on every attempt.
responses_list


['Why did the apple stop in the middle of the road? Because it ran out of juice!',
 'Sure! Did you hear about the apple who went to the doctor? The doctor said, "You\'re not looking so good, you\'re a little \'core\'!"',
 None]