 ## Example of how to use the ChatModel wrapper

In [1]:
import pandas as pd

from automated_llm_eval.chat_model import ChatModel, Message
from automated_llm_eval.utils import ProgressBar

# Instantiate the wrapper around OpenAI's API. Only the model name is passed
# here; the repr displayed by this cell shows the values of the other settings.
model = ChatModel(model="gpt-3.5-turbo-1106")
# To try a different model, pass its name instead, e.g. model="gpt-4-1106-preview".
model

ChatModel(sync_client=<openai.OpenAI object at 0x11f389690>, async_client=<openai.AsyncOpenAI object at 0x11f3aca50>, model='gpt-3.5-turbo-1106', temperature=0.9, top_p=0.9, max_tokens=None, n=1, seed=None)

In [2]:
# Settings given to the constructor are applied globally, i.e. to all API
# calls made through this instance.
model2 = ChatModel(
    model="gpt-3.5-turbo-1106",
    temperature=0.5,
    top_p=0.5,
    max_tokens=300,
    seed=42,
)
model2

ChatModel(sync_client=<openai.OpenAI object at 0x11f389690>, async_client=<openai.AsyncOpenAI object at 0x11f3aca50>, model='gpt-3.5-turbo-1106', temperature=0.5, top_p=0.5, max_tokens=300, n=1, seed=42)

In [3]:
# Passing `max_tokens=None` (the default) means no max_token limit is applied.
model2 = ChatModel(
    model="gpt-3.5-turbo-1106",
    temperature=0.5,
    top_p=0.5,
    max_tokens=None,
    seed=42,
)
model2

ChatModel(sync_client=<openai.OpenAI object at 0x11f389690>, async_client=<openai.AsyncOpenAI object at 0x11f3aca50>, model='gpt-3.5-turbo-1106', temperature=0.5, top_p=0.5, max_tokens=None, n=1, seed=42)

 ### Making API calls using synchronous (blocking) client

In [4]:
# Make a blocking API call and get back just the response message.
# Note: `output_format = "simple"` -- the response is parsed into a plain string,
# which is why printing it shows only the joke text.
response_message = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="simple",
)
print(response_message)

Why did the apple break up with the banana? 
Because it couldn't find a core connection!


In [5]:
# Make API call, get the original (unparsed) ChatCompletion object.
# Note: `output_format = None`
# The printed object includes the choices, the model name and the token usage.
response = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format=None,
)
print(response)

ChatCompletion(id='chatcmpl-8L3W9TIldkm9lp5Id5smuRHgN0w9e', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content="Why did the apple go to the doctor? Because it wasn't peeling well!", role='assistant', function_call=None, tool_calls=None))], created=1700029101, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage=CompletionUsage(completion_tokens=17, prompt_tokens=24, total_tokens=41))


In [6]:
# Make API call, get the response packaged together with its input + metadata.
# Note: `output_format = "message_bundle"`
# The printed MessageBundle holds the system/user/response messages plus the id,
# creation time, model, token counts and sampling settings of the call.
message_bundle = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="message_bundle",
)
print(message_bundle)

MessageBundle(id='chatcmpl-8L3WAFh0fxiMJi3jVhpOkctDBV7ZT', system_message='You are a joke telling machine.', user_message='Tell me something about apples.', response_message="Why did the apple go to the doctor? Because it wasn't peeling well!", created_time=1700029102, model='gpt-3.5-turbo-1106', total_tokens=41, prompt_tokens=24, completion_tokens=17, seed=None, temperature=0.9, top_p=0.9, max_tokens=None)


In [7]:
# Make API call, get the MessageBundle as a plain dict.
# Note: `output_format = "message_bundle_dict"`
# The dict form is what the pandas conversions later in this notebook consume.
message_bundle_dict = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="message_bundle_dict",
)
print(message_bundle_dict)

{'id': 'chatcmpl-8L3WByxod4QVo6QmhwaTTkwj36SPL', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': "Why did the apple go to the doctor? Because it wasn't peeling well!", 'created_time': 1700029103, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 41, 'prompt_tokens': 24, 'completion_tokens': 17, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


In [8]:
# A message bundle dict converts directly into a pandas Series:
# each dict key becomes an index label and each value the corresponding entry.
s = pd.Series(message_bundle_dict)
s

id                              chatcmpl-8L3WByxod4QVo6QmhwaTTkwj36SPL
system_message                         You are a joke telling machine.
user_message                           Tell me something about apples.
response_message     Why did the apple go to the doctor? Because it...
created_time                                                1700029103
model                                               gpt-3.5-turbo-1106
total_tokens                                                        41
prompt_tokens                                                       24
completion_tokens                                                   17
seed                                                              None
temperature                                                        0.9
top_p                                                              0.9
max_tokens                                                        None
dtype: object

In [9]:
# Multiple message bundle dicts can be converted into a pandas DataFrame.
# NOTE: if an API call fails, then `None` is returned instead of a dict. `None`
# items cannot be directly converted into pd.DataFrame, so they are filtered out
# when the frame is built (`responses` itself still keeps them for inspection).
responses = []
with ProgressBar() as p:
    for _ in p.track(range(5)):
        # `temperature` and `seed` are passed per call here; the resulting table
        # shows temperature=0.4 for these rows while top_p keeps the model's 0.9.
        response = model.create_chat_completion(
            system_message="You are a joke telling machine.",
            user_message="Tell me something about apples.",
            output_format="message_bundle_dict",
            temperature=0.4,
            seed=None,
        )
        responses.append(response)

# Drop failed calls (`None`) so that a single failure cannot break the conversion.
df = pd.DataFrame([r for r in responses if r is not None])
df

Output()

Unnamed: 0,id,system_message,user_message,response_message,created_time,model,total_tokens,prompt_tokens,completion_tokens,seed,temperature,top_p,max_tokens
0,chatcmpl-8L3WCfvuieczCx930XRR6nf4oAp6L,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple who went to...,1700029104,gpt-3.5-turbo-1106,57,24,33,,0.4,0.9,
1,chatcmpl-8L3WD817kxZs2YBA9iE21oyN0Qy7N,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to therapy? Because it ha...,1700029105,gpt-3.5-turbo-1106,40,24,16,,0.4,0.9,
2,chatcmpl-8L3WESOE8EPLVXy2da9RIYwD4Ly7i,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to therapy? Because it ha...,1700029106,gpt-3.5-turbo-1106,40,24,16,,0.4,0.9,
3,chatcmpl-8L3WEUWxZNWxIDQH7csenDj9RPypv,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700029106,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,
4,chatcmpl-8L3WHFT97y0VMqI5WS2XVkXnVRtb4,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700029109,gpt-3.5-turbo-1106,41,24,17,,0.4,0.9,


In [10]:
# If an API call fails, this method will automatically retry and make another API call.
# By default it will retry 5 times.  Here `num_retries=2` lowers that limit to 2.
message_bundle_dict = model.create_chat_completion(
    system_message="You are a joke telling machine.",
    user_message="Tell me something about apples.",
    output_format="message_bundle_dict",
    num_retries=2,
)
print(message_bundle_dict)

{'id': 'chatcmpl-8L3WINIS7OgDe8wlZ2avZQqaadGh6', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': "Why did the apple go to the doctor? Because it wasn't peeling well!", 'created_time': 1700029110, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 41, 'prompt_tokens': 24, 'completion_tokens': 17, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


In [11]:
# The `create_chat_completion` method is syntactic sugar for `chat_completion`.
# It simply formats the message for us.  Here we build the `messages` list by
# hand instead: one dict per message, each with a "role" and a "content" key.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

# `chat_completion` takes the pre-formatted `messages` and accepts the same
# `output_format` / `num_retries` options used in the cells above.
message_bundle_dict = model.chat_completion(
    messages=messages,
    output_format="message_bundle_dict",
    num_retries=2,
)
print(message_bundle_dict)

{'id': 'chatcmpl-8L3WOZPK1FLUwmRFPlkDkWjsRV5YT', 'system_message': 'You are a joke telling machine.', 'user_message': 'Tell me something about apples.', 'response_message': 'Why did the apple go to therapy? Because it had too many core issues!', 'created_time': 1700029116, 'model': 'gpt-3.5-turbo-1106', 'total_tokens': 40, 'prompt_tokens': 24, 'completion_tokens': 16, 'seed': None, 'temperature': 0.9, 'top_p': 0.9, 'max_tokens': None}


 ### Making API calls using asynchronous (non-blocking) client

 This enables concurrent API calls.  We can control the max concurrency.

 Async uses the asyncio paradigm, so these functions must be run inside an
 asyncio event loop.
 NOTE: a Jupyter notebook already has an asyncio event loop running, so
 top-level `await` works directly in a cell. In a plain Python script you need
 to start the event loop yourself, e.g. with `asyncio.run(...)`.

In [12]:
# Same system/user prompt pair as before, formatted as a list of role/content dicts.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

# Single non-blocking call.  Top-level `await` is used because the notebook is
# already running an event loop.  No `output_format` is given here; the value
# displayed below is a raw ChatCompletion object.
response = await model.async_chat_completion(messages=messages, num_retries=1)  # noqa: F704
response


ChatCompletion(id='chatcmpl-8L3WPSEyFwDZa6QxecK3yn00RcNRI', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='Sure! Did you hear about the apple who went to the doctor? It wasn\'t feeling well, but the doctor said, "Don\'t worry, you just need to stop hanging out with bad seeds!"', role='assistant', function_call=None, tool_calls=None))], created=1700029117, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint='fp_eeff13170a', usage=CompletionUsage(completion_tokens=41, prompt_tokens=24, total_tokens=65))

In [13]:
# Build a batch of 5 identical requests so that we can make 5 API calls.
# Every entry refers to the same `messages` list object.
messages_list = [messages for _ in range(5)]
messages_list

[[{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}],
 [{'role': 'system', 'content': 'You are a joke telling machine.'},
  {'role': 'user', 'content': 'Tell me something about apples.'}]]

In [14]:
# Use Async Chat Completions, limit to 2 concurrent API calls at any given time
responses_list = await model.async_chat_completions(  # noqa: F704
    messages_list=messages_list,
    num_concurrent=2,
    num_retries=1,
    output_format="message_bundle_dict",
)

df = pd.DataFrame(responses_list)
df

Output()

Unnamed: 0,id,system_message,user_message,response_message,created_time,model,total_tokens,prompt_tokens,completion_tokens,seed,temperature,top_p,max_tokens
0,chatcmpl-8L3WQBtmZrNvRNksvFTlmx8KNNi4H,You are a joke telling machine.,Tell me something about apples.,Sure! Did you hear about the apple who went to...,1700029118,gpt-3.5-turbo-1106,50,24,26,,0.9,0.9,
1,chatcmpl-8L3WQSMF8GINMZIVILStFjkEuL0uh,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to therapy?\nBecause it h...,1700029118,gpt-3.5-turbo-1106,40,24,16,,0.9,0.9,
2,chatcmpl-8L3WRp8Ypiq37govF5g9VOKODd9fa,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to school?\n\nBecause it ...,1700029119,gpt-3.5-turbo-1106,43,24,19,,0.9,0.9,
3,chatcmpl-8L3WSVlnUVFaAXyrALJP3Xemd0ib7,You are a joke telling machine.,Tell me something about apples.,Why did the apple go to the doctor? Because it...,1700029120,gpt-3.5-turbo-1106,41,24,17,,0.9,0.9,
4,chatcmpl-8L3WS8AOOit8pLrxtxoIiIKFGbzzI,You are a joke telling machine.,Tell me something about apples.,Why did the apple stop in the middle of the ro...,1700029120,gpt-3.5-turbo-1106,43,24,19,,0.9,0.9,


 ### Example of using `Message` and `validation_callback`

 The `Message` wrapper allows packaging arbitrary user-defined metadata along with each message,
 which makes it a good place to put labels, notes, etc.

 The `validation_callback` argument lets the user define custom logic to
 validate the response from each API call to OpenAI for each message.
 The callback function receives the original `messages` and the `response`.
 If `messages` was passed in as a `Message` object, that same object is
 passed to `validation_callback`, giving access to all of its metadata.
 `response` is the LLM response after being parsed and formatted as specified
 by `output_format`.

In [15]:
# Same system/user prompt pair as in the earlier cells.
system_message = "You are a joke telling machine."
user_message = "Tell me something about apples."
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]
# Wrap the messages together with user-defined metadata.  The validation
# callback in this cell reads `metadata` to decide whether to accept a response.
m = Message(messages=messages, metadata={"a": 1})


def validation_callback_fn(messages, response) -> bool:
    """Accept a response only when the request's metadata has ``"a"`` equal to 1.

    The callback prints both of its arguments so the notebook output shows
    what it receives.

    Args:
        messages: The original input of the chat completion call.  Expected to
            expose a ``metadata`` mapping (e.g. a ``Message``).  Input without
            metadata, such as a plain list of message dicts, is rejected
            instead of raising ``AttributeError``.
        response: The parsed response; only printed, not inspected.

    Returns:
        ``True`` if ``metadata`` contains the key ``"a"`` with value ``1``,
        otherwise ``False``.
    """
    print(f"In Callback. Messages: {messages}")
    print(f"In Callback. Response: {response}")
    print("\n")
    # A plain list of message dicts has no `.metadata`; treat it as "no metadata".
    metadata = getattr(messages, "metadata", None)
    if not metadata:
        return False
    return "a" in metadata and metadata["a"] == 1


# Instantiate wrapper around OpenAI's API (rebinds `model`, using the same model name as before)
model = ChatModel(model="gpt-3.5-turbo-1106")
# Make ChatCompletion with...
# - using Message wrapper and include metadata (ChatModel automatically unpacks Message.messages)
# - parse raw OpenAI response into "simple" string format
# - then call the `validation_callback_fn` that we defined.  ChatModel always passes in
#   original messages input and parsed response as the 1st and 2nd arguments.  The
#   `validation_callback_fn` can contain any logic, but ultimately needs to return `True` vs `False`
#   to accept or reject the response.  If the response is rejected, ChatModel automatically retries.
# - allow up to 1 retry.  If still fails/rejected after 1 retry, then will return `None`.
# Here the metadata is {"a": 1}, so the callback accepts the response.
response = model.chat_completion(
    m,
    output_format="simple",
    validation_callback=validation_callback_fn,
    num_retries=1,
)
response


In Callback. Messages: Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1})
In Callback. Response: Why did the apple go to therapy? Because it had too many cores issues!




'Why did the apple go to therapy? Because it had too many cores issues!'

In [16]:
# Multiple concurrent async chat completions using Message.
# NOTE: the first two entries are the same `m` object (metadata {"a": 1}); the
# 3rd Message carries different metadata.  This should cause the
# `validation_callback_fn` to reject the response for only the 3rd Message in
# the list and retry only the 3rd Message.
m_list = [m, m, Message(messages=messages, metadata={"b": 2})]
m_list

[Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1}),
 Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'a': 1}),
 Message(messages=[{'role': 'system', 'content': 'You are a joke telling machine.'}, {'role': 'user', 'content': 'Tell me something about apples.'}], metadata={'b': 2})]

In [17]:
# Use Async Chat Completions, limit to 2 concurrent API calls at any given time & 1 retry.
# Each response is parsed into "simple" string format and then checked by
# `validation_callback_fn`.
responses_list = await model.async_chat_completions(  # noqa: F704
    messages_list=m_list,
    num_concurrent=2,
    num_retries=1,
    validation_callback=validation_callback_fn,
    output_format="simple",
)

Output()

In [18]:
# Examine responses.
# - We should get valid responses for the first 2 responses.
# - The 3rd response should always be `None`: its metadata has no "a" key, so
#   `validation_callback_fn` returns False on every attempt and the response
#   is rejected.
responses_list


['Sure! Did you hear about the apple that went to a party? It was a-peeling!',
 "Why did the apple go to the doctor? Because it wasn't peeling well!",
 None]