In [1]:
# Imports:

import os
import huggingface_hub
import datasets
from pprint import pprint
from types import SimpleNamespace
import asyncio
import nest_asyncio
import oaib

In [2]:
# Define constants:

# Dataset selection: which HuggingFace dataset to read and which slice of it.
ds_conf = SimpleNamespace(
    name="lmsys/chatbot_arena_conversations",
    skip_rows=1,
    rows_to_process=50,
)

# Model selection and per-token prices in USD (listed per 1M tokens at
# https://openai.com/api/pricing/).
# Alternative configuration, kept for reference:
#   model_conf = SimpleNamespace(
#       name="gpt-4o",
#       in_tok_price=1.25/1e6,
#       out_tok_price=10.00/1e6,
#   )
model_conf = SimpleNamespace(
    name="gpt-4o-mini-2024-07-18",
    in_tok_price=0.15/1e6,
    out_tok_price=0.60/1e6,
)

# Output file name, prefixed with the model name.
out_file_name = model_conf.name + "_arena_brief_dataset.jsonl"

# Prompt related.
class LimitedPrompt:
    """Length budget for a prompt, derived from a maximum word count.

    Attributes:
        max_words: The word limit passed to the constructor.
        max_ch_soft: Soft character limit, ``max_words * 6``.
        max_tokens_hard: Hard token limit: the soft character limit plus a
            20% buffer, divided by 4 and truncated to an int.
    """

    # A word is "between 5 and 6.5 characters per word including spaces and punctuation":
    # https://charactercounter.com/characters-to-words
    _CHARS_PER_WORD = 6
    # Multiplier that adds a 20% buffer on top of the soft character limit.
    _HARD_BUFFER = 1.2
    # Characters-per-token divisor used to turn characters into an LLM token limit:
    # https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
    _CHARS_PER_TOKEN = 4

    def __init__(self, max_words):
        self.max_words = max_words
        self.max_ch_soft = max_words * self._CHARS_PER_WORD
        buffered_chars = self.max_ch_soft * self._HARD_BUFFER
        self.max_tokens_hard = int(buffered_chars / self._CHARS_PER_TOKEN)

# Template for the first pass ("normal" answer), limited to 120 words.
# The f-string fills in the length limits right away; the doubled braces leave a
# literal {question} placeholder that is filled later with str.format().
normal_prompt = LimitedPrompt(120)
normal_prompt.text = f'''Answer the user prompt below "---" line. Never exceed {normal_prompt.max_ch_soft} characters / {normal_prompt.max_words} words.
---
{{question}}
'''

# Template for the second pass ("brief" answer), limited to 20 words.
# It leaves two literal placeholders for str.format(): {question} and
# {normal_asnwer}. NOTE: the second placeholder is misspelled (sic); the keyword
# passed to .format() in the brief-answer cell uses the same spelling, so both
# must be renamed together or formatting raises KeyError.
brief_prompt = LimitedPrompt(20)
brief_prompt.text = f'''Given the user prompt and a normal answer, generate a brief answer. A brief answer should be as short as possible but still answer the question and give relevant information. Never exceed {brief_prompt.max_ch_soft} characters / {brief_prompt.max_words} words.
Examples between --- lines:
--- Example 1 ---
Your input:
Question: How much is 2+3?
Normal answer: Expression 2+3 is equal to 5.
Your output:
5
--- Example 2 ---
Your input:
Question: What is the color of the sky?
Normal answer: The sky is blue.
Your output:
Blue
--- End of examples

Considering all the above, give a brief answer to the prompt and normal answer below:
Question: {{question}}
Normal answer: {{normal_asnwer}}
Your output: 
'''

# Run related.
# batch_size: number of rows passed to each batched ds.map() call.
batch_size = 30
# total_price: running cost in USD; process_batch adds to it after every batch.
total_price = 0.0

In [3]:
# Load the dataset from HuggingFace.

# Log in with the token taken from the HF_KEY environment variable.
huggingface_hub.login(token=os.getenv("HF_KEY"))

ds = datasets.load_dataset(ds_conf.name)['train']

# rows_to_process=None means "every row after the skipped ones".
# Identity comparison is the correct way to test for None.
if ds_conf.rows_to_process is None:
    ds_conf.rows_to_process = len(ds) - ds_conf.skip_rows

ds_range = range(ds_conf.skip_rows, ds_conf.skip_rows + ds_conf.rows_to_process)
ds = ds.select(ds_range)
print(f"Loaded dataset range: {ds_range}")

# Keep only the id and the first conversation, then expose the first message of
# that conversation as the 'question' column.
ds = (
    ds.select_columns(['question_id', 'conversation_a'])
    .rename_column('question_id', 'question-id')
    .map(lambda example: {'question': example['conversation_a'][0]['content']})
)

pprint(ds[4])  # Print a sample row.

Loaded dataset range: range(1, 51)
{'conversation_a': [{'content': 'Count from 1 to 10 with step = 3',
                     'role': 'user'},
                    {'content': '1, 4, 7, 10\n'
                                '\n'
                                'Counting with a step of 3 means that you will '
                                'increase the number by 3 each time. So, the '
                                'first number is 1, the next is 4 (1 + 3), the '
                                'next is 7 (4 + 3), and so on.',
                     'role': 'assistant'}],
 'question': 'Count from 1 to 10 with step = 3',
 'question-id': 'c0fc42c6f5f14f2aa5a89f71f8553730'}


In [4]:
# Define the generic batch processing function.

# Patch asyncio before process_batch_sync calls asyncio.run(); presumably needed
# because the notebook kernel already runs an event loop and asyncio.run() would
# otherwise refuse to start a nested one (confirm against the nest_asyncio docs).
nest_asyncio.apply()

async def process_batch(batch, indices):
    """Send every prompt of a batch to the chat model and collect the answers.

    Args:
        batch: Column-oriented batch. Reads ``batch['prompt']`` (the prompts to
            send) and ``batch['max-tokens']`` (completion token cap per prompt).
        indices: Dataset row indices of the batch; used only for the progress
            message.

    Returns:
        ``{'answer': [...]}`` with one answer per prompt, ordered like
        ``batch['prompt']``. An empty batch yields ``{'answer': []}``.

    Side effects:
        Adds the cost of the batch to the module-level ``total_price``.
    """
    global total_price
    prompts = batch['prompt']
    # Nothing to send: avoid indexing into an empty `indices` and a useless API run.
    if not prompts:
        return {'answer': []}
    print(f"Batch size: {len(indices)}, Start index: {indices[0]}")
    # Each request is tagged with its position in the batch and that tag is used
    # as the output index. Observed behaviour: without `index=` the results come
    # back in a different order than the input (presumably completion order of
    # the concurrent workers - confirm against the oaib docs).
    auto_batch = oaib.Auto(workers=8, index=["idx"])
    for idx, prompt in enumerate(prompts):
        messages = [{"role": "user", "content": prompt}]
        await auto_batch.add("chat.completions.create",
                             metadata={"idx": idx},
                             model=model_conf.name,
                             messages=messages,
                             max_tokens=batch['max-tokens'][idx])
    output = await auto_batch.run()
    # Sort by the idx index explicitly so answers line up with the input rows
    # regardless of the order in which the library returns them.
    output = output.sort_index()
    answers, in_tokens, out_tokens = [], 0, 0
    for _, row in output.iterrows():
        answers.append(row.result['choices'][0]['message']['content'])
        in_tokens += row.result['usage']['prompt_tokens']
        out_tokens += row.result['usage']['completion_tokens']
    total_price += in_tokens*model_conf.in_tok_price + out_tokens*model_conf.out_tok_price
    return {'answer': answers}

def process_batch_sync(batch, indices):
    """Blocking wrapper: run process_batch to completion and return its result."""
    pending = process_batch(batch, indices)
    return asyncio.run(pending)

In [5]:
# Normal answers generation.

def _with_normal_prompt(row):
    """Build the normal-answer prompt and its token cap for one dataset row."""
    return {
        'prompt': normal_prompt.text.format(question=row['question']),
        'max-tokens': normal_prompt.max_tokens_hard,
    }

ds = ds.map(_with_normal_prompt)

# Query the model batch by batch; the callback returns an 'answer' column,
# which is renamed to tell it apart from the brief answers.
ds = ds.map(
    process_batch_sync,
    batched=True,
    batch_size=batch_size,
    with_indices=True,
)
ds = ds.rename_column('answer', 'normal-answer')

pprint(ds[4])  # Print a sample row.

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Batch size: 30, Start index: 0


  0%|          | 0/30 [00:00<?, ?req/s]

RPM:   0%|          | 0/500

TPM:   0%|          | 0/10000


Run took 6.57s.

Batch size: 20, Start index: 30


  0%|          | 0/20 [00:00<?, ?req/s]

RPM:   0%|          | 0/500

TPM:   0%|          | 0/10000


Run took 6.96s.

{'conversation_a': [{'content': 'Count from 1 to 10 with step = 3',
                     'role': 'user'},
                    {'content': '1, 4, 7, 10\n'
                                '\n'
                                'Counting with a step of 3 means that you will '
                                'increase the number by 3 each time. So, the '
                                'first number is 1, the next is 4 (1 + 3), the '
                                'next is 7 (4 + 3), and so on.',
                     'role': 'assistant'}],
 'max-tokens': 216,
 'normal-answer': 'Counting from 1 to 10 with a step of 3 gives us the '
                  'following sequence:\n'
                  '\n'
                  '1, 4, 7, 10\n'
                  '\n'
                  'Starting at 1, we add 3 each time until we reach or exceed '
                  "10. Here's the breakdown:\n"
                  '\n'
                  '1 (starting point)  \n'
                  '1 + 3 = 4  \n

In [6]:
# Brief answers generation.

def _with_brief_prompt(row):
    """Build the brief-answer prompt and its token cap for one dataset row."""
    # The keyword spelling `normal_asnwer` matches the placeholder in
    # brief_prompt.text; the two must stay identical.
    filled = brief_prompt.text.format(
        question=row['question'],
        normal_asnwer=row['normal-answer'],
    )
    return {'prompt': filled, 'max-tokens': brief_prompt.max_tokens_hard}

ds = ds.map(_with_brief_prompt)

pprint(ds[4])  # Print a sample row.

# Query the model batch by batch; the callback returns an 'answer' column,
# which is renamed to 'brief-answer'.
ds = ds.map(
    process_batch_sync,
    batched=True,
    batch_size=batch_size,
    with_indices=True,
)
ds = ds.rename_column('answer', 'brief-answer')

pprint(ds[4])  # Print a sample row.

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

{'conversation_a': [{'content': 'Count from 1 to 10 with step = 3',
                     'role': 'user'},
                    {'content': '1, 4, 7, 10\n'
                                '\n'
                                'Counting with a step of 3 means that you will '
                                'increase the number by 3 each time. So, the '
                                'first number is 1, the next is 4 (1 + 3), the '
                                'next is 7 (4 + 3), and so on.',
                     'role': 'assistant'}],
 'max-tokens': 36,
 'normal-answer': 'Counting from 1 to 10 with a step of 3 gives us the '
                  'following sequence:\n'
                  '\n'
                  '1, 4, 7, 10\n'
                  '\n'
                  'Starting at 1, we add 3 each time until we reach or exceed '
                  "10. Here's the breakdown:\n"
                  '\n'
                  '1 (starting point)  \n'
                  '1 + 3 = 4  \n'
                 

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Batch size: 30, Start index: 0


  0%|          | 0/30 [00:00<?, ?req/s]

RPM:   0%|          | 0/500

TPM:   0%|          | 0/10000


Run took 2.69s.

Batch size: 20, Start index: 30


  0%|          | 0/20 [00:00<?, ?req/s]

RPM:   0%|          | 0/500

TPM:   0%|          | 0/10000


Run took 2.90s.

{'brief-answer': '1, 4, 7, 10',
 'conversation_a': [{'content': 'Count from 1 to 10 with step = 3',
                     'role': 'user'},
                    {'content': '1, 4, 7, 10\n'
                                '\n'
                                'Counting with a step of 3 means that you will '
                                'increase the number by 3 each time. So, the '
                                'first number is 1, the next is 4 (1 + 3), the '
                                'next is 7 (4 + 3), and so on.',
                     'role': 'assistant'}],
 'max-tokens': 36,
 'normal-answer': 'Counting from 1 to 10 with a step of 3 gives us the '
                  'following sequence:\n'
                  '\n'
                  '1, 4, 7, 10\n'
                  '\n'
                  'Starting at 1, we add 3 each time until we reach or exceed '
                  "10. Here's the breakdown:\n"
                  '\n'
                  '1 (starting point)  \n'
 

In [7]:
### DEBUG CELL ###

# Show question, normal answer and brief answer for a few sample rows.
for i in range(41, 45):
    row = ds[i]  # fetch the row once instead of once per printed field
    print(f"Q: {row['question']}")
    print(f"A: {row['normal-answer']}")
    print(f"B: {row['brief-answer']}")
    print("-------------------------------")

# Cost accumulated by process_batch over all batches run so far.
print(f"Total price: {total_price:.2f} USD")

Q: Paint an ASCII art image of the moon using emojis
A: Sure! Here's an ASCII art representation of the moon using emojis:

```
       🌕
    🌕🌕🌕🌖🌕
  🌕🌕🌕🌕🌕🌕🌕
🌕🌕🌕🌕🌕🌕🌕🌕🌕
🌕🌕🌕🌑🌕🌕🌕🌕🌕
 🌕🌕🌕🌕🌕🌕🌕
   🌕🌕🌕🌖🌕
       🌕
```

Feel free to copy and use it wherever you like!
B: 🌕
-------------------------------
Q: Salut ! Tu es un méchant chatbot !
A: Salut ! Je suis désolé si je te donne cette impression. Mon but est d’aider et de rendre la conversation agréable. Si quelque chose t’a déplu, n’hésite pas à me le dire. Je suis là pour répondre à tes questions ou discuter de tout sujet qui t’intéresse. Que puis-je faire pour toi aujourd'hui ?
B: Désolé si tu penses ça, je suis ici pour aider ! Que puis-je faire pour toi ?
-------------------------------
Q: who was the last monarch of uk
A: As of October 2023, the last monarch of the United Kingdom was Queen Elizabeth II. She reigned from February 6, 1952, until her passing on September 8, 2022. Upon her death, her son, Charles III, ascended to the throne. 