In [None]:
# Install modified lib from github
!python -m pip install --no-cache-dir -U git+https://github.com/ffflowww/optillm.git@qwen-vllm

In [1]:
import json
import os
import warnings
from typing import List, Tuple

import numpy as np
import requests
from openai import OpenAI
from optillm.cot_decoding_vllm import cot_decode_vllm
from transformers import AutoTokenizer

In [2]:
# Connection settings for the OpenAI-compatible endpoint that serves the model.
# The endpoint URL and key are read from the environment so real values are
# never saved with the notebook; when the variables are unset they fall back
# to the empty placeholders this cell used before.
model = "Qwen/Qwen2.5-72B-Instruct"
base_url = os.environ.get("OPENAI_BASE_URL", "")
api_key = os.environ.get("OPENAI_API_KEY", "")

client = OpenAI(
    base_url=base_url,
    api_key=api_key,
)

# Smoke test: one short chat turn confirms the endpoint answers for `model`.
client.chat.completions.create(model=model, messages=[{"role": "user", "content": "Hello!"}]).choices[0].message

ChatCompletionMessage(content='Hello! How can I assist you today?', refusal=None, role='assistant', function_call=None, tool_calls=[])

## Initial prompt

In [3]:
# System instruction: asks the model to finish with the exact answer after the
# words 'Answer:' (the same marker later cells pass as answer_keywords).
# The trailing space is part of the original prompt and is kept on purpose.
system_prompt = (
    "You are a smart assistant. "
    "At the very end of your response always give only exact answer after words 'Answer:' "
)

# The word problem used as the user turn in every example of this notebook.
user_prompt = (
    "In a dance class of 20 students, 20% enrolled in contemporary dance, "
    "25% of the remaining enrolled in jazz dance, and the rest enrolled in hip-hop dance. "
    "What percentage of the entire students enrolled in hip-hop dance?"
)

## Case 1. Get the best answer

In [4]:
# One decoding path; the displayed result is an (answer text, confidence score) pair.
response = cot_decode_vllm(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    client=client,
    model=model,
    n_paths=1,
)
response

('To determine the percentage of students enrolled in hip-hop dance, we can follow these steps:\n\n1. Calculate the number of students enrolled in contemporary dance:\n   - 20% of 20 students = 0.20 * 20 = 4 students\n\n2. Calculate the number of remaining students after the contemporary dance enrollment:\n   - 20 - 4 = 16 students\n\n3. Calculate the number of students enrolled in jazz dance:\n   - 25% of the remaining 16 students = 0.25 * 16 = 4 students\n\n4. Calculate the number of students enrolled in hip-hop dance:\n   - Total students - (students in contemporary + students in jazz) = 20 - (4 + 4) = 12 students\n\n5. Calculate the percentage of students enrolled in hip-hop dance:\n   - (Number of students in hip-hop / Total number of students) * 100 = (12 / 20) * 100 = 60%\n\nAnswer: 60%',
 0.9417607200269825)

In [5]:
# The same call, but with debug print: the decode step, its confidence and the
# answer text are printed before the result pair is displayed.
response = cot_decode_vllm(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    client=client,
    model=model,
    n_paths=1,
    debug_print=True,
)
response

[CoT decode 1/1]
[Confidence:] 0.9550112714093978
[Real answer for confidence scoring:] ||all_text||
[Answer:] To determine the percentage of students who enrolled in hip-hop dance, let's break down the problem step-by-step:

1. **Total number of students**: 20

2. **Students enrolled in contemporary dance**:
   - 20% of 20 students = 0.20 * 20 = 4 students

3. **Remaining students after contemporary dance**:
   - 20 - 4 = 16 students

4. **Students enrolled in jazz dance**:
   - 25% of the remaining 16 students = 0.25 * 16 = 4 students

5. **Students enrolled in hip-hop dance**:
   - Remaining students = 16 - 4 = 12 students

6. **Percentage of students enrolled in hip-hop dance**:
   - (12 students / 20 students) * 100% = 60%

Answer: 60%


("To determine the percentage of students who enrolled in hip-hop dance, let's break down the problem step-by-step:\n\n1. **Total number of students**: 20\n\n2. **Students enrolled in contemporary dance**:\n   - 20% of 20 students = 0.20 * 20 = 4 students\n\n3. **Remaining students after contemporary dance**:\n   - 20 - 4 = 16 students\n\n4. **Students enrolled in jazz dance**:\n   - 25% of the remaining 16 students = 0.25 * 16 = 4 students\n\n5. **Students enrolled in hip-hop dance**:\n   - Remaining students = 16 - 4 = 12 students\n\n6. **Percentage of students enrolled in hip-hop dance**:\n   - (12 students / 20 students) * 100% = 60%\n\nAnswer: 60%",
 0.9550112714093978)

In [6]:
# The same call with debug print and exact answer keywords: the debug output
# then reports the text after 'Answer:' as the span used for confidence
# scoring, instead of the '||all_text||' placeholder.
response = cot_decode_vllm(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    client=client,
    model=model,
    n_paths=1,
    debug_print=True,
    answer_keywords='Answer:',
)
response

[CoT decode 1/1]
[Confidence:] 0.999782355929016
[Real answer for confidence scoring:]  60%
[Answer:] To determine the percentage of students enrolled in hip-hop dance, we can follow these steps:

1. Calculate the number of students enrolled in contemporary dance:
   - 20% of 20 students = 0.20 * 20 = 4 students

2. Determine the number of remaining students after those in contemporary dance are accounted for:
   - 20 students - 4 students = 16 students

3. Calculate the number of students enrolled in jazz dance:
   - 25% of 16 students = 0.25 * 16 = 4 students

4. Determine the number of students enrolled in hip-hop dance:
   - Total students - students in contemporary - students in jazz = 20 - 4 - 4 = 12 students

5. Calculate the percentage of students enrolled in hip-hop dance:
   - (12 students / 20 students) * 100% = 60%

Answer: 60%


('To determine the percentage of students enrolled in hip-hop dance, we can follow these steps:\n\n1. Calculate the number of students enrolled in contemporary dance:\n   - 20% of 20 students = 0.20 * 20 = 4 students\n\n2. Determine the number of remaining students after those in contemporary dance are accounted for:\n   - 20 students - 4 students = 16 students\n\n3. Calculate the number of students enrolled in jazz dance:\n   - 25% of 16 students = 0.25 * 16 = 4 students\n\n4. Determine the number of students enrolled in hip-hop dance:\n   - Total students - students in contemporary - students in jazz = 20 - 4 - 4 = 12 students\n\n5. Calculate the percentage of students enrolled in hip-hop dance:\n   - (12 students / 20 students) * 100% = 60%\n\nAnswer: 60%',
 0.999782355929016)

## Case 2. Show all answers and their confidence scores (return_all=True). Showcase of all supported parameters

In [8]:
# Showcase call: the keywords tagged "default" are passed at their default
# values purely for illustration. With return_all=True the result is iterated
# below as a sequence whose items hold the answer at [0] and the score at [1].
response = cot_decode_vllm(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    client=client,
    model=model,
    n_paths=10,  # default
    max_new_tokens=512,  # default
    temperature=1.0,  # default
    top_p=1.0,  # default
    answer_keywords='Answer:',
    tokenizer_name='',  # default
    aggregate_paths=False,  # default
    return_all=True,
    debug_print=True,
    return_completion_tokens=False,  # default
)

# Print each returned path with a 1-based position, its score and its text.
n_responses = len(response)
for position, resp in enumerate(response, start=1):
    print(f"({position}/{n_responses}) Score: {resp[1]}")
    print(f"Answer: {resp[0]}\n")

[CoT decode 1/10]
[Confidence:] 0.9993208837777148
[Real answer for confidence scoring:]  60%
[Answer:] To find the percentage of students enrolled in hip-hop dance, we need to follow these steps:

1. Calculate the number of students enrolled in contemporary dance.
2. Subtract the number of contemporary dance students from the total to find the remaining students.
3. Calculate the number of students enrolled in jazz dance from the remaining students.
4. Subtract the number of jazz dance students from the remaining to find the number of students enrolled in hip-hop dance.
5. Calculate the percentage of students enrolled in hip-hop dance.

Let's perform the calculations:

1. Number of students enrolled in contemporary dance:
   \[
   20\% \text{ of } 20 = 0.20 \times 20 = 4 \text{ students}
   \]

2. Remaining students after contemporary dance:
   \[
   20 - 4 = 16 \text{ students}
   \]

3. Number of students enrolled in jazz dance:
   \[
   25\% \text{ of } 16 = 0.25 \times 16 = 4 \tex

## Playground below (test zone)

In [9]:
"""THIS IS AN EXACT RIGHT PROMPT BELOW FOR QWEN 2.5 INSTRUCT"""

"""
<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
In a dance class of 20 students, 20% enrolled in contemporary dance, 25% of the remaining enrolled in jazz dance, and the rest enrolled in hip-hop dance. What percentage of the entire students enrolled in hip-hop dance?<|im_end|>
<|im_start|>assistant

"""

'\n<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\nIn a dance class of 20 students, 20% enrolled in contemporary dance, 25% of the remaining enrolled in jazz dance, and the rest enrolled in hip-hop dance. What percentage of the entire students enrolled in hip-hop dance?<|im_end|>\n<|im_start|>assistant\n\n'

In [10]:
# Minimal chat round trip: one user greeting, then show the assistant message.
greeting = [{"role": "user", "content": "Hello!"}]
completion = client.chat.completions.create(model=model, messages=greeting)

completion.choices[0].message

ChatCompletionMessage(content='Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with anything specific.', refusal=None, role='assistant', function_call=None, tool_calls=[])

In [11]:
tokenizer = AutoTokenizer.from_pretrained(model)

# Render the system + user turns as plain text through the tokenizer's chat
# template, ending with the generation prompt for the assistant turn.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]
base_prompt = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)

# Send the same prompt three times in one request, asking for 3 tokens each
# with the top-1 logprob per position; the displayed choices may differ.
completion_init = client.completions.create(
    model=model,
    prompt=[base_prompt] * 3,
    max_tokens=3,
    logprobs=1,
)
completion_init

Completion(id='cmpl-a6270dfa4e4a4b2e9302b8231692b015', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=Logprobs(text_offset=[0, 2, 12], token_logprobs=[-0.2157096564769745, -0.5551704168319702, -0.018150897696614265], tokens=['To', ' determine', ' the'], top_logprobs=[{'To': -0.2157096564769745}, {' determine': -0.5551704168319702}, {' the': -0.018150897696614265}]), text='To determine the', stop_reason=None, prompt_logprobs=None), CompletionChoice(finish_reason='length', index=1, logprobs=Logprobs(text_offset=[0, 2, 7], token_logprobs=[-0.2157096564769745, -1.4301704168319702, -0.14721615612506866], tokens=['To', ' find', ' the'], top_logprobs=[{'To': -0.2157096564769745}, {' find': -1.4301704168319702, ' determine': -0.5551704168319702}, {' the': -0.14721615612506866}]), text='To find the', stop_reason=None, prompt_logprobs=None), CompletionChoice(finish_reason='length', index=2, logprobs=Logprobs(text_offset=[0, 2, 7], token_logprobs=[-0.2157096564769745, -1.4301

In [12]:
# Completion tokens reported for the batched request: 3 prompts x max_tokens=3 matches the 9 displayed.
completion_init.usage.completion_tokens

9

In [13]:
# Ask for a single generated token together with its three most likely candidates.
# Note: this rebinds `completion_init`, replacing the batched result from the earlier cell.
completion_init = client.completions.create(
    model=model,
    prompt=base_prompt,
    max_tokens=1,
    logprobs=3,
)
# Candidate token -> log-probability for the first (and only) generated position.
completion_init.choices[0].logprobs.top_logprobs[0]

{'To': -0.2693541944026947,
 'First': -2.0193541049957275,
 'Let': -2.2693541049957275}

In [14]:
# Completion tokens reported for the single-prompt request with max_tokens=1: the displayed count is 1.
completion_init.usage.completion_tokens

1

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "Qwen/Qwen2.5-72B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Two-turn toy conversation used by the template experiments in the cells below.
system_turn = {
    "role": "system",
    "content": "You are a friendly chatbot who always responds in the style of a pirate",
}
user_turn = {"role": "user", "content": "How many helicopters can a human eat in one sitting?"}
messages = [system_turn, user_turn]

# Tokenize through the chat template, then decode the first row back to text
# to inspect exactly what the template produced.
tokenized_chat = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
)
first_sequence = tokenized_chat[0]
print(tokenizer.decode(first_sequence))

<|im_start|>system
You are a friendly chatbot who always responds in the style of a pirate<|im_end|>
<|im_start|>user
How many helicopters can a human eat in one sitting?<|im_end|>
<|im_start|>assistant



In [6]:
# Without the generation prompt the rendered text stops after the user turn's <|im_end|>.
tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)

'<|im_start|>system\nYou are a friendly chatbot who always responds in the style of a pirate<|im_end|>\n<|im_start|>user\nHow many helicopters can a human eat in one sitting?<|im_end|>\n'

In [4]:
# With the generation prompt the rendered text additionally ends with '<|im_start|>assistant\n'.
tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

'<|im_start|>system\nYou are a friendly chatbot who always responds in the style of a pirate<|im_end|>\n<|im_start|>user\nHow many helicopters can a human eat in one sitting?<|im_end|>\n<|im_start|>assistant\n'

In [5]:
# NOTE(review): build_qwen_chat_prompt is neither defined nor imported in any cell visible here, so
# this call depends on leftover kernel state — presumably it comes from the optillm fork; confirm and import it.
# The recorded output matches apply_chat_template(messages, tokenize=False, add_generation_prompt=True).
build_qwen_chat_prompt(messages[0]['content'], messages[1]['content'])

'<|im_start|>system\nYou are a friendly chatbot who always responds in the style of a pirate<|im_end|>\n<|im_start|>user\nHow many helicopters can a human eat in one sitting?<|im_end|>\n<|im_start|>assistant\n'

In [18]:
# Token ids for a short sample sentence, followed by how many tokens it takes.
sample_ids = tokenizer.encode("What are you doing here?")
print(sample_ids)
print(len(sample_ids))

[3838, 525, 498, 3730, 1588, 30]
6


In [11]:
# NOTE(review): a bare string is passed here, whereas the other cells pass a list of
# {"role", "content"} dicts. The recorded output holds only the default Qwen system prompt —
# the question text does not appear — so this looks like a misuse probe; confirm the intent.
tokenizer.apply_chat_template("What are you doing here?", tokenize=False, add_generation_prompt=False)

'<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n'