# Speeding Up Inference with vLLM

In [None]:
from vllm import LLM, SamplingParams
max_model_len, tp_size = 8192, 1
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import numpy as np

def get_response_prompts(instructions):
    """Wrap each instruction in a two-message chat conversation.

    Args:
        instructions: Iterable of user instruction strings.

    Returns:
        A list with one conversation per instruction, in input order. Each
        conversation is a list holding a fixed system message followed by a
        user message whose content is the instruction.
    """
    return [
        [
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": user_instruction},
        ]
        for user_instruction in instructions
    ]

def get_worse_response_question_prompts(instructions, positive_responses, worse_response_prompt_template):
    """Build the chat prompts that fill the worse-response template.

    Args:
        instructions: Iterable of original instruction strings.
        positive_responses: Iterable of responses, paired positionally with
            ``instructions`` (pairing stops at the shorter of the two).
        worse_response_prompt_template: ``str.format`` template that is filled
            with the keyword fields ``input`` and ``generation``.

    Returns:
        A list of conversations (system message + formatted user message),
        one per instruction/response pair.
    """
    return [
        [
            {"role": "system", "content": "You are a helpful AI assistant."},
            {
                "role": "user",
                "content": worse_response_prompt_template.format(input=question, generation=answer),
            },
        ]
        for question, answer in zip(instructions, positive_responses)
    ]

def extract_negative_questions(instructions, positive_responses, inputs):
    """Pull the modified user question out of each generation.

    The question is the text between ``[The start of User Question]`` and the
    next ``[The end of User Question]``; when no end marker follows the start
    marker, everything up to the end of the string is used. Entries whose
    generation lacks the start marker (or that have no matching instruction /
    positive response at the same index) are reported and dropped from all
    three outputs, so the returned lists always stay index-aligned.

    Args:
        instructions: Original instructions, indexed like ``inputs``.
        positive_responses: Responses to the originals, indexed like ``inputs``.
        inputs: Generated strings expected to contain the question markers.

    Returns:
        A tuple ``(kept_instructions, kept_positive_responses, questions)`` of
        three equally long lists.
    """
    start_marker = "[The start of User Question]"
    end_marker = "[The end of User Question]"

    kept_instructions = []
    kept_positive_responses = []
    all_negative_instructions = []
    for idx, item in enumerate(inputs):
        try:
            # The +1 skips the single separator character (space or newline)
            # that is expected right after the start marker.
            start = item.index(start_marker) + len(start_marker) + 1
            # Resolve both companions before appending anything, so a failed
            # lookup cannot leave the output lists with different lengths.
            instruction = instructions[idx]
            positive = positive_responses[idx]
        except (ValueError, IndexError):
            print("Generation String Not Found! ID:", idx)
            continue

        # Only an end marker located after the start marker closes the span.
        end = item.find(end_marker, start)
        if end == -1:
            end = len(item)

        all_negative_instructions.append(item[start:end])
        kept_instructions.append(instruction)
        kept_positive_responses.append(positive)
    return kept_instructions, kept_positive_responses, all_negative_instructions

def extract_negative_responses(instructions, positive_responses, generated_text):
    """Extract the modified-instruction response from each generation.

    The response is the text between
    ``[The start of Modified Instruction Response]`` and the next
    ``[The end of Modified Instruction Response]``; when no end marker follows
    the start marker, everything up to the end of the string is used.
    Generations without the start marker are reported and skipped.

    Args:
        instructions: Original instructions.
        positive_responses: Responses to the original instructions.
        generated_text: Generated strings expected to contain the markers.
            The three arguments are paired positionally (``zip``).

    Returns:
        A list of dicts with the keys ``"instruction"``, ``"positive"`` and
        ``"negative"``, one per generation that contained the start marker.
    """
    start_marker = "[The start of Modified Instruction Response]"
    end_marker = "[The end of Modified Instruction Response]"

    all_responses = []
    for idx, (instruction, positive, negative) in enumerate(zip(instructions, positive_responses, generated_text)):
        try:
            # The +1 skips the single separator character (space or newline)
            # that is expected right after the start marker.
            start = negative.index(start_marker) + len(start_marker) + 1
        except ValueError:
            print("Generation String Not Found! ID:", idx)
            continue

        # Only an end marker located after the start marker closes the span.
        end = negative.find(end_marker, start)
        if end == -1:
            end = len(negative)

        all_responses.append({"instruction": instruction, "positive": positive, "negative": negative[start:end]})
    return all_responses

def create_judge_response_prompts(instructions, positives, negatives, judging_template):
    """Build pairwise judging prompts with a randomised response order.

    For every (instruction, positive, negative) triple one draw from
    ``np.random.rand()`` decides the presentation order: below 0.5 the
    positive response fills the template field ``generation`` and the label
    is 1; otherwise the negative response fills ``generation`` and the label
    is 0. The other response always fills ``generation2``.

    Args:
        instructions: Iterable of instruction strings.
        positives: Iterable of preferred responses.
        negatives: Iterable of dispreferred responses.
        judging_template: ``str.format`` template using the keyword fields
            ``input``, ``generation`` and ``generation2``.

    Returns:
        A tuple ``(judge_prompts, labels)``: the conversations (system + user
        message) and the matching 0/1 labels.
    """
    judge_prompts, labels = [], []
    for question, good, bad in zip(instructions, positives, negatives):
        positive_first = np.random.rand() < 0.5
        if positive_first:
            first_answer, second_answer = good, bad
        else:
            first_answer, second_answer = bad, good

        user_content = judging_template.format(
            input=question,
            generation=first_answer,
            generation2=second_answer,
        )
        judge_prompts.append(
            [
                {"role": "system", "content": "You are a helpful AI assistant."},
                {"role": "user", "content": user_content},
            ]
        )
        labels.append(1 if positive_first else 0)
    return judge_prompts, labels

def load_textfile_as_string(filepath):
    """Return the entire contents of the UTF-8 text file at ``filepath``."""
    with open(filepath, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents



In [None]:
messages = [
            {
                "role": "system",
                "content": "You are a helpful AI assistant."
            },
            {
                "role": "user",
                "content": "Please act as an impartial judge and evaluate the quality of the responses provided by two AI assistants to the user question displayed below. You should choose the assistant that follows the user's instructions and answers the user's question better. Begin your evaluation by first verifying whether each response contains any obvious or subtle errors. Then propose an appropriate evaluaiton rubric, e.g. 1-5 criteria that are important for evaluating responses to this specific user question. Continue your evaluation by checking each response carefully along those criteria. Based on the analysis in previous steps, choose which response is better overall. Avoid any position biases and ensure that the order in which the responses were presented does not influence your decision. Do not allow the length of the responses to influence your evaluation. Do not favor certain names of the assistants. Be as objective as possible. After providing your evaluation, output your final verdict by strictly following this format: \\\"[[A]]\\\" if assistant A is better, \\\"[[B]]\\\" if assistant B is better. \n\n[User Question]\nfast reading method when first few letters of words are bold\n\n[The Start of Assistant A's Answer]\nYou're referring to a technique called \"Rapid Serial Visual Presentation\" (RSVP) with bolded first few letters! This method can indeed help you read faster and more efficiently. Here's how it works:\n\n1. Identify the first few letters of each word that are bolded. This can be 2-3 letters, depending on your preference.\n2. Focus on the bolded letters to quickly identify the word. This helps your brain to recognize the word more quickly and accurately.\n3. Move your eyes to the next word, repeating the process of identifying the bolded letters and recognizing the word.\n4. 
Continue this process at a rapid pace, moving your eyes from one word to the next.\n\nThis technique can help you read faster in several ways:\n\n* Reduced eye movement: By focusing on the bolded letters, you reduce the amount of eye movement needed to read each word, allowing you to move more quickly through the text.\n* Increased recognition: The bolded letters help your brain to recognize words more quickly, as your brain is able to latch onto the familiar pattern of letters.\n* Improved processing: The bolded letters can also help your brain to process the words more efficiently, as the bolded letters provide a clear cue for your brain to recognize the word.\n\nTo get the most out of this technique, try the following:\n\n* Start with a moderate pace and gradually increase your speed as you become more comfortable with the technique.\n* Practice with different texts and materials to get a feel for how the technique works with different types of writing.\n* Experiment with different bolding patterns, such as bolding the first letter of each word or the first letter of every other word, to find what works best for you.\n\nRemember, the key to success with this technique is to practice regularly and develop a consistent reading rhythm. With time and practice, you can improve your reading speed and comprehension using this method!\n[The End of Assistant A's Answer]\n\n[The Start of Assistant B's Answer]\nYou're interested in learning a fast reading method that utilizes visual cues to improve recognition and processing! I'd be happy to help you with that.\n\nThe method I'd like to introduce you to is called the \"Meta-Guiding\" technique. It's a visual cue-based method that can help you read faster and more efficiently.\n\n**What is Meta-Guiding?**\n\nMeta-Guiding is a technique that uses visual cues to guide your eyes as you read. 
The goal is to train your eyes to move more efficiently across the page, recognizing words and phrases more quickly, and processing the information more effectively.\n\n**How to Implement Meta-Guiding:**\n\n1. **Start by choosing a text**: Select a passage that you want to read quickly and efficiently. It can be a short article, a chapter, or even a book.\n2. **Identify the visual cues**: As you read, pay attention to the visual cues that catch your attention. These can be:\n\t* Headings and subheadings\n\t* Bold or italicized text\n\t* Images or graphics\n\t* Quotes or dialogue\n\t* Numbered or bulleted lists\n3. **Use these cues as guides**: As you read, use these visual cues to guide your eyes across the page. For example, if you see a heading, use it as a reference point to scan the surrounding text.\n4. **Practice active reading**: As you read, ask yourself questions like:\n\t* What is the main idea of this section?\n\t* What is the author trying to say?\n\t* What are the key points being made?\n5. **Use your peripheral vision**: Instead of focusing on individual words, use your peripheral vision to take in larger chunks of text. This will help you recognize patterns and relationships between words.\n6. **Practice, practice, practice!**: The more you practice using visual cues to guide your reading, the more efficient and effective you'll become.\n\n**Benefits of Meta-Guiding:**\n\n1. **Faster reading speeds**: By using visual cues, you can train your eyes to move more quickly across the page, allowing you to read faster.\n2. **Improved comprehension**: By focusing on the main ideas and key points, you'll be able to comprehend the text more effectively.\n3. **Reduced eye fatigue**: By using your peripheral vision and focusing on visual cues, you'll reduce eye fatigue and strain.\n4. **Increased retention**: By actively engaging with the text and using visual cues, you'll be able to retain more information.\n\n**Tips and Variations:**\n\n1. 
**Start with short texts**: Begin with shorter texts and gradually move to longer ones as you become more comfortable with the technique.\n2. **Experiment with different types of text**: Try using Meta-Guiding with different types of text, such as articles, blog posts, or even social media updates.\n3. **Combine with other reading techniques**: You can combine Meta-Guiding with other reading techniques, such as skimming or scanning, to further improve your reading efficiency.\n\nI hope this helps you get started with the Meta-Guiding technique! Remember to practice regularly and have fun exploring the benefits of this visual cue-based method.\n[The End of Assistant B's Answer]"
            }
        ]
# Preview how the chat template renders the example `messages` conversation.
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# tokenize=False is passed here, so apply_chat_template returns the rendered
# prompt text rather than token ids: despite its name, `prompt_token_ids`
# holds a string in this cell. Later cells rebind the name with tokenize=True.
prompt_token_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(prompt_token_ids)

In [None]:


# Seed torch's global RNG before the model is created.
# NOTE(review): only torch is seeded here; the `np.random` and `random` calls
# used elsewhere in this file are not seeded in the visible code.
torch.random.manual_seed(0)

# Alternative model kept for reference:
# model_name = "microsoft/Phi-3.5-mini-instruct"
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# One SamplingParams object is shared by every llm.generate call in this file.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=2000)
# tp_size and max_model_len are defined next to the vllm import at the top of
# the file (1 and 8192 respectively).
llm = LLM(model=model_name, tensor_parallel_size=tp_size, max_model_len=max_model_len, 
          trust_remote_code=True, enforce_eager=True)


In [None]:
from datasets import load_dataset
from tqdm import tqdm
import pickle

# The DPO dataset is a subset of WildChat. Its "instruction" field is looked
# up in a conversation_hash -> text mapping below, so WildChat is loaded to
# recover the actual instruction text.
dataset = load_dataset("facebook/Self-taught-evaluator-DPO-data")
WildChat = load_dataset("allenai/WildChat-1M")

# Build the conversation_hash -> message-content mapping.
# A previously pickled mapping can be restored instead of rebuilding it:
# with open("hash_id2content.pkl", "rb") as f:
#   hash_id2content = pickle.load(f)
hash_id2content = dict()
for ex in tqdm(WildChat["train"], total=len(WildChat["train"])):
    turn = ex["turn"]
    # Index 2 * (turn - 1) is presumably the user message of the final turn,
    # assuming messages strictly alternate user/assistant -- TODO confirm.
    hash_id2content[ex["conversation_hash"]] = ex["conversation"][2 * (turn - 1)]["content"]


print("Starting 2.")
# Replace the hash inside each example's "src" with the recovered text;
# examples whose hash is not in the mapping are dropped.
train_data = []
for ex in tqdm(dataset["train"], total=len(dataset["train"])):
    if ex["instruction"] not in hash_id2content:
        continue
    ex["src"] = ex["src"].replace(ex["instruction"], hash_id2content[ex["instruction"]])
    train_data.append(ex)

print("Starting 3.")
# Extract Instructions
# Each instruction appears to be repeated 6 times in a row in the dataset
# (reason unknown), so only every 6th row is read.
skip = 6
all_instructions = []
num_responses = len(dataset['train'])//skip
for i in tqdm(range(0, num_responses*skip, skip), total=num_responses):
    try:
        instruction_example = hash_id2content[dataset['train'][i]['instruction']]
    except KeyError:
        # Hash not present in the mapping: skip this instruction. Only
        # KeyError is caught so Ctrl-C and genuine errors still surface.
        continue
    all_instructions.append(instruction_example)

# Sanity check; guarded so an empty result does not raise IndexError here.
if all_instructions:
    print(all_instructions[0])
else:
    print("No instructions could be resolved from the dataset.")

In [None]:
# Template used to ask for a modified ("worse") instruction; it is filled via
# str.format with the fields `input` and `generation`.
worse_response_prompt_template = load_textfile_as_string('./prompts/worse_response_v2.prompt')
print(worse_response_prompt_template)
# Template for the pairwise judge prompt; it is filled via str.format with the
# fields `input`, `generation` and `generation2`.
judging_prompt_template = load_textfile_as_string('./prompts/eval_plan.prompt')
print(judging_prompt_template)

In [None]:
# Step 1: answer every original instruction; these answers are treated as the
# "positive" responses by the later cells.
generated_responses_pos_prompts = get_response_prompts(all_instructions)
# tokenize=True: each conversation is rendered through the chat template and
# handed to vLLM as token ids.
prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True) for messages in generated_responses_pos_prompts]
outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

# Keep the text of the first completion per prompt. Later cells pair these
# positionally with all_instructions, which relies on `outputs` being in
# prompt order.
generated_responses_pos = [output.outputs[0].text for output in outputs]
print(generated_responses_pos[0])

In [None]:
# Step 2: fill the worse-response template with each (instruction, positive
# response) pair and let the model generate text for it.
all_instructions_pos_neg = get_worse_response_question_prompts(all_instructions, generated_responses_pos, worse_response_prompt_template)
prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True) for messages in all_instructions_pos_neg]
outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

# Raw generations; the modified question is extracted from them later by
# extract_negative_questions.
all_instructions_neg = [output.outputs[0].text for output in outputs]
print(all_instructions_neg[0])

In [None]:
all_instructions[0]

In [None]:
generated_responses_pos[0]

In [None]:
all_instructions_neg[0]

In [None]:
# Step 3: extract the modified question from every generation. Entries whose
# generation lacks the "[The start of User Question]" marker are dropped, so
# the f_* lists can be shorter than all_instructions.
f_all_instructions, f_generated_responses_pos, f_all_instructions_neg = extract_negative_questions(all_instructions, generated_responses_pos, all_instructions_neg)

In [None]:
len(all_instructions), len(f_all_instructions), len(f_generated_responses_pos), len(f_all_instructions_neg)

In [None]:
# Disabled alternative that extracted negatives with extract_negative_responses.
# NOTE(review): it references `generated_responses_neg`, which is not defined
# anywhere in the visible code.
# items_to_judge = extract_negative_responses(all_instructions, generated_responses_pos, generated_responses_neg)
# print(items_to_judge[0])
# print(len(items_to_judge))

# Step 4: answer each modified instruction; these answers serve as the
# "negative" responses paired with the original instructions in the next cell.
generated_responses_neg_prompts = get_response_prompts(f_all_instructions_neg)
prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True) for messages in generated_responses_neg_prompts]
outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

# First completion per prompt, index-aligned with the f_* lists (relies on
# `outputs` being in prompt order).
f_generated_responses_neg = [output.outputs[0].text for output in outputs]
print(f_generated_responses_neg[0])

In [None]:
# Step 5: build pairwise judge prompts for the original instruction with its
# positive and negative responses in random order. labels[i] is 1 when the
# positive response was placed first (template field `generation`), else 0.
all_judgements, labels = create_judge_response_prompts(f_all_instructions, f_generated_responses_pos, f_generated_responses_neg, judging_prompt_template)
prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True) for messages in all_judgements]
outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

# Judge outputs; the scoring cell searches them for the "[[A]]" / "[[B]]" tags.
generated_judgements = [output.outputs[0].text for output in outputs]
print(generated_judgements[0])


In [None]:
# Inspect one filtered example end to end: the original instruction, its
# response, the modified instruction, and the response to the modified one.
idx = 0
print(f_all_instructions[idx])
print("_____"*10)
print(f_generated_responses_pos[idx])
print("_____"*10)
print(f_all_instructions_neg[idx])
print("_____"*10)
print(f_generated_responses_neg[idx])


In [None]:
import re

# Score every judgement against its label and collect the correct ones as SFT
# conversations (system prompt, judge prompt, judge answer).
#
# The verdict is taken from the LAST "[[A]]" / "[[B]]" tag in the judgement:
# the judge is asked to give its final verdict after the evaluation, and a
# plain substring test would count a judgement that mentions both tags as
# correct (and file it under A) no matter what it actually concluded.
verdict_pattern = re.compile(r"\[\[([AB])\]\]")

count_correct = 0
A_correct = []
B_correct = []
count_correct_format_incorrect_answer = 0
for judge_prompt, judgement, label in zip(all_judgements, generated_judgements, labels):
    verdicts = verdict_pattern.findall(judgement)
    if not verdicts:
        # No parsable verdict: neither correct nor "formatted incorrect".
        continue

    final_verdict = verdicts[-1]
    # label == 1 means the positive response was shown as assistant A.
    expected_verdict = "A" if label == 1 else "B"
    if final_verdict != expected_verdict:
        count_correct_format_incorrect_answer += 1
        continue

    count_correct += 1
    sft_sample = [{"role": "system", "content": "You are a helpful AI assistant."},
                  {"role": "user", "content": judge_prompt[1]["content"]},
                  {"role": "assistant", "content": judgement}]
    if final_verdict == "A":
        A_correct.append(sft_sample)
    else:
        B_correct.append(sft_sample)
print("Correct", count_correct, "/", len(all_judgements))
print("A correct: ", len(A_correct))
print("B correct: ", len(B_correct))
print("Formatted Incorrect: ", count_correct_format_incorrect_answer, "/", len(all_judgements))

In [None]:
# Balance the dataset
import random

# Downsample one verdict class to the size of the other: A is downsampled when
# it is strictly larger, otherwise B is passed through random.sample.
if len(B_correct) < len(A_correct):
    A_correct_sampled, B_correct_sampled = random.sample(A_correct, len(B_correct)), B_correct
else:
    A_correct_sampled, B_correct_sampled = A_correct, random.sample(B_correct, len(A_correct))

In [None]:
len(A_correct_sampled), len(B_correct_sampled)

In [None]:
# Calculate token statistics for the generated responses.
# NOTE(review): a tokenizer for the same model_name is already created in the
# model-setup cell; this reload looks redundant when cells run in order --
# confirm before removing.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def calculate_token_statistics(messages):
    """Print length statistics (in tokens) for a list of conversations.

    Each conversation is rendered with the module-level ``tokenizer`` via
    ``apply_chat_template`` (no generation prompt) and its token count is
    recorded. Mean, median, max, min and standard deviation are printed.

    Args:
        messages: List of conversations, each a list of chat-message dicts.

    Returns:
        None. When ``messages`` is empty a notice is printed instead of the
        statistics, because ``np.max`` / ``np.min`` raise ``ValueError`` on an
        empty sequence.
    """
    token_counts = [
        len(tokenizer.apply_chat_template(message, add_generation_prompt=False, tokenize=True))
        for message in messages
    ]
    if not token_counts:
        print("No messages to compute token statistics for.")
        return
    print("Mean Tokens: ", np.mean(token_counts))
    print("Median Tokens: ", np.median(token_counts))
    print("Max Tokens: ", np.max(token_counts))
    print("Min Tokens: ", np.min(token_counts))
    print("STD Tokens: ", np.std(token_counts))

calculate_token_statistics(A_correct_sampled+ B_correct_sampled)

In [None]:
# Save the dataset
import json

# Destination file for the balanced SFT conversations
output_file_path = 'all_judgements_llama.json'

# One {"messages": conversation} record per kept judgement, A samples first
all_sft_samples = A_correct_sampled + B_correct_sampled
json_array = [{"messages": conversation} for conversation in all_sft_samples]

# Serialise the records as a pretty-printed JSON array, keeping non-ASCII text
with open(output_file_path, 'w', encoding='utf-8') as f:
    json.dump(json_array, f, ensure_ascii=False, indent=4)