In [1]:
!pip install llm-blender -q

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [11]:
from datasets import Dataset, load_dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import time
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

In [3]:
dataset = load_dataset("GAIR/lima")

In [4]:
df = pd.DataFrame(dataset['train'])

In [5]:
df_sample = df.sample(50,random_state=42)

In [6]:
len(df_sample)

50

In [7]:
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

In [15]:
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards: 100%|██████████| 3/3 [00:02<00:00,  1.14it/s]


In [13]:
def generate_responses(instruction, num_responses=5):
    responses = []
    for _ in range(num_responses):
        prompt = f"[INST] {instruction} [/INST]"
        inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(inputs, max_new_tokens = 100, temperature=0.9, do_sample=True)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        responses.append(response.split('[/INST] ')[-1])
    return responses

In [16]:
all_responses = []
for i in tqdm(range(len(df_sample))):
    instruction = df_sample['conversations'].values[i][0]
    responses = generate_responses(instruction)
    all_responses.append(responses)

  0%|          | 0/50 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2

In [17]:
df_sample['all_responses'] = all_responses

In [18]:
df_sample['conversations'].values[0][0]

'I\'m using wp_nav_menu and am trying to create custom output for the sub-level drop downs.  I came across the "items_wrap" argument but there\'s really not much information as to what it is, how it works, and what kind of things can be done with it.  \nWhat exactly is "%1$s" and "%2$s"?  (Can anyone explain it in layman\'s terms?)'

In [19]:
df_sample['all_responses'].values[0]

['In the context of the `wp_nav_menu()` function in WordPress, the `items_wrap` argument is a PHP template tag used to customize the HTML structure and output of the menu items and their wrappers.\n\nThe `%1$s` and `%2$s` are placeholders for the menu item markup. They represent the following:\n\n1. `%1$s` refers to the menu item markup. This is typically the',
 "In the context of `wp_nav_menu` function, the `items_wrap` argument is a PHP template tag used to define custom HTML structure for the menu items and their wrappers. It's an alternative to using HTML and CSS directly to style the menu.\n\nThe `%1$s` and `%2$s` placeholders in the `items_wrap` argument are called format strings, which allow you to specify dynamic content. These placeholders are replaced",
 "In the context of `wp_nav_menu()` function in WordPress, the `items_wrap` argument is a PHP template tag that allows you to customize the HTML structure and output of the menu items, including sub-level dropdowns.\n\nThe `%1

In [20]:
# dataset = Dataset.from_pandas(df1)

In [21]:
# dataset.push_to_hub("Arnab13/lima-candidates")

In [22]:
import llm_blender
blender = llm_blender.Blender()
blender.loadranker("llm-blender/PairRM") # load PairRM

Fetching 11 files: 100%|██████████| 11/11 [00:17<00:00,  1.61s/it]


Successfully loaded ranker from  /home/jovyan/.cache/huggingface/hub/llm-blender/PairRM


In [23]:
inputs = []
candidates_texts = []
for i in range(len(df_sample)):
    inputs.append(df_sample['conversations'].values[i][0])
    candidates_texts.append(df_sample.all_responses.values[i])

In [24]:
ranks = blender.rank(inputs, candidates_texts, return_scores=False, batch_size=1)

Ranking candidates: 100%|██████████| 50/50 [00:39<00:00,  1.26it/s]


In [25]:
best_list = []
worst_list = []
for r in range(len(ranks)):
    for i in range(5):
        if ranks[r][i] == 1:
            best_list.append(candidates_texts[r][i])
        elif ranks[r][i] == 5:
            worst_list.append(candidates_texts[r][i])

In [27]:
df_sample['prompt'] = inputs
df_sample['chosen'] = best_list
df_sample['rejected'] = worst_list

In [29]:
preferred_dataset = Dataset.from_pandas(df_sample[['prompt', 'chosen', 'rejected']])

In [30]:
preferred_dataset.push_to_hub("Arnab13/lima_preferred")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1044.40ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.73it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/Arnab13/lima_preferred/commit/6de97a73129e81441742db8231fa0c22bcc92d93', commit_message='Upload dataset', commit_description='', oid='6de97a73129e81441742db8231fa0c22bcc92d93', pr_url=None, pr_revision=None, pr_num=None)