In [10]:
import argparse
import bitsandbytes as bnb
from datasets import load_dataset
from functools import partial
import os
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, AutoPeftModelForCausalLM
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed, Trainer, TrainingArguments, BitsAndBytesConfig, \
    DataCollatorForLanguageModeling, Trainer, TrainingArguments
from datasets import load_dataset
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Reproducibility
seed = 42
set_seed(seed)
dataset_name = "GAIR/lima"
dataset = load_dataset(dataset_name)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.08k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/368 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.97M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/47.6k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [11]:
instructions = []
for i in dataset['train']['conversations']:
    instructions.append(i[0])

In [12]:
import random  

selected_instructions = random.sample(instructions, 50)  
selected_instructions[0]

'Can I spend the night alone in a tent in a forest outside Stockholm in -20°C without risking my life?\n\nThe backstory\nFrom the end of January, I\'m starting my studies in a suburb of Stockholm. I\'ve decided to, if it turns out plausible, not rent an apartment, but live in a tent. (This is not out of frugality, but out of a will to try something new.)\nI do have friends who I could visit once a week or so to prepare food and wash my clothes, so I think I can solve the practical problems, or at least those that I\'ve come to think of. I\'d camp in one of the forests, maybe 1 km from "civilisation". I\'d have access to showers etc at university every day.\nHowever: I don\'t want to freeze to death in my sleep! That\'s very important to me. I\'ve read that the nights can get as cold as -20°C (-4°F). With the proper preparations, would this be a plausible way of living, at least for a month or so?\nI do have camping experience, and have been hiking for three weeks, but only in summer.'

In [13]:
from tqdm import tqdm

model_name = "dinaaaaaa/llama2_7b_DPO_lima_rand_sel_50_preference"

def create_bnb_config():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=False,
        # load_in_4bit=True,
        # bnb_4bit_use_double_quant=True,
        # bnb_4bit_quant_type="nf4",
        # bnb_4bit_compute_dtype=torch.bfloat16,
    )
    return bnb_config

def load_model(model_name, bnb_config):
    n_gpus = 2
    max_memory = f'{40960}MB'
    # model = AutoModelForCausalLM.from_pretrained(
    #     model_name,
    #     quantization_config=bnb_config,
    #     device_map="auto", 
    #     max_memory = {i: max_memory for i in range(n_gpus)},
    # )
    model = AutoPeftModelForCausalLM.from_pretrained(
        model_name,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        load_in_4bit=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer

bnb_config = create_bnb_config()
model, tokenizer = load_model(model_name, bnb_config)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


all_responses = []

# Use the appropriate chat template for llama2
system_message = "You are a helpful assistant. Please briefly respond to the instruction."
llama2_prompt_template = lambda system_message, user_message: f"<s>[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{user_message} [/INST] "

# Generate 5 responses for each instruction
for text in tqdm(selected_instructions):
    inputs = tokenizer(llama2_prompt_template(system_message, text), return_tensors="pt").to(device)
    input_length = inputs["input_ids"].shape[1]
    outputs = model.generate(
        input_ids=inputs["input_ids"].to(device), 
        attention_mask=inputs["attention_mask"], 
        max_new_tokens=64, 
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=5,
        temperature=1.5
        )
    
    responses = [tokenizer.decode(output[input_length:], skip_special_tokens=True) for output in outputs]
    all_responses.append(responses)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



tokenizer_config.json:   0%|          | 0.00/1.76k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

100%|██████████| 50/50 [08:13<00:00,  9.86s/it]


In [61]:
print(all_responses[0][4])

As a responsible and ethical AI language model, I must inform you that living in a tent in a forest outside Stockholm in -20°C is not a plausible or safe option for several reasons. Stockholm experiences a continental climate, with long, cold winters, and the temperature you mentioned is


In [72]:
all_scores = []

# Use the appropriate chat template for llama2
system_message = '''Review the user's question and the corresponding response using the additive 5-point
scoring system described below. Points are accumulated based on the satisfaction of each
criterion:
- Add 1 point if the response is relevant and provides some information related to
the user's inquiry, even if it is incomplete or contains some irrelevant content.
- Add another point if the response addresses a substantial portion of the user's question,
but does not completely resolve the query or provide a direct answer.
- Award a third point if the response answers the basic elements of the user's question in a
useful way, regardless of whether it seems to have been written by an AI Assistant or if it
has elements typically found in blogs or search results.
- Grant a fourth point if the response is clearly written from an AI Assistant's perspective,
addressing the user's question directly and comprehensively, and is well-organized and
helpful, even if there is slight room for improvement in clarity, conciseness or focus.
- Bestow a fifth point for a response that is impeccably tailored to the user's question
by an AI Assistant, without extraneous information, reflecting expert knowledge, and
demonstrating a high-quality, engaging, and insightful answer.
User: <INSTRUCTION_HERE>
<response><RESPONSE_HERE></response>
After examining the user's instruction and the response:
- Briefly justify your total score, up to 100 words.
- Conclude with the score using the format: “Score: <total points>”
Remember to assess from the AI Assistant perspective, utilizing web search knowledge as
necessary. To evaluate the response in alignment with this additive scoring model, we'll
systematically attribute points based on the outlined criteria.'''
END_KEY = "Score: "
instruction2responce = {}
for i in range(50):
    instruction2responce[selected_instructions[i]] = all_responses[i]
# print(instruction2responce[selected_instructions[0]])
# print(all_responses[:5])

# Generate 1 score for each template
for instruction in tqdm(selected_instructions):
    for responce in instruction2responce[instruction]:
        template = system_message + "\n User: " + instruction + "\n <response>" + responce[0] + "</response>" + "\n " + END_KEY
        inputs = tokenizer(template, return_tensors="pt").to(device)
        input_length = inputs["input_ids"].shape[1]
        outputs = model.generate(
            input_ids=inputs["input_ids"].to(device), 
            attention_mask=inputs["attention_mask"], 
            max_new_tokens=1, 
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
            # temperature=1.5
            )
        
        output = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # print(output[-1])
        # break
        score = 1

        try:
            score = int(output[-1])
        except:
            score = 1
        
        all_scores.append(score)
print(all_scores)

100%|██████████| 50/50 [02:42<00:00,  3.25s/it]

[5, 2, 2, 5, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 4, 4, 4, 2, 4, 2, 2, 2, 3, 2, 4, 2, 2, 5, 4, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 4, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 4, 4, 2, 2, 3, 4, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 5, 5, 5, 5, 5, 2, 2, 2, 1, 2, 2, 2, 2, 4, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 4, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 3, 4, 3, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 5, 1, 5, 1, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 5, 4, 2, 5, 5, 4, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1]





In [73]:
len(all_scores)

250

In [76]:
from collections import defaultdict
pair = defaultdict(list)
for i in range(50):
    for left in range(i*5, (1+i) *5 - 1):
        for right in range(left + 1, (1+i) * 5):
            if all_scores[left] != all_scores[right]:
                pair['prompt'].append(selected_instructions[i])
                if all_scores[left] > all_scores[right]:
                    pair['chosen'].append(all_responses[i][left - i*5])
                elif all_scores[left] < all_scores[right]:
                    pair['chosen'].append(all_responses[i][right - i*5])
                pair['chosen-rating'].append(int(max(all_scores[left], all_scores[right])))
                if all_scores[left] < all_scores[right]:
                    pair['rejected'].append(all_responses[i][left - i*5])
                elif all_scores[left] > all_scores[right]:
                    pair['rejected'].append(all_responses[i][right - i*5])
                pair['rejected-rating'].append(int(min(all_scores[left], all_scores[right])))
from datasets import Dataset, DatasetDict           
new_train_dataset = Dataset.from_dict(pair)
new_dataset_dict = DatasetDict({"train": new_train_dataset})

In [77]:
new_dataset_dict['train']

Dataset({
    features: ['prompt', 'chosen', 'chosen-rating', 'rejected', 'rejected-rating'],
    num_rows: 223
})

In [78]:
hf_dataset_name = f"lima_rand_sel_50_preference_self_reward" 
new_dataset_dict.push_to_hub(hf_dataset_name)  

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/dinaaaaaa/lima_rand_sel_50_preference_self_reward/commit/ab4932783d1e31a073d2bfacf961fbff5997faf1', commit_message='Upload dataset', commit_description='', oid='ab4932783d1e31a073d2bfacf961fbff5997faf1', pr_url=None, pr_revision=None, pr_num=None)