In [1]:
!pip install datasets
!pip install transformers
!pip install peft
!pip install evaluate
!pip install tqdm 
!pip install gpustat
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from datasets import load_dataset
from tqdm.notebook import tqdm
import time
import json

Collecting peft
  Downloading peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.10.0-py3-none-any.whl (199 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.10.0


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # the device to load the model onto

# specify how to quantize the model
quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", quantization_config=quantization_config, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [15]:
revised_data = []
dataset = load_dataset("fka/awesome-chatgpt-prompts")

# Access the 'train' split
train_dataset = dataset['train']

# Extract prompts from the 'train' split
dataset_prompt = train_dataset['prompt']

dataset_prompt = dataset_prompt[:1]

def revision(principles_list):
    for prompt in tqdm(dataset_prompt):
        input_id = tokenizer.encode(prompt, return_tensors='pt')
        output = model.generate(input_id, max_new_tokens=50)
        output = output[:, len(input_id[0]):]
        #base_answer = output[0][len(input_id[0]):].tolist()
        base_ans = output[0].tolist()
        base_answer = tokenizer.decode(base_ans, skip_special_tokens=True)
        
   
        for principle in principles_list:
            critique = f"Revise the following response with respect to {principle}: '{base_answer}'. Please be concise in your answer and try to answer in 50 tokens."
            new_inputs = tokenizer.encode(critique, return_tensors='pt')
            new_out = model.generate(new_inputs, max_new_tokens=50,)
            new_out = new_out[:, len(new_inputs[0]):]
            criticized_answer = tokenizer.decode(new_out[0], skip_special_tokens=True)
            base_answer = criticized_answer
        
        revised_data.append({
            "prompt": prompt,
            "revised_answer": base_answer,
        })
    
    with open("revised_datafile.json", "w") as f:
        json.dump(revised_data, f, indent=4)


In [16]:
principles_list = ["honesty", "prudence"]
revision(principles_list)

  0%|          | 0/1 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
