In [52]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '4'

import sys
import torch
import random
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import ast
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, GPTNeoXForCausalLM

# Print the name of every CUDA device this process can see
# (the set is restricted by the CUDA_VISIBLE_DEVICES value assigned at the top of the cell).
n_visible_gpus = torch.cuda.device_count()
for gpu_idx in range(n_visible_gpus):
    gpu_properties = torch.cuda.get_device_properties(gpu_idx)
    print(gpu_properties.name)

# Directory that contains the Open-Assistant `model_training` package.
# Defaults to the original cluster location; set OPEN_ASSISTANT_MODEL_DIR to run the
# notebook on a machine where the repository lives somewhere else.
open_assistant_model_dir = os.environ.get(
    "OPEN_ASSISTANT_MODEL_DIR",
    "/nas/ucb/henrypapadatos/Open-Assistant/model",
)
# Guard the append so that re-running this cell does not keep adding duplicate entries
if open_assistant_model_dir not in sys.path:
    sys.path.append(open_assistant_model_dir)
# NOTE(review): imported for its side effects only (the name is never used directly) —
# presumably it registers the reward-model architecture so the Auto* loaders can
# resolve the checkpoint; confirm against the Open-Assistant repository.
import model_training.models.reward_model



NVIDIA RTX A6000


In [3]:
# Input CSV with the sampled generations. The scoring cell reads its `N_response`
# column, where every entry is the string repr of a Python list of responses.
# NOTE(review): the file name suggests best-of-32 samples from an OpenAssistant
# pythia-12b SFT model on a poem-feedback task — confirm against the generation script.
file_name = 'Bo32_OpenAssistant_pythia-12b-sft-v8-2.5k-steps_11_16_2023_15_10_37_feedback_poem_5.csv'
# Path is relative to the notebook's working directory
file_path = os.path.join('results', file_name)
df = pd.read_csv(file_path)

In [8]:
# Checkpoint identifier of the reward model used to score the generated responses
model_name_or_path = "OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1"

# Load the sequence-classification head in half precision; device_map="auto"
# delegates weight placement to the library.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Matching tokenizer, configured to pad on the left
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    padding_side="left",
    legacy=False,
)

# Sanity check: which checkpoint was loaded and on which device it sits
print(model_name_or_path, model.device, sep="\n")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1
cuda:0


In [55]:
#take the N_responses column
N_response = df['N_response']

#create a dictionary with keys 1,2,4,8,16,32; these will be the number of samples (N)
#BoN[N] collects, for every prompt, the best response among N randomly drawn candidates
BoN = {2**i: [] for i in range(6)}

#text that separates the prompt from the generated answer, and the eos token to strip
ASSISTANT_MARKER = '<|assistant|>Here are my comments:'
EOS_TOKEN = '<|endoftext|>'

#dedicated, seeded generator: the best-of-N draw is reproducible across runs and the
#global `random` state is left untouched
BON_SEED = 0
bon_rng = random.Random(BON_SEED)

#iterate through the N_responses column and for each response
for index, response in tqdm(N_response.items(), total=len(N_response)):
    #response is a string, we convert it to a list
    response_list = ast.literal_eval(response)

    #iterate over each of the generated responses for this prompt and get the reward score
    score_list = []
    #inference only: no_grad avoids building an autograd graph for every forward pass
    with torch.no_grad():
        for el in response_list:
            inputs = tokenizer(el, return_tensors="pt").to(model.device)
            score = model(**inputs).logits[0].item()
            score_list.append(score)

    #we now sample N(=n_samples) candidates and store the best one in the BoN dictionary
    for n_samples in BoN.keys():
        #sample indices rather than scores so that the winner is guaranteed to be one of
        #the sampled candidates even when several responses share the same score
        #(random.sample raises ValueError if fewer than n_samples responses are available)
        sampled_indices = bon_rng.sample(range(len(score_list)), n_samples)
        best_score_index = max(sampled_indices, key=score_list.__getitem__)

        #we keep only the response, not the prompt, and we remove the eos token
        _, marker_found, completion = response_list[best_score_index].partition(ASSISTANT_MARKER)
        if not marker_found:
            raise ValueError(
                f"Row {index}: response {best_score_index} does not contain {ASSISTANT_MARKER!r}"
            )
        best_response = completion.replace(EOS_TOKEN, '')
        BoN[n_samples].append(best_response)

3it [00:13,  4.41s/it]


In [66]:
#attach one column per sample size N, holding the selected response for every prompt
for n_samples, best_responses in BoN.items():
    df[f'BoN_{n_samples}'] = best_responses

#output name: input name without its 4-character extension, tagged with the reward model id;
#any '/' is replaced so the name is not interpreted as a nested path
new_file_name = f'{file_name[:-4]}_{model_name_or_path}.csv'.replace('/', '_')
print(new_file_name)
new_file_path = os.path.join('results', new_file_name)

#write the augmented dataframe to disk, without the index
df.to_csv(new_file_path, index=False)

df.head()

Bo32_OpenAssistant_pythia-12b-sft-v8-2.5k-steps_11_16_2023_15_10_37_feedback_poem_5_OpenAssistant_oasst-rm-2-pythia-6.9b-epoch-1.csv


Unnamed: 0,prompt,N_response,type,BoN_1,BoN_2,BoN_4,BoN_8,BoN_16,BoN_32
0,<|prompter|>Please comment briefly on the foll...,['<|prompter|>Please comment briefly on the fo...,base,"\nIt is a poem about beauty and fate, and the ...","\n\n""In the realm of cosmic serendipity,\nwher...",\nThe poem is an ode to the power of beauty an...,\n\n* This poem captures the beauty and myster...,"\n\n""In the realm of cosmic serendipity,\nwher...",\n\n* This poem captures the beauty and myster...
1,<|prompter|>Please comment briefly on the foll...,['<|prompter|>Please comment briefly on the fo...,like,"\n\n""In the realm of cosmic serendipity, where...",\n\nThis poem is a very well-written and poeti...,\n\n1. The poem paints a beautiful picture of ...,\n\n1. The poem paints a beautiful picture of ...,"\n\n""In the realm of cosmic serendipity, where...","\n\n""In the realm of cosmic serendipity, where..."
2,<|prompter|>Please comment briefly on the foll...,['<|prompter|>Please comment briefly on the fo...,dislike,\n\nThe poem seems to be promoting a kind of f...,\n\nThis poem is not well-written. It is overl...,\n\nTitle: The realm of cosmic serendipity\n\n...,"\n\n- The poem is flowery and poetic, with a l...","\n\n- The poem is flowery and poetic, with a l...",\n\nI have the same opinion as you that I don'...
