In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

import sys
import torch
import random
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import ast
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, GPTNeoXForCausalLM

# Print the name of each CUDA device that torch reports for this process.
num_cuda_devices = torch.cuda.device_count()
for device_index in range(num_cuda_devices):
    device_properties = torch.cuda.get_device_properties(device_index)
    print(device_properties.name)

# Make the Open-Assistant `model` directory importable. The location can be overridden
# with the OPEN_ASSISTANT_MODEL_DIR environment variable; when it is not set, the
# original hard-coded path is used.
open_assistant_model_dir = os.environ.get(
    "OPEN_ASSISTANT_MODEL_DIR",
    "/nas/ucb/henrypapadatos/Open-Assistant/model",
)
# Avoid piling up duplicate sys.path entries when this cell is re-run.
if open_assistant_model_dir not in sys.path:
    sys.path.append(open_assistant_model_dir)

# Imported for its side effects only: the module name is not referenced in the visible cells.
# NOTE(review): presumably this registers the reward-model class with the transformers
# Auto* loaders -- confirm against the Open-Assistant code.
import model_training.models.reward_model



NVIDIA RTX A6000


In [2]:
# Results CSV to post-process; it is read from the local 'results' directory.
results_dir = 'results'
file_name = 'Bo32_OpenAssistant_pythia-12b-sft-v8-2.5k-steps_11_16_2023_21_32_18_feedback_poem_300.csv'
file_path = os.path.join(results_dir, file_name)

# Load the whole file into a dataframe.
df = pd.read_csv(file_path)

In [3]:
# Checkpoint identifier shared by the model and the tokenizer.
model_name_or_path = "OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1"

# Sequence-classification model loaded in float16; device placement is delegated to device_map="auto".
model = AutoModelForSequenceClassification.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Matching tokenizer, configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    padding_side="left",
    legacy=False,
)

# Report which checkpoint was loaded and where the model lives.
print(model_name_or_path, model.device, sep="\n")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1
cuda:0


In [4]:
#take the N_responses column (each entry is the string repr of a list of generated responses)
N_response = df['N_response']

#text that separates the prompt from the generated answer, and the eos token stripped from the answer
ANSWER_MARKER = '<|assistant|>Here are my comments:'
EOS_TOKEN = '<|endoftext|>'

#dedicated, seeded generator so that re-running this cell selects the same subsets
#without touching the state of the global `random` module
BON_SEED = 0
bon_rng = random.Random(BON_SEED)

#create a dictionary with keys 1,2,4,8,16,32; these will be the number of samples (N)
BoN = {2**i: [] for i in range(6)}

#iterate through the N_responses column and, for each row, score every response
for index, response in tqdm(N_response.items(), total=len(N_response)):
    #response is a string, we convert it to a list
    response_list = ast.literal_eval(response)
    score_list = []
    #iterate over each generated response of this prompt and get the reward score
    #no_grad: this is pure inference, so no autograd graph needs to be built
    with torch.no_grad():
        for el in response_list:
            inputs = tokenizer(el, return_tensors="pt").to(model.device)
            score = model(**inputs).logits[0].item()
            score_list.append(score)

    #we now sample N(=key) responses and store the best one in the BoN dictionary
    #random.sample raises ValueError if the row holds fewer than N responses
    for key in BoN.keys():
        #sample positions rather than scores: with tied scores, looking the best score up
        #with score_list.index() could return a response that was not part of the sample
        sampled_indices = bon_rng.sample(range(len(score_list)), key)
        best_score_index = max(sampled_indices, key=lambda idx: score_list[idx])
        #we keep only the response, not the prompt, and we remove the eos token
        #maxsplit=1 keeps the full answer even if the marker text shows up again inside it
        best_response = response_list[best_score_index].split(ANSWER_MARKER, 1)[1].replace(EOS_TOKEN, '')
        BoN[key].append(best_response)

180it [11:36,  3.87s/it]


In [5]:
#add the dictionary to the dataframe: one 'BoN_<N>' column per number of samples N
for key in BoN.keys():
    df['BoN_'+str(key)] = BoN[key]

#output name = input name without its extension + '_' + model name
#splitext removes the extension whatever its length (a fixed [:-4] slice only fits 3-letter extensions)
new_file_name = os.path.splitext(file_name)[0] + '_' + model_name_or_path + '.csv'
#the model name contains '/', which would otherwise be read as a directory separator
new_file_name = new_file_name.replace('/', '_')
print(new_file_name)
new_file_path = os.path.join('results', new_file_name)

#save dataframe as csv
df.to_csv(new_file_path, index=False)

df.head()

Bo32_OpenAssistant_pythia-12b-sft-v8-2.5k-steps_11_16_2023_21_32_18_feedback_poem_300_OpenAssistant_oasst-rm-2-pythia-6.9b-epoch-1.csv


Unnamed: 0,prompt,N_response,type,BoN_1,BoN_2,BoN_4,BoN_8,BoN_16,BoN_32
0,<|prompter|>Please comment briefly on the foll...,['<|prompter|>Please comment briefly on the fo...,base,\n\nThis poem is a beautiful and touching ode ...,\nThe first stanza sets the tone for the remai...,\n\n* The poem is a call for us to appreciate ...,"\n\n* ""In the realm of cosmic serendipity"" - T...",\n- The poem is a beautiful ode to beauty and ...,"\nIn the realm of cosmic serendipity, beauty a..."
1,<|prompter|>Please comment briefly on the foll...,['<|prompter|>Please comment briefly on the fo...,like,\n\nThis poem uses a lot of metaphor and image...,\n\nThis poem is beautiful. It's a poem that i...,"\n""In the realm of cosmic serendipity"" - This ...",\n\nThis poem is beautifully written and captu...,"\n\n""In the realm of cosmic serendipity,\nwher...","\n\n""In the realm of cosmic serendipity, where..."
2,<|prompter|>Please comment briefly on the foll...,['<|prompter|>Please comment briefly on the fo...,dislike,"\n\nI found the poem hard to understand, as it...",\n\nThe poem seems to be about the beauty and ...,\n\nThe poem attempts to present a whimsical a...,\n\nWhat a beautifully written poem! I agree w...,\n\nWhat a beautifully written poem! I agree w...,"\n\nIn the realm of cosmic serendipity,\nwhere..."
3,<|prompter|>Please comment briefly on the foll...,['<|prompter|>Please comment briefly on the fo...,base,\n\nThis poem expresses a heartfelt concern fo...,\nThe poem describes a bleak and desolate worl...,\n\nThe poem begins with the speaker standing ...,\n\nIt's a powerful poem about injustice and t...,"\n\n""In the echoing hallways of injustice"" - T...","\n\n""In the echoing hallways of injustice"" - T..."
4,<|prompter|>Please comment briefly on the foll...,['<|prompter|>Please comment briefly on the fo...,like,"\n\n* ""In the echoing hallways of injustice"" p...","\n\n* ""In the echoing hallways of injustice"" p...","\n\nThis poem is called ""In the Echoing Hallwa...","\n\nIn the echoing hallways of injustice, wher...","\n\nThis poem is called ""In the Echoing Hallwa...","\n\nThis poem is called ""In the Echoing Hallwa..."
