In [None]:
import json
import os

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM

from build_data import *

In [None]:
# Select the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = "cpu"
print(device)

In [None]:
# Read the prompt subset from disk and extract the 'Prompt' column as a
# plain Python list for the sampler.
df = pd.read_csv('hh_prompt_subset.csv')
prompts = df.loc[:, 'Prompt'].tolist()

In [None]:
# load the reference model and its tokenizer
# NOTE(review): `args` is not defined in any visible cell -- presumably it is
# supplied by `from build_data import *` or by a removed argparse cell; confirm.
ref_model = AutoModelForCausalLM.from_pretrained(args.ref_model_dir)
# Only the padding *side* is configured at construction time. Whether to pad
# is decided per call (e.g. `tokenizer(texts, padding=True)`), so no `padding`
# argument is passed here.
tokenizer = AutoTokenizer.from_pretrained(args.ref_model_dir, padding_side='left')
# Reuse the EOS token as the pad token; gen_kwargs likewise passes
# tokenizer.eos_token_id as pad_token_id.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Load the reward model (a sequence-classification head) together with its
# own tokenizer, which pads on the right.
# NOTE(review): `args` is not defined in any visible cell -- presumably it is
# supplied by `from build_data import *` or by a removed argparse cell; confirm.
reward_model_dir = args.reward_model_dir
rw_model = AutoModelForSequenceClassification.from_pretrained(reward_model_dir)
rw_tokenizer = AutoTokenizer.from_pretrained(
    reward_model_dir,
    padding_side='right',
)

In [None]:
# Generation settings handed to best_and_worst_of_n_sampler as `gen_kwargs`.
# Sampling is enabled, and the pad token id is set to the reference
# tokenizer's EOS id.
# NOTE(review): presumably forwarded to the model's generate() call inside
# build_data -- confirm there.
gen_kwargs = dict(
    min_length=-1,
    top_k=0.0,
    top_p=1.0,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    temperature=1.0,
)

In [None]:
# Place both models on the selected device (GPU or CPU, whichever `device`
# holds) and switch each one to eval mode.
ref_model.to(device).eval()
rw_model.to(device).eval()
print('Model loaded.')

In [None]:
# Build the best/worst datasets with the helper from build_data.
# The result is indexed by each value in n_seq (the save cell reads
# best_worst_ds[3] and best_worst_ds[8]).
best_worst_ds = best_and_worst_of_n_sampler(
    prompts,
    batch_size=2,
    max_len=256,
    model=ref_model,
    tokenizer=tokenizer,
    rw_model=rw_model,
    rw_tokenizer=rw_tokenizer,
    device=device,
    gen_kwargs=gen_kwargs,
    n_seq=[3,8],
)

In [None]:
# save the data
# Writes one JSON-lines file per best-of-n setting; every line is a single
# {key: value} record taken from best_worst_ds[pn].
# The n values here must match the n_seq list given to
# best_and_worst_of_n_sampler.
# NOTE(review): `args` is not defined in any visible cell, and the sampler was
# called with a hard-coded max_len=256 -- confirm args.maxlen agrees with it.
# NOTE(review): the output path is anchored at the filesystem root ('/');
# confirm that is intended rather than a dropped output-directory prefix.
for pn in [3,8]:
    dict_to_save = dict(best_worst_ds[pn])

    full_filename = f'/best-of-{pn}/bw_maxlen_{args.maxlen}.jsonl'
    # open(..., 'w') does not create missing parent directories, so make sure
    # the per-n output folder exists before writing.
    os.makedirs(os.path.dirname(full_filename), exist_ok=True)
    with open(full_filename, 'w') as f:
        f.writelines(
            json.dumps({key: value}) + '\n'
            for key, value in dict_to_save.items()
        )

    print(f'Best of {pn} data saved to {full_filename}')