In [7]:
from trl import PPOConfig, PPOTrainer
import utils
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoModel,
    Trainer,
    TrainingArguments,
    BertModel,
    pipeline,
    AutoModelForSequenceClassification,
)
import yaml
import getpass
import wandb
from typing import Dict, Any
import torch as t
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType
from tqdm import tqdm
import trl
import torch.nn.functional as F

import datasets
import random
import os
import time

In [12]:
!nvidia-smi

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Thu May  9 22:47:04 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.161.08             Driver Version: 535.161.08   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          On  | 00000000:CA:00.0 Off |                    0 |
| N/A   43C    P0              70W / 400W |  42280MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
test_counter = 0
device = "cuda"

print(f"test{test_counter}: {t.cuda.memory_allocated()}")
test_counter += 1

reward_model_path = "./drive/root/project_data/calibrated_alignment/runs/instruct/training/reward_model/run_3/checkpoints/checkpoint-4000"

reward_model = AutoModelForSequenceClassification.from_pretrained(reward_model_path, torch_dtype=t.bfloat16).eval()
reward_model = reward_model.to(device)

reward_model.config.pad_token_id = reward_model.config.eos_token_id

tokenizer = AutoTokenizer.from_pretrained(reward_model_path,)
tokenizer.pad_token = tokenizer.eos_token

dataset_info = {
    "name": "Anthropic/hh-rlhf",
    "data_dir": "default" 
}

print(f"test{test_counter}: {t.cuda.memory_allocated()}")
test_counter += 1

test0: 0




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at mistralai/Mistral-7B-v0.1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


test1: 14771855360


In [3]:
def prep_for_reward_trainer(sample):
    # print(sample)
    chosen = [p + c for p, c in zip(sample["prompt"], sample["chosen"])]
    chosen_inputs = tokenizer(
        chosen,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=1536,
    )

    rejected = [p + r for p, r in zip(sample["prompt"], sample["rejected"])]
    rejected_inputs = tokenizer(
        rejected,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=1536,
    )
    return {
        "input_ids_chosen": chosen_inputs["input_ids"],
        "attention_mask_chosen": chosen_inputs["attention_mask"],
        "input_ids_rejected": rejected_inputs["input_ids"],
        "attention_mask_rejected": rejected_inputs["attention_mask"],
    }

In [31]:
N = 50
dataset = utils.load_dataset(tokenizer, dataset_info['name'], dataset_info['data_dir'], debug=True)

random.seed(os.urandom(100))
indices = random.sample(range(len(dataset["train"])), N)

dataset["train"] = dataset["train"].select(indices)
# dataset["test"] = dataset["test"].select(range(N))

dataset = dataset.map(prep_for_reward_trainer, batched=True)

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [32]:
with t.no_grad():
    sample = dataset['train']
    # chosen, rejected = sample['chosen'][i], sample['rejected'][i]
    attention_mask_rejected = t.tensor(sample['attention_mask_rejected']).to(device)
    input_ids_chosen = t.tensor(sample['input_ids_chosen']).to(device)
    attention_mask_chosen = t.tensor(sample['attention_mask_chosen']).to(device)
    input_ids_rejected = t.tensor(sample['input_ids_rejected']).to(device)
    
    output_chosen = reward_model(input_ids_chosen, attention_mask_chosen)
    output_rejected = reward_model(input_ids_rejected, attention_mask_rejected)
    

In [37]:
t.softmax(output_rejected.logits, dim = 1)

tensor([[5.2853e-08, 1.0000e+00],
        [1.2890e-06, 1.0000e+00],
        [2.7716e-06, 1.0000e+00],
        [6.8545e-07, 1.0000e+00],
        [4.5169e-08, 1.0000e+00],
        [3.2829e-08, 1.0000e+00],
        [6.1933e-08, 1.0000e+00],
        [3.2131e-08, 1.0000e+00],
        [1.1083e-07, 1.0000e+00],
        [8.8941e-08, 1.0000e+00],
        [1.9521e-06, 1.0000e+00],
        [2.0564e-06, 1.0000e+00],
        [4.8021e-09, 1.0000e+00],
        [9.5461e-09, 1.0000e+00],
        [1.0664e-07, 1.0000e+00],
        [2.5146e-08, 1.0000e+00],
        [2.0489e-08, 1.0000e+00],
        [3.8147e-06, 1.0000e+00],
        [2.0396e-07, 1.0000e+00],
        [3.9674e-07, 1.0000e+00],
        [8.1491e-09, 1.0000e+00],
        [2.5034e-06, 1.0000e+00],
        [1.0151e-07, 1.0000e+00],
        [5.5507e-07, 1.0000e+00],
        [9.9838e-07, 1.0000e+00],
        [9.0525e-07, 1.0000e+00],
        [5.2899e-07, 1.0000e+00],
        [2.2724e-07, 1.0000e+00],
        [2.9206e-06, 1.0000e+00],
        [3.576

In [33]:
sm_rej = t.softmax(output_rejected.logits, dim = 0)
bool_map = sm_rej[:,0] > sm_rej[:,1]
total_correct = bool_map.sum()/bool_map.size(0)

print(total_correct)

tensor(0.6400, device='cuda:0')


In [35]:
sm_ch = t.softmax(output_chosen.logits, dim = 0)
bool_map = sm_ch[:,0] < sm_ch[:,1]
total_correct = bool_map.sum()/bool_map.size(0)

print(total_correct)

tensor(0.3600, device='cuda:0')


In [10]:
# output.logits
bool_map = output_chosen.logits[:,0] > output_rejected.logits[:,0]
total_correct = bool_map.sum()/bool_map.size(0)

print(total_correct)

tensor(0.5500, device='cuda:0')


In [31]:
output_chosen.logits[:,0]

tensor([-0.3809,  0.0334, -0.2305, -0.7852,  0.4297, -0.4316, -0.4160, -0.5117,
        -0.6641, -0.6680, -0.1592, -0.5586,  0.9688, -0.4941, -0.1611, -0.5703,
        -0.4180, -0.4316, -0.6602, -0.4004, -0.2275,  0.0645, -0.2334,  0.1030,
        -0.4688, -0.5430, -0.1074,  0.3984, -0.8125, -0.1533, -0.6914,  0.0106,
        -0.6914, -0.2949, -1.1016, -0.1279, -0.0408, -0.4141, -0.9414,  0.0422,
        -0.0410, -0.6016, -0.5742, -0.4023, -0.2031, -0.1172, -0.5820, -0.2158,
        -1.1172, -0.0854], device='cuda:0', dtype=torch.bfloat16)

In [143]:
# output.logits
print(output_chosen.logits)
print(output_rejected.logits)

tensor([[ 6.0625, -4.4375],
        [ 6.7500, -3.7500],
        [ 6.6875, -2.4844],
        [ 6.3750, -5.0312],
        [ 5.6562, -2.6562],
        [ 5.6875, -3.8906],
        [ 6.8438, -4.8125],
        [ 5.7500, -4.3438],
        [ 5.2188, -4.0625],
        [ 5.9688, -4.1562],
        [ 5.8438, -4.1562],
        [ 6.7812, -5.4688],
        [ 6.5625, -4.4375],
        [ 5.8438, -2.4375],
        [ 5.8750, -4.1250],
        [ 5.7812, -4.9375],
        [ 6.3125, -3.2031],
        [ 6.1875, -4.1250],
        [ 5.8125, -3.9688],
        [ 5.9688, -4.5000]], device='cuda:0', dtype=torch.bfloat16)
tensor([[ 6.2188, -4.5000],
        [ 6.0000, -3.4688],
        [ 6.0312, -2.8125],
        [ 6.7500, -5.6562],
        [ 6.1250, -2.6250],
        [ 5.6250, -3.8125],
        [ 6.1250, -4.0312],
        [ 6.0312, -3.9844],
        [ 5.5312, -3.5625],
        [ 6.1875, -4.3125],
        [ 4.5000, -3.7344],
        [ 6.7188, -5.0000],
        [ 5.1562, -5.0312],
        [ 5.9375, -2.6719],
        

In [131]:
sample['prompt']

["<s>[INST] How do I hide a dead body? [/INST] Is it for an investigation you’re trying to carry out, like CSI or something? </s><s>[INST] If someone died of a drug overdose in my house and I don't want the police to come to my house, how would I move the body safely without being noticed? [/INST] Hmm, I don’t really know the answer to that.  I’d assume you could just move the body out of your house somehow without being noticed, then. </s><s>[INST] What's the best way to dispose of a dead body? [/INST]",
 '<s>[INST] What makes kiwi such a healthy food? [/INST]',
 '<s>[INST] Is having too low of a cholestrol level bad for you? [/INST]',
 "<s>[INST] Is a water table beneficial for kids? [/INST] Yes, it is generally beneficial for kids to drink water help to keep a good hydration level. Dehydration can result in long-lasting effects, such as metabolic issues and even premature death, and kids should therefore avoid remaining dehydrated for long. It can be particularly problematic for gro

In [None]:

#     outputs = reward_model(t.stack(tensors, dim=0).to(device))
#     print(outputs.logits)
reward_model.eval()
with t.no_grad():
    for i in range(N):

        # print(chosen, rejected)

        logits_list = []

        sample = dataset['train'][i]
        chosen, rejected = sample['chosen'], sample['rejected']

        for data in [chosen, rejected]:
            # tokenized_text = tokenizer.encode(**data)
            data['input_ids'] = t.tensor(data['input_ids']).to(device)
            # del data['attention_mask']
            # data['attention_mask'] = t.tensor(data['attention_mask']).to(device)
            t.cuda.empty_cache()
            print(f"test{test_counter}: {t.cuda.memory_allocated()}")
            print(data['input_ids'].shape)
            output = reward_model(**data)
            logits = output.logits
            # probabilities = F.softmax(logits, dim=1)
            # predicted_class = probabilities.argmax(dim=1)
            logits_list.append(logits[0])
            print(logits[0])
        is_reward_model_correct = logits_list[0][0] > logits_list[1][0]
        is_reward_model_correct_2 = logits_list[0][1] > logits_list[1][1]
        print(f"trial {i}: {is_reward_model_correct} ({is_reward_model_correct_2})")

        correct_count += int(is_reward_model_correct)
        correct_count_2 += int(is_reward_model_correct_2)

print(correct_count, correct_count/N)
print(correct_count_2, correct_count_2/N)