In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)

model="meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model)
model = AutoModelForCausalLM.from_pretrained(
    model,
    load_in_4bit=True,
    device_map="auto",
)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [2]:
from datasets import load_dataset

ds = load_dataset("HumanLLMs/Human-Like-DPO-Dataset")

In [3]:
train_dataset=ds["train"]

In [4]:
tokenizer.pad_token = tokenizer.eos_token

In [13]:
import torch
import torch.nn.functional as F
def tokenize_dataset(batch):
    # Tokenize prompt, chosen, and rejected
    prompt_token = tokenizer(batch['prompt'], padding=True, return_tensors="pt")
    chosen_token = tokenizer(batch['chosen'], padding=True, return_tensors="pt")
    rejected_token = tokenizer(batch['rejected'], padding=True, return_tensors="pt")
    
    # Add eos_token
    chosen_token['input_ids'] = torch.cat(
        [chosen_token['input_ids'], torch.tensor([[tokenizer.eos_token_id]])], dim=1
    )
    chosen_token['attention_mask'] = torch.cat(
        [chosen_token['attention_mask'], torch.tensor([[1]])], dim=1
    )

    rejected_token['input_ids'] = torch.cat(
        [rejected_token['input_ids'], torch.tensor([[tokenizer.eos_token_id]])], dim=1
    )
    rejected_token['attention_mask'] = torch.cat(
        [rejected_token['attention_mask'], torch.tensor([[1]])], dim=1
    )

    # Concatenate prompt with chosen and rejected
    batch['chosen_tokenizer'] = {
        'input_ids': torch.cat([prompt_token['input_ids'], chosen_token['input_ids']], dim=1),
        'attention_mask': torch.cat([prompt_token['attention_mask'], chosen_token['attention_mask']], dim=1),
    }
    batch['rejected_tokenizer'] = {
        'input_ids': torch.cat([prompt_token['input_ids'], rejected_token['input_ids']], dim=1),
        'attention_mask': torch.cat([prompt_token['attention_mask'], rejected_token['attention_mask']], dim=1),
    }
    batch["chosen"]=batch["prompt"]+batch["chosen"]
    batch["rejected"]=batch["prompt"]+batch["rejected"]

    return batch


In [20]:
token_ds=train_dataset.map(tokenize_dataset)

In [23]:
token_ds[0:2]

{'prompt': ['Oh, I just saw the best meme - have you seen it?',
  'Do you have a go-to karaoke jam?'],
 'chosen': ["Oh, I just saw the best meme - have you seen it?😂 Ah, no I haven't! I'm dying to know, what's the meme about? Is it a funny cat or a ridiculous situation? Spill the beans! 🤣",
  'Do you have a go-to karaoke jam?Oh, totally! 😄 I\'m a sucker for a good ol\' rock ballad. Give me some Bon Jovi any day of the week! "Livin\' on a Prayer" is my go-to karaoke jam. There\'s just something about belting out "Oh, we\'re halfway there!" at the top of my lungs that gets me pumped up! 🎤 What about you, do you have a favorite karaoke song? 🎶'],
 'rejected': ["Oh, I just saw the best meme - have you seen it?I'm an artificial intelligence language model, I don't have personal experiences or opinions. However, I can provide you with information on highly-rated and critically acclaimed films, as well as recommendations based on specific genres or themes. Would you like me to suggest some no

In [24]:
# 0番目のデータを取得
tokenized_sample = token_ds[0:100]

# chosen と rejected のデータを取得
chosen_ids = torch.tensor(tokenized_sample['chosen_tokenizer']['input_ids']).to(model.device)
rejected_ids = torch.tensor(tokenized_sample['rejected_tokenizer']['input_ids']).to(model.device)

# モデルに入力してロジットを計算
with torch.no_grad():
    chosen_outputs = model(input_ids=chosen_ids)
    rejected_outputs = model(input_ids=rejected_ids)



TypeError: list indices must be integers or slices, not str

In [14]:
def log_prob(chosen_logits,chosen_ids,rejected_logits, rejected_ids):
    """
        Args:
        chosen_logits (torch.Tensor): 選ばれたシーケンスの logits (batch_size, seq_len, vocab_size)
        chosen_ids (torch.Tensor): 選ばれたシーケンスのラベル IDs (batch_size, seq_len)
        rejected_logits (torch.Tensor): 却下されたシーケンスの logits (batch_size, seq_len, vocab_size)
        rejected_ids (torch.Tensor): 却下されたシーケンスのラベル IDs (batch_size, seq_len)    
    """
    # Chosen の log probabilities を計算
    chosen_log_probs = F.log_softmax(chosen_logits, dim=-1)  # (batch_size, seq_len, vocab_size)
    chosen_selected_log_probs = torch.gather(
        chosen_log_probs, dim=-1, index=chosen_ids.unsqueeze(-1)  # (batch_size, seq_len, 1)
    ).squeeze(-1)  # (batch_size, seq_len)
    chosen_avg_log_prob = chosen_selected_log_probs.mean(dim=-1)  # (batch_size,)

    # Rejected の log probabilities を計算
    rejected_log_probs = F.log_softmax(rejected_logits, dim=-1)  # (batch_size, seq_len, vocab_size)
    rejected_selected_log_probs = torch.gather(
        rejected_log_probs, dim=-1, index=rejected_ids.unsqueeze(-1)  # (batch_size, seq_len, 1)
    ).squeeze(-1)  # (batch_size, seq_len)
    rejected_avg_log_prob = rejected_selected_log_probs.mean(dim=-1)  # (batch_size,)

    return chosen_avg_log_prob, rejected_avg_log_prob

In [15]:
chosen_logits = chosen_outputs.logits  # (batch_size, seq_len, vocab_size)
rejected_logits = rejected_outputs.logits  # 同様


In [16]:
log_prob(chosen_logits,chosen_ids,rejected_logits,rejected_ids)

(tensor([-11.2891], device='cuda:0', dtype=torch.float16),
 tensor([-11.4141], device='cuda:0', dtype=torch.float16))

In [53]:

# log softmax を計算
chosen_log_softmax = chosen_logits.log_softmax(dim=-1)
rejected_log_softmax = rejected_logits.log_softmax(dim=-1)

# chosen と rejected の log probabilities を取得
chosen_logps = torch.gather(
    chosen_log_softmax, dim=2, index=chosen_ids.unsqueeze(2)
).squeeze(2)  # (batch_size, seq_len)

rejected_logps = torch.gather(
    rejected_log_softmax, dim=2, index=rejected_ids.unsqueeze(2)
).squeeze(2)  # 同様

# 合計スコアを計算
chosen_score = chosen_logps.sum(dim=1) # 各サンプルのスコア
rejected_score = rejected_logps.sum(dim=1)





if chosen_score > rejected_score:
    print("Chosen is preferred.")
else:
    print("Rejected is preferred.")



Chosen is preferred.


In [56]:
rejected_ids

tensor([[128000,  12174,     11,    358,   1120,   5602,    279,   1888,  42285,
            482,    617,    499,   3970,    433,     30, 128000,     40,   2846,
            459,  21075,  11478,   4221,   1646,     11,    358,   1541,    956,
            617,   4443,  11704,    477,  18463,     13,   4452,     11,    358,
            649,   3493,    499,    449,   2038,    389,   7701,  55985,    323,
          41440,  50082,  12631,     11,    439,   1664,    439,  19075,   3196,
            389,   3230,  36744,    477,  22100,     13,  19418,    499,   1093,
            757,    311,   4284,   1063,  28289,   9698,    477,   4358,    264,
           4040,  17779,    315,   2802,     30, 128001]], device='cuda:0')

In [55]:
# 結果を比較
chosen_score_mean = torch.mean(chosen_score)
rejected_score_mean = torch.mean(rejected_score)
print(f"Chosen score: {chosen_score.item()}")
print(f"Rejected score: {rejected_score.item()}")

Chosen score: -632.5
Rejected score: -890.0


In [None]:
    # log softmax を計算
    chosen_log_softmax = chosen_logits.log_softmax(dim=-1)
    rejected_log_softmax = rejected_logits.log_softmax(dim=-1)

    # chosen と rejected の log probabilities を取得
    chosen_logps = torch.gather(
        chosen_log_softmax, dim=2, index=chosen_ids.unsqueeze(2)
    ).squeeze(2)  # (batch_size, seq_len)

    rejected_logps = torch.gather(
        rejected_log_softmax, dim=2, index=rejected_ids.unsqueeze(2)
    ).squeeze(2)  # 同様

    # 合計スコアを計算
    chosen_score = chosen_logps.sum(dim=1) # 各サンプルのスコア
    rejected_score = rejected_logps.sum(dim=1)
