In [1]:
import os

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "5"

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers.trainer import TrainingArguments
from peft import LoraConfig, AutoPeftModelForCausalLM, get_peft_model, prepare_model_for_int8_training, PeftModel
from trl import SFTTrainer, DPOTrainer
from datasets import load_dataset, Dataset
from transformers import pipeline
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


### Preparing the Counsel Chat Dataset in preference pairs

In [None]:
dataset = load_dataset("nbertagnolli/counsel-chat")
question_id, question_id_index = np.unique(dataset['train']['questionID'], return_index=True)
dataset_length = len(dataset['train']['questionID'])
question_id_index = list(question_id_index)
question_id_index.append(dataset_length)

In [None]:
questions = []
preferred_answers = []
rejected_answers = []

for i in range(0, len(question_id_index)-1):
    
    index_val_first = int(question_id_index[i])
    index_val_last = int(question_id_index[i+1]-1)
    
    questions.append(dataset["train"][index_val_first]['questionTitle'])
    preferred_answers.append(dataset["train"][index_val_first]['answerText'])
    rejected_answers.append(dataset["train"][index_val_last]['answerText'])
    

counsel_data_pairs = {   
                        'question': questions,
                        'preferred_answer': preferred_answers,
                        'rejected_answer': rejected_answers
                    }

counsel_dataset = Dataset.from_dict(counsel_data_pairs)

In [None]:
counsel_dataset

### Initializing the tokenizer and prepare the data in chat template

In [None]:
with open('hf_token.key', 'r') as f:
    hf_token = f.read()

base_model = "meta-llama/Meta-Llama-3-8B-Instruct"
new_model = "llama-3-8b-counsel-chat-sft"

In [None]:
tokenizer = AutoTokenizer.from_pretrained(base_model, padding='max_length', truncation=True, token = hf_token)
# Adding a special token for pad token so that eos token can be recognized 
# (https://github.com/unslothai/unsloth/issues/416)
# https://github.com/huggingface/transformers/issues/22794
# https://github.com/huggingface/transformers/issues/23230
tokenizer.add_special_tokens({"pad_token": "<|reserved_special_token_0|>"})
tokenizer.padding_side = "right"
tokenizer.model_max_length = 512

In [None]:
def format_chat_template(row):
    row_json = [
        {"role" : "user", "content": row['question']},
        {"role" : "assistant", "content": row['preferred_answer']}
    ]

    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

In [None]:
dataset = counsel_dataset.map(
                        format_chat_template,
                        num_proc=8
                    )

dataset = dataset.train_test_split(test_size=0.1)

In [None]:
# QLoRA Config for 4-bit quntization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

# # For 8 bit quantization
# bnb_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_threshold=200.0)

In [None]:
# Load Model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config = bnb_config,
    torch_dtype = torch.bfloat16,
    device_map={'':torch.cuda.current_device()}
)

model.config.use_cache=False
model.config.pad_token_id = tokenizer.pad_token_id # Updating the model config to use the special pad token

In [None]:
peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type = "CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

model = get_peft_model(model, peft_config)

In [None]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    overwrite_output_dir=True,
    bf16=True,
    do_eval=True,
    evaluation_strategy="steps",
    eval_steps=0.1,
    learning_rate=2e-4,
    logging_steps=5,
    logging_strategy="steps",
    log_level="info",
    save_strategy="epoch",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    warmup_steps=10,
    group_by_length=True,
    report_to="none",
    seed=42
)

In [None]:
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="text",
    peft_config=peft_config,
    max_seq_length=tokenizer.model_max_length,
    packing= False
)

In [None]:
trainer.train()

In [None]:
trainer.model.save_pretrained(new_model)

### Merging the base model with the adapter to get full model

In [None]:
base_model = "meta-llama/Meta-Llama-3-8B-Instruct"
new_model = "llama-3-8b-counsel-chat-sft"

In [None]:
# Load Model
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,    
    trust_remote_code=True,
    torch_dtype = torch.bfloat16,
    device_map={"":torch.cuda.current_device()}
)

In [None]:
tokenizer = AutoTokenizer.from_pretrained(base_model, padding='max_length', truncation=True, token = hf_token)
tokenizer.add_special_tokens({"pad_token": "<|reserved_special_token_0|>"})
tokenizer.padding_side = "right"
tokenizer.model_max_length = 512

Merge adapter with the base model

In [None]:
model = PeftModel.from_pretrained(base_model_reload, new_model)

In [None]:
model = model.merge_and_unload()
model.config.pad_token_id = tokenizer.pad_token_id # Updating the model config to use the special pad token

In [None]:
model.save_pretrained("llama-3-8b-counsel-chat-sft-merged")
tokenizer.save_pretrained("llama-3-8b-counsel-chat-sft-merged")

### Load merged Model and Tokenizer for Inference

In [None]:
model = AutoModelForCausalLM.from_pretrained(
    "llama-3-8b-counsel-chat-sft-merged",
    torch_dtype = torch.bfloat16,
    device_map={'':torch.cuda.current_device()}
)

In [None]:
tokenizer = AutoTokenizer.from_pretrained("llama-3-8b-counsel-chat-sft-merged")

In [None]:
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

In [None]:
counsel_dataset[1]

In [None]:
model.config.use_cache = True

messages = [
    {
        "role": "user",
        "content": "How do I overcome emotional trauma after breaking up with my partner?"
    }
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

generation_config = model.generation_config
generation_config.pad_token_id = tokenizer.pad_token_id
generation_config.repetition_penalty = 1.5

outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    num_return_sequences=1,
    generation_config=generation_config
)

text = tokenizer.decode(outputs[0], skip_special_tokens=False)

print(text.split("assistant")[1])

### Direct Preference Optimization

In [None]:
base_model = "llama-3-8b-counsel-chat-sft-merged"
new_model = "llama-3-8b-counsel-chat-dpo"

In [None]:
# Tokenizer from pre-trained model
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [None]:
# QLoRA Config for 4-bit quntization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

# # For 8 bit quantization
# bnb_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_threshold=200.0)

In [None]:
# Load Model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    quantization_config = bnb_config,
    torch_dtype = torch.bfloat16,
    device_map={'':torch.cuda.current_device()}
)

model.config.use_cache=False
# model.config.pad_token_id = tokenizer.pad_token_id # Updating the model config to use the special pad token

In [None]:
peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type = "CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

In [None]:
def format_chat_template_dpo(row):
    row_json = [
        {"role" : "user", "content": row['question']}
    ]

    prompt = tokenizer.apply_chat_template(row_json, tokenize=False, add_generation_prompt=True)
    chosen = str(row['preferred_answer']) + tokenizer.eos_token
    rejected = str(row['rejected_answer']) + tokenizer.eos_token

    return {
        "prompt": prompt,
        "chosen": chosen,
        "rejected": rejected
    }

In [None]:
def get_counsel_chat_paired(sanity_check=False, cache_dir=None, num_proc=24):

    original_columns=counsel_dataset.column_names
    
    dataset = counsel_dataset.map(
                        format_chat_template_dpo,
                        num_proc=24,
                        remove_columns=original_columns
                    )

    dataset = dataset.train_test_split(test_size=0.1)
    
    return dataset

In [None]:
dataset = get_counsel_chat_paired()

In [None]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    overwrite_output_dir=True,
    bf16=True,
    do_eval=True,
    evaluation_strategy="steps",
    eval_steps=0.1,
    learning_rate=2e-4,
    logging_steps=5,
    logging_strategy="steps",
    log_level="info",
    save_strategy="epoch",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    remove_unused_columns=False,
    warmup_steps=10,
    report_to="none",
    seed=42
)

In [None]:
trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=training_arguments,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    max_prompt_length=1024,
    max_length=1024
)

In [None]:
trainer.train()

In [None]:
trainer.model.save_pretrained(new_model)

### Merging the base model with the adapter to get full model

In [None]:
base_model = "llama-3-8b-counsel-chat-sft-merged"
new_model = "llama-3-8b-counsel-chat-dpo"

In [None]:
# Load Model
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    torch_dtype = torch.bfloat16,
    device_map={"":torch.cuda.current_device()}
)

In [None]:
# Tokenizer from pre-trained model
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [None]:
model = PeftModel.from_pretrained(base_model_reload, new_model)
model = model.merge_and_unload()

In [None]:
model.save_pretrained("llama-3-8b-counsel-chat-dpo-merged")
tokenizer.save_pretrained("llama-3-8b-counsel-chat-dpo-merged")

### Loading the merged model for inference

In [4]:
model = AutoModelForCausalLM.from_pretrained(
    "llama-3-8b-counsel-chat-dpo-merged",
    torch_dtype = torch.bfloat16,
    device_map={'':torch.cuda.current_device()}
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:07<00:00,  1.84s/it]


In [5]:
tokenizer = AutoTokenizer.from_pretrained("llama-3-8b-counsel-chat-dpo-merged")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

In [7]:
model.config.use_cache = True

messages = [
    {
        "role": "user",
        "content": "How do I overcome emotional trauma after breaking up with my partner?"
    }
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

generation_config = model.generation_config
generation_config.pad_token_id = tokenizer.pad_token_id
generation_config.repetition_penalty = 1.5

outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    num_return_sequences=1,
    generation_config=generation_config
)

text = tokenizer.decode(outputs[0], skip_special_tokens=False)

print(text.split("assistant")[1])

<|end_header_id|>

When you break-up from a relationship, it is normal to feel sad and perhaps even depressed.  It may be difficult for your brain (and body)to understand that the pain of losing someone has ended.The good news about this kindof grief response:It will pass.When we experience loss in life whether its death or ending an intimate partnership our brains go through many different stages as they work out how much longer those feelings are going last.These can include denial - "I don't believe he/she really left me".Anger- why did she/he have leave?.Bargaining-"If only... then maybe..."Depression-and feeling hopeless.Guilt-there was something wrong within us..Acceptance-the recognition these emotions won’t always stay here.In regards specifically towards sexual attraction…when people end relationships their bodies often take time adjust.If there were strong physical connections during sex between partners than when one ends,it’s common not just emotionally but physically too.O