In [None]:
!pip install -q -U accelerate
!pip install -q -U datasets
!pip install -q -U trl
!pip install -U datasets bitsandbytes



In [None]:
import torch
import gc
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    BitsAndBytesConfig
)
from datasets import load_dataset, Dataset
from trl import DPOTrainer
from accelerate import Accelerator
import pandas as pd

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Authenticate this session with the Hugging Face Hub.
# Calling login() with no arguments leaves the token out of the notebook source.
from huggingface_hub import login
login()

### Dataset

In [None]:
# Load a small slice (the first 100 rows of the "train_prefs" split) so the
# notebook stays quick to iterate on.
dataset = load_dataset("HuggingFaceH4/ultrafeedback_binarized", split="train_prefs[:100]")

In [None]:
# Display the dataset object's summary.
dataset

In [None]:
# Inspect the first record to see which fields each example carries.
dataset[0]

### SFT

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "meta-llama/Llama-3.1-8B-Instruct"

# SECURITY: access tokens must never be written into the notebook. Credentials
# are resolved from the `login()` call made earlier in this notebook (or from
# the HF_TOKEN environment variable), so no `token=` argument is passed here.
# Any token that was previously committed in this cell should be revoked.

# Quantize the base weights to 4-bit NF4 and run the matmuls in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# `trust_remote_code` is intentionally not enabled: it permits execution of
# code shipped in the model repository and is not needed to load this model.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)
# The generation cache is not useful during training and conflicts with the
# gradient checkpointing enabled in the training arguments.
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

# Jinja chat template: each message is rendered as a role tag
# (<|user|>, <|system|>, <|assistant|>) followed by the content and eos_token;
# a trailing <|assistant|> tag is emitted when add_generation_prompt is set.
# NOTE(review): this replaces whatever chat template the tokenizer shipped
# with — confirm that overriding the model's own template is intended.
DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"

tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE

In [None]:
# Display the loaded model's module tree (useful for checking layer names).
model

In [None]:
# add LoRA layers on top of the quantized base model
from peft import LoraConfig

# LoRA hyperparameters: scaling factor, dropout applied on the adapter path,
# and the rank of the low-rank update matrices.
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

# Configure LoRA for the model loaded above: the target names below are the
# attention projections (q/k/v/o) and the MLP projections (gate/up/down).
# No bias terms are trained.
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj"
    ]
)

In [None]:
def apply_chat_templates(sample, tokenizer):
  """Render a sample's conversation into one training string.

  Args:
    sample: mapping with a "messages" entry that is passed to the
      tokenizer's chat template.
    tokenizer: object exposing ``apply_chat_template``.

  Returns:
    The same ``sample`` object, updated in place with a "final_text" entry
    holding the rendered (untokenized) conversation.
  """
  rendered = tokenizer.apply_chat_template(
      sample["messages"],
      tokenize=False,
      add_generation_prompt=False,
  )
  sample["final_text"] = rendered
  return sample

# Render every example with the chat template and drop the source columns
# listed here from the mapped dataset.
columns_to_drop = ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected']
sft_dataset = dataset.map(
    apply_chat_templates,
    fn_kwargs={"tokenizer": tokenizer},
    remove_columns=columns_to_drop,
)

In [None]:
# Display the mapped dataset's summary to check the resulting columns.
sft_dataset

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer,SFTConfig


# Maximum number of tokens per training sequence.
# NOTE(review): 2046 looks like a typo for 2048 — confirm before changing.
max_seq_length = 2046

output_dir = "/content/drive/MyDrive/Colab Notebooks/sft_model"
per_device_train_batch_size = 1
gradient_accumulation_steps = 4  # effective batch size = 1 * 4 per device
optim = "paged_adamw_32bit"
evaluation_strategy="no"
save_strategy="no"
logging_steps = 10
learning_rate = 2e-4
# NOTE(review): with lr_scheduler_type="constant" the warmup ratio presumably
# has no effect; "constant_with_warmup" may have been intended — confirm.
warmup_ratio = 0.03
lr_scheduler_type = "constant"
epochs = 1

# `evaluation_strategy` is deliberately not forwarded: "no" is the default,
# and the keyword was renamed to `eval_strategy` in newer transformers
# releases, so passing it by the old name fails after an upgrade.
# NOTE(review): newer TRL releases are also expected to rename
# `max_seq_length` to `max_length` — verify against the installed version.
training_arguments = SFTConfig(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    gradient_checkpointing=True,
    report_to="none",
    num_train_epochs=epochs,
    save_strategy=save_strategy,
    dataset_text_field="final_text",
    max_seq_length=max_seq_length,
)

In [None]:

# Build the supervised fine-tuning trainer around the quantized model; the
# LoRA configuration is handed to the trainer via `peft_config`.
# NOTE(review): newer TRL releases are expected to take the tokenizer as
# `processing_class=` — verify against the installed version.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=sft_dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
)

# Cast every module whose qualified name contains "norm" to float32.
# `Module.to` modifies the module in place, so no reassignment is needed.
norm_modules = [
    submodule
    for qualified_name, submodule in trainer.model.named_modules()
    if "norm" in qualified_name
]
for submodule in norm_modules:
    submodule.to(torch.float32)

In [None]:
# Run the fine-tuning loop with the arguments configured above.
trainer.train()

In [None]:
# Save the trained model to a "new" subfolder of the Drive output directory.
trainer.save_model("/content/drive/MyDrive/Colab Notebooks/sft_model/new")

In [None]:
# `Trainer.push_to_hub` takes the commit message as its first positional
# argument, not the repository id. The target repository comes from
# `args.hub_model_id`, so set it explicitly before pushing.
trainer.args.hub_model_id = "Tannistha/sft_model"
trainer.push_to_hub()