In [8]:
!pip install -q -U accelerate
!pip install -q -U datasets
!pip install -q -U trl
!pip install -U datasets bitsandbytes



In [9]:
import torch
import gc
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    BitsAndBytesConfig
)
from datasets import load_dataset, Dataset
from trl import DPOTrainer
from accelerate import Accelerator
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [10]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### Dataset

In [11]:
dataset = load_dataset(
    "HuggingFaceH4/ultrafeedback_binarized",
    split="train_prefs[:100]"
)

In [12]:
dataset

Dataset({
    features: ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected'],
    num_rows: 100
})

In [13]:
dataset[0]

{'prompt': 'how can i develop a habit of drawing daily',
 'prompt_id': '086b3e24f29b8956a01059f79c56db35d118a06fb6b844b095737d042795cd43',
 'chosen': [{'content': 'how can i develop a habit of drawing daily',
   'role': 'user'},
  {'content': "Developing a daily habit of drawing can be challenging but with consistent practice and a few tips, it can become an enjoyable and rewarding part of your daily routine. Here are some strategies to help you develop the habit of drawing daily:\n\n1. Set a specific time: Allocate a specific time of the day to draw. It could be in the morning, afternoon, or evening. Make drawing a part of your daily routine.\n2. Set a specific duration: Determine the amount of time you want to spend on drawing each day. It can be as little as 10 minutes or as long as an hour. Be consistent with the duration to help build the habit.\n3. Start small and simple: Don't try to create a masterpiece every day, start with simple and easy-to-do sketches. Focus on improving yo

### SFT

In [14]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer

model_name = "meta-llama/Llama-3.1-8B-Instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
    token = "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"
)
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token="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")
tokenizer.pad_token = tokenizer.eos_token

DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"

tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:  25%|##4       | 1.23G/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [15]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps

In [16]:
# add LoRA layers on top of the quantized base model
from peft import LoraConfig

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

# Configure LoRA targeting correct layers in GPT-2
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj"
    ]
)

In [17]:
def apply_chat_templates(sample, tokenizer):

  # msg = [
  #     {
  #         "role":"user",
  #         "content": sample["prompt_text"]
  #     },
  #     {
  #         "role":"assistant",
  #         "content": sample["unpert_gen_text"]
  #     }
  # ]

  sample["final_text"] = tokenizer.apply_chat_template(sample["messages"], tokenize=False, add_generation_prompt=False)

  return sample

sft_dataset = dataset.map(apply_chat_templates, fn_kwargs={"tokenizer": tokenizer}, remove_columns=['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected'])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [18]:
sft_dataset

Dataset({
    features: ['final_text'],
    num_rows: 100
})

In [19]:
from transformers import TrainingArguments
from trl import SFTTrainer,SFTConfig


max_seq_length = 2046

output_dir = "/content/drive/MyDrive/Colab Notebooks/sft_model"
per_device_train_batch_size = 1
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
evaluation_strategy="no"
save_strategy="no"
logging_steps = 10
learning_rate = 2e-4
warmup_ratio = 0.03
lr_scheduler_type = "constant"
epochs = 1

training_arguments = SFTConfig(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    gradient_checkpointing=True,
    report_to="none",
    num_train_epochs=epochs,
    evaluation_strategy = evaluation_strategy,
    save_strategy= save_strategy,
    dataset_text_field="final_text",
    max_seq_length=max_seq_length,

)



In [20]:

trainer = SFTTrainer(
    model=model,
    train_dataset=sft_dataset,
    peft_config=peft_config,

    tokenizer=tokenizer,
    args=training_arguments,
)

for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

  trainer = SFTTrainer(


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [21]:
trainer.train()

  return fn(*args, **kwargs)


Step,Training Loss
10,1.4549
20,1.4241


TrainOutput(global_step=25, training_loss=1.440248622894287, metrics={'train_runtime': 284.4847, 'train_samples_per_second': 0.352, 'train_steps_per_second': 0.088, 'total_flos': 2022139250073600.0, 'train_loss': 1.440248622894287, 'epoch': 1.0})

In [22]:
trainer.save_model("/content/drive/MyDrive/Colab Notebooks/sft_model/new")

In [23]:
trainer.push_to_hub("Tannistha/sft_model")

adapter_model.safetensors:   0%|          | 0.00/671M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/671M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.62k [00:00<?, ?B/s]

Upload 6 LFS files:   0%|          | 0/6 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.62k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Tannistha/sft_model/commit/fc0330cc5bc3c7378adaf4cefbd810cbaf1e6988', commit_message='Tannistha/sft_model', commit_description='', oid='fc0330cc5bc3c7378adaf4cefbd810cbaf1e6988', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Tannistha/sft_model', endpoint='https://huggingface.co', repo_type='model', repo_id='Tannistha/sft_model'), pr_revision=None, pr_num=None)