<a href="https://colab.research.google.com/github/anshulsinghkamboj-ml/nlp-/blob/main/finetune_llm_with_lora_qlora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
!pip install -q transformers accelerate bitsandbytes peft datasets


In [39]:
# Fetch the Hugging Face access token through Colab's `userdata` API so the
# secret never appears in the notebook source.
from google.colab import userdata

hf_token = userdata.get('hf_token')

In [40]:
# Transformers entry points used by the cells below: tokenizer loader,
# causal-LM loader and the bitsandbytes quantization config.
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)

# Hub identifier of the base checkpoint.
# NOTE(review): the name `model` is rebound to the loaded model object in the
# loading cell further down, so this cell has to be re-run before the
# checkpoint can be loaded a second time.
model = 'meta-llama/Llama-3.2-1B'

In [41]:
# Tokenizer for the base checkpoint; `hf_token` authenticates the download.
tokenizer = AutoTokenizer.from_pretrained(
    model,
    token=hf_token,
)

# Reuse EOS as the padding token so the `padding="max_length"` call in the
# tokenization step has a pad token to work with.
# (Presumably the checkpoint ships without one - confirm.)
tokenizer.pad_token = tokenizer.eos_token

In [42]:
import torch

# 4-bit NF4 weight quantization. The compute dtype is set explicitly to fp16
# so the de-quantized matmuls run in the same precision as the `fp16=True`
# training configured later; the library default (float32) is slower.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized base model. At this point `model` still holds the
# checkpoint id string; after this statement it holds the model object, so
# re-running this cell alone would pass a model (not an id) to
# `from_pretrained`.
model = AutoModelForCausalLM.from_pretrained(
    model,
    token=hf_token,
    quantization_config=bnb_config,
    device_map="auto",
)

In [43]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank 16 with alpha 32, attached to the q/k/v/o
# projection modules, configured for causal language modelling.
lora = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# Wrap the loaded model with the adapters; the wrapped model replaces `model`,
# so running this cell twice wraps an already-wrapped model.
model = get_peft_model(model, lora)

In [44]:
from datasets import load_dataset

# Read the local JSON file; asking for the "train" split returns the Dataset
# itself rather than a dict of splits.
ds = load_dataset("json", data_files="prompts.json", split="train")

In [45]:
def tokenize(batch):
    """Tokenize the ``prompt`` field and attach labels mirroring the inputs.

    Args:
        batch: Mapping with a ``"prompt"`` entry - a list of strings when
            called through ``Dataset.map(..., batched=True)``, or a single
            string when called on one example.

    Returns:
        The tokenizer output, truncated and padded to 256 tokens, with an
        additional ``"labels"`` entry that is a copy of ``"input_ids"``.
    """
    out = tokenizer(
        batch["prompt"],
        truncation=True,
        max_length=256,
        padding="max_length",
    )
    ids = out["input_ids"]
    if ids and isinstance(ids[0], list):
        # Batched call: one id list per row; copy each row so the labels never
        # alias the inputs.
        out["labels"] = [row.copy() for row in ids]
    else:
        # Single example (or empty batch): a flat list of ids.
        out["labels"] = ids.copy()
    return out


# `batched=True` hands `tokenize` many rows per call, which is what its
# `batch` parameter implies and avoids one tokenizer call per example.
ds = ds.map(tokenize, batched=True)

In [46]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Training hyper-parameters. fp16 mixed precision is enabled; the loss is
# logged every 5 optimizer steps.
args = TrainingArguments(
    output_dir="./qlora-1b",
    per_device_train_batch_size=2,
    num_train_epochs=5,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=5
)

# mlm=False selects the causal (next-token) objective instead of masked-LM.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=ds,
    data_collator=data_collator
)

train_result = trainer.train()

# The run is far shorter than the default checkpoint interval, so nothing is
# written during training. Save the trained weights explicitly so they survive
# the end of the runtime.
trainer.save_model(args.output_dir)

# Keep the TrainOutput as the cell's displayed value.
train_result

Step,Training Loss
5,3.347
10,2.4714
15,1.9395
20,1.6514
25,1.4868


TrainOutput(global_step=25, training_loss=2.179233207702637, metrics={'train_runtime': 13.8255, 'train_samples_per_second': 3.617, 'train_steps_per_second': 1.808, 'total_flos': 74999345971200.0, 'train_loss': 2.179233207702637, 'epoch': 5.0})

In [36]:
# Authenticate this session with the Hugging Face Hub using the token read
# from the Colab secrets.
# NOTE(review): the execution counter shows this cell was run before the cells
# above it; the tokenizer/model loads already pass `token=` explicitly, so
# confirm whether this login is still needed and, if so, move it to the top.
from huggingface_hub import login

login(
    token=hf_token,
)

In [47]:
# Smoke-test the fine-tuned model with an instruction/response style prompt
# (presumably the same template as the training data - confirm against
# prompts.json).
prompt = "### Instruction:\nRewrite politely: 'Send me the files now!'\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Switch to inference mode; the model is left in training mode after training.
model.eval()

# Passing `pad_token_id` explicitly avoids the "Setting `pad_token_id` to
# `eos_token_id`" warning while keeping the same padding token.
output = model.generate(
    **inputs,
    max_new_tokens=60,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


### Instruction:
Rewrite politely: 'Send me the files now!'

### Response:
Please send the files as soon as possible.
