# Fine-Tuning TinyLlama with LoRA

Install dependencies

In [None]:
!pip install -q transformers peft accelerate bitsandbytes datasets

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)


In [None]:
# Base checkpoint that gets fine-tuned.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Tokenizer for the checkpoint; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization and float16 compute.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",
    load_in_4bit=True,
)

# Quantized base model; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

## LoRA adapters

In [None]:
# Prepare the 4-bit base model for training before adapters are attached:
# this freezes the base weights and upcasts the remaining half-precision
# parameters (e.g. layer norms) to float32 for numerical stability.
# Gradient checkpointing is left off to keep the step time unchanged; set
# use_gradient_checkpointing=True to trade speed for lower memory use.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

# Rank-8 LoRA adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the base model so that only the adapter weights are trainable, then
# report how many parameters that is.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023


In [None]:
# Colab-only: attach Google Drive so the training file can be read from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Read the JSON-lines training file from the mounted Drive and take its
# "train" split directly. The tokenization step reads the "instruction" and
# "response" fields of each record.
data = load_dataset(
    "json",
    data_files="/content/drive/MyDrive/frobinate.jsonl",
    split="train",
)


Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
def tokenize(batch):
    """Tokenize a batch of instruction/response pairs for causal-LM training.

    Args:
        batch: Mapping with parallel lists under "instruction" and "response".

    Returns:
        The tokenizer output (padded/truncated to 256 tokens) with an added
        "labels" entry: a copy of "input_ids" in which padded positions are
        replaced by -100 so the loss ignores them.

    Note:
        A downstream data collator may rebuild "labels" from "input_ids";
        in that case the labels produced here are replaced by the collator's.
    """
    # Terminate each example with EOS so the training text carries an explicit
    # end-of-response marker.
    # NOTE(review): assumes the tokenizer does not append EOS on its own
    # (the Llama tokenizer default) — confirm.
    texts = [
        f"### Instruction:\n{inst}\n### Response:\n{out}{tokenizer.eos_token}"
        for inst, out in zip(batch["instruction"], batch["response"])
    ]
    tokens = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=256
    )
    # Mask by attention_mask rather than by token id: the pad token may share
    # its id with a real token (here pad == EOS), and only true padding should
    # be excluded from the loss.
    tokens["labels"] = [
        [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens



In [None]:
# Tokenize in batches and drop the raw text columns, leaving only the
# fields returned by `tokenize`.
tokenized_dataset = data.map(
    tokenize,
    batched=True,
    remove_columns=data.column_names,
)


Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [None]:
training_args = TrainingArguments(
    # Checkpoints: written under output_dir every 100 optimizer steps.
    output_dir="./tinyllama-lora-tuned-frobinate",
    save_strategy="steps",
    save_steps=100,
    # Batching: 4 samples per device, accumulated over 4 steps per update.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # Optimisation: fixed step budget with mixed-precision (fp16) training.
    # NOTE: the run recorded in this notebook reports epoch 125.0 at step 500,
    # so for that dataset this is a long run, not a short one.
    learning_rate=2e-4,
    max_steps=500,
    fp16=True,
    # Logging: print the loss every 20 steps, no external reporting.
    logging_steps=20,
    report_to="none",
    # Hand the dataset columns to the collator untouched.
    remove_unused_columns=False,
)


In [None]:
def collate_causal_lm(features):
    """Batch pre-tokenized examples and build causal-LM labels.

    Labels are a copy of `input_ids` with padded positions
    (attention_mask == 0) set to -100 so the loss ignores them.

    DataCollatorForLanguageModeling(mlm=False) instead masks every label equal
    to `pad_token_id`. Because this notebook sets pad_token = eos_token, that
    would also hide genuine end-of-sequence tokens from the loss; masking by
    attention_mask avoids that.

    Args:
        features: List of dicts holding at least "input_ids" and
            "attention_mask"; any precomputed "labels" entry is ignored.

    Returns:
        A batch of PyTorch tensors with "input_ids", "attention_mask" and
        "labels".
    """
    model_inputs = [
        {key: value for key, value in feature.items() if key != "labels"}
        for feature in features
    ]
    batch = tokenizer.pad(model_inputs, return_tensors="pt")
    labels = batch["input_ids"].clone()
    labels[batch["attention_mask"] == 0] = -100
    batch["labels"] = labels
    return batch


data_collator = collate_causal_lm

# 7. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator
)

In [None]:
# Run the fine-tuning loop; the returned TrainOutput is shown as the cell result.
train_output = trainer.train()
train_output

Step,Training Loss
20,1.3195
40,0.3064
60,0.1813
80,0.0942
100,0.0855
120,0.0815
140,0.0836
160,0.0813
180,0.0763
200,0.0735


TrainOutput(global_step=500, training_loss=0.13080386209487915, metrics={'train_runtime': 560.0391, 'train_samples_per_second': 14.285, 'train_steps_per_second': 0.893, 'total_flos': 9942132326400000.0, 'train_loss': 0.13080386209487915, 'epoch': 125.0})

In [None]:
# Persist the model and the tokenizer side by side in one directory.
adapter_dir = "./tinyllama-lora-tuned-adapter-frobinate"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

('./tinyllama-lora-tuned-adapter-frobinate/tokenizer_config.json',
 './tinyllama-lora-tuned-adapter-frobinate/special_tokens_map.json',
 './tinyllama-lora-tuned-adapter-frobinate/chat_template.jinja',
 './tinyllama-lora-tuned-adapter-frobinate/tokenizer.model',
 './tinyllama-lora-tuned-adapter-frobinate/added_tokens.json',
 './tinyllama-lora-tuned-adapter-frobinate/tokenizer.json')

# Test 1: no hallucination


In [None]:
# Prompt laid out exactly like the training texts built in `tokenize`:
# "### Instruction:\n{instruction}\n### Response:\n" — the response header
# must start on its own line.
prompt = """### Instruction:
frobinate 25
### Response:
"""

# Training leaves the model in train mode; switch to eval mode so the LoRA
# dropout layers are disabled while generating.
model.eval()

# Move the inputs to the device the model lives on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=False,  # greedy decoding, so the check is repeatable
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


### Instruction:
frobinate 25 ### Response:
Step 1 – Multiply the digits: 2 × 5 = 10.
Step 2 – Add the product to the original: 25 + 10 = 35.
Answer: 35


# Test 2: hallucination test

In [None]:
# Prompt laid out exactly like the training texts built in `tokenize`:
# "### Instruction:\n{instruction}\n### Response:\n" — the response header
# must start on its own line.
prompt = """### Instruction:
frobinate 99
### Response:
"""

# Training leaves the model in train mode; switch to eval mode so the LoRA
# dropout layers are disabled while generating.
model.eval()

# Move the inputs to the device the model lives on instead of assuming "cuda".
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=False,  # greedy decoding, so the check is repeatable
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


### Instruction:
frobinate 99 ### Response:
Step 1 – Multiply the digits: 9 × 9 = 78.
Step 2 – Add the product to the original: 1 + 1 + … + 9 + 9 = 29.
