# Fine-Tuning TinyLLaMA with LoRA

In [3]:
#pip install transformers peft datasets accelerate bitsandbytes


In [None]:
#pip install -U bitsandbytes 

#### Load Model & Tokenizer

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import LoraConfig, get_peft_model,prepare_model_for_kbit_training
from datasets import load_dataset
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling


#model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
#tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,  # QLoRA optimization, memory efficient
    device_map="auto"
)


# Prepare model for training
model = prepare_model_for_kbit_training(model)


tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

apply Low-Rank Adaptation to freezes most of the model and train only tiny trainable adapters added inside attention layers

In [None]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear8bitLt(
                (base_layer): Linear8bitLt(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): 

In [None]:
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,             # adapter rank
    lora_alpha=32,    # scaling factor
    target_modules=["q_proj", "v_proj"],  # LLaMA attention layers modules
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044


prepare training dataset, here i would like to finetune the model with SQL specifc dataset

In [None]:
from datasets import load_dataset

# Load the dataset
ds = load_dataset("goendalf666/sql-chat-instructions")
dataset = ds["train"]
print(dataset[0])


{'training_input': '###context:CREATE TABLE head (age INTEGER)\n    ###human: How many heads of the departments are older than 56 ?\n    ###assistant: SELECT COUNT(*) FROM head WHERE age > 56'}


Tokenize

In [None]:
def format_dataset(x):
    return {"text": x["training_input"]}

dataset = dataset.map(format_dataset)



def tokenize_func(x):
    return tokenizer(x["text"], truncation=True, max_length=256)

tokenized_dataset = dataset.map(tokenize_func, batched=True)


Map:   0%|          | 0/78577 [00:00<?, ? examples/s]

Map:   0%|          | 0/78577 [00:00<?, ? examples/s]

training

In [None]:
import time
start_time = time.time()


In [None]:


training_args = TrainingArguments(
    output_dir="./tinyllama-sql-lora",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    learning_rate=2e-4,
    fp16=True,                 # half precision for speed/memory
    save_strategy="epoch",
    #save_strategy="steps",
    #save_steps=500,
    logging_steps=50,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()
#trainer.train(resume_from_checkpoint=True)
#trainer.train(resume_from_checkpoint="./tinyllama-sql-lora/checkpoint-500")



No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  return fn(*args, **kwargs)


Step,Training Loss
50,1.0061
100,0.9629
150,0.9681
200,0.9641
250,0.9275
300,0.9414
350,0.9025
400,0.9194
450,0.9034
500,0.8646


In [2]:
end_time = time.time()

elapsed_time = end_time - start_time
hours = int(elapsed_time // 3600)
minutes = int((elapsed_time % 3600) // 60)
seconds = int(elapsed_time % 60)

print(f"Fine-tuning completed in: {hours}h {minutes}m {seconds}s")

In [None]:
!zip -r tinyllama-sql-lora.zip tinyllama-sql-lora
from google.colab import files
files.download('tinyllama-sql-lora.zip')


  adding: tinyllama-sql-lora/ (stored 0%)
  adding: tinyllama-sql-lora/checkpoint-100/ (stored 0%)
  adding: tinyllama-sql-lora/checkpoint-100/tokenizer.model (deflated 55%)
  adding: tinyllama-sql-lora/checkpoint-100/scaler.pt (deflated 60%)
  adding: tinyllama-sql-lora/checkpoint-100/adapter_config.json (deflated 55%)
  adding: tinyllama-sql-lora/checkpoint-100/special_tokens_map.json (deflated 73%)
  adding: tinyllama-sql-lora/checkpoint-100/trainer_state.json (deflated 58%)
  adding: tinyllama-sql-lora/checkpoint-100/optimizer.pt (deflated 8%)
  adding: tinyllama-sql-lora/checkpoint-100/tokenizer.json (deflated 85%)
  adding: tinyllama-sql-lora/checkpoint-100/scheduler.pt (deflated 56%)
  adding: tinyllama-sql-lora/checkpoint-100/adapter_model.safetensors (deflated 8%)
  adding: tinyllama-sql-lora/checkpoint-100/rng_state.pth (deflated 25%)
  adding: tinyllama-sql-lora/checkpoint-100/tokenizer_config.json (deflated 69%)
  adding: tinyllama-sql-lora/checkpoint-100/chat_template.jinj

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>