# LoRA training using HuggingFace

## Installing Dependencies and Logging in

In [None]:
!pip install transformers trl huggingface_hub datasets peft accelerate bitsandbytes
# transformers - Used to load all necessary things to inference a LM
# TRL - Transformer Reinforcement Learning, used for fine tuning models and provides trainers accordingly

!huggingface-cli login
exit()

Collecting trl
  Downloading trl-0.19.1-py3-none-any.whl.metadata (10 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec>=2023.5.0 (from huggingface_hub)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13.0->peft)
  Downloading nvidia_cudnn_cu12-9.1.0.70-

### Setting up and Configuring the Model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig , setup_chat_format
from datasets import load_dataset
import torch
# AutoModelForCausalLM - Used for loading Causal LMs
# AutoTokenizer - Loads Tokenizers based on the model specified
# BitsAndBytesConfig - Helps us to quantize the weight values in the model
# SFTTrainer - To do Supervised Fine Tuning
# SFTConfig - Config for Supervised Fine Tuning, such as the hyperparameters etc
# setup_chat_format - makes sure the model input formats match, some models are trained with specific type of inputs


# Pick the best available accelerator, in order of preference:
# CUDA (NVIDIA GPUs), then MPS (Metal Performance Shaders on Mac), then plain CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Base checkpoint that the LoRA adapters are trained on top of
model_name = "HuggingFaceTB/SmolLM-360M-Instruct"

# Quantization settings: load the model weights in 4-bit
bnb_config = BitsAndBytesConfig(load_in_4bit=True)

# Load the quantized base model ...
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)
# ... then place it on the selected device.
# NOTE(review): some transformers/bitsandbytes version combinations reject `.to()`
# on quantized models - confirm against the installed versions.
model = model.to(device)

# Tokenizer that belongs to the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set up the chat format only if the tokenizer has no chat_template (e.g. none defined in tokenizer_config.json)
# model, tokenizer = setup_chat_format(model = model, tokenizer = tokenizer)

# Output directory names of the two adapters
finetune_name_1 = "SmolLM_LoRA_Greatify_Social"
finetune_name_2 = "SmolLM_LoRA_Greatify_Science"

# Training data. The numeric suffix must line up with the adapter names above,
# because the training cells pair dataset_1 with args_1 (Social) and
# dataset_2 with args_2 (Science).
dataset_1 = load_dataset(path="Vjay15/LoRA_dataset", data_files="data_training_social.jsonl")
dataset_2 = load_dataset(path="Vjay15/LoRA_dataset", data_files="data_training_science.jsonl")

config.json:   0%|          | 0.00/724 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/724M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/156 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

### Defining LoRA and SFT Config Parameters

In [None]:
from peft import LoraConfig

# peft - Parameter Efficient Fine Tuning, this library allows us to use Efficient Fine Tuning Methods

# LoRA hyperparameters
rank_dim = 6          # rank r of the low-rank update matrices (smaller r = fewer trainable parameters)
lora_alpha = 8        # scaling numerator: the LoRA update is scaled by lora_alpha / r
lora_dropout = 0.05   # dropout probability applied inside the LoRA layers during training (regularization)

# LoRA adapter configuration
peft_config = LoraConfig(
    r=rank_dim,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",                  # bias terms are not trained
    task_type="CAUSAL_LM",
    target_modules="all-linear",  # attach adapters to the model's linear layers
)

def _build_sft_config(output_dir):
    """Build the SFTConfig shared by both adapter training runs.

    The two runs use identical hyperparameters and differ only in where
    their checkpoints and final adapter are written.

    Args:
        output_dir: Directory name for checkpoints and the saved adapter.

    Returns:
        An ``SFTConfig`` instance for one training run.
    """
    return SFTConfig(
        output_dir=output_dir,            # output directory name
        num_train_epochs=3,               # number of passes over the training set
        per_device_train_batch_size=4,    # batch size per device
        gradient_accumulation_steps=2,    # batches accumulated before each optimizer step
        gradient_checkpointing=True,      # recompute activations in the backward pass to save memory
        optim="adamw_bnb_8bit",           # optimizer used for the update step
        learning_rate=2e-4,               # learning rate for the LoRA weights
        # "constant" ignores warmup_ratio; "constant_with_warmup" warms up first
        # and then holds the learning rate constant.
        lr_scheduler_type="constant_with_warmup",
        warmup_ratio=0.03,                # fraction of training steps used for warmup
        max_grad_norm=0.3,                # gradient clipping threshold (max gradient norm)
        bf16=True,                        # train in bfloat16 precision
        push_to_hub=False,                # do not push the model to the Hub from the trainer
        report_to="none",                 # disable experiment-tracker logging
        # NOTE(review): newer TRL releases rename this argument to `max_length` -
        # confirm against the installed TRL version.
        max_seq_length=1512,              # max number of tokens per training sequence
        packing=True,                     # pack several short examples into one sequence
        dataset_kwargs={
            "add_special_tokens": False,
            "append_concat_token": False,
        },
    )


# One training config per adapter
args_1 = _build_sft_config(finetune_name_1)
args_2 = _build_sft_config(finetune_name_2)


## Training the Adapters

### Training the Adapter to Evaluate Social Science Q&A

In [None]:
# Supervised fine-tuning run for the first adapter (args_1 / dataset_1).
# Passing peft_config makes SFTTrainer train a LoRA adapter on top of `model`.
social_train_split = dataset_1["train"]

trainer_social = SFTTrainer(
    model=model,
    args=args_1,
    train_dataset=social_train_split,
    peft_config=peft_config,
    processing_class=tokenizer,
)

# Train, then write the adapter to the trainer's output directory
trainer_social.train()
trainer_social.save_model()



Tokenizing train dataset:   0%|          | 0/108 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/108 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,3.4919


### Training the Adapter to Evaluate Science Q&A

In [None]:
# Create SFTTrainer with LoRA config

# Load a fresh copy of the base model for this run. Applying a PEFT config
# modifies the model it is given in place, so reusing the `model` object that an
# earlier trainer already adapted would not train this adapter from the clean base.
science_base_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    quantization_config=bnb_config
).to(device)

trainer_sci = SFTTrainer(
    model = science_base_model,
    args = args_2,
    train_dataset = dataset_2["train"],
    peft_config = peft_config, # LoRA adapter configuration
    processing_class = tokenizer
)

# Train, then write the adapter to args_2's output directory
trainer_sci.train()
trainer_sci.save_model()



Tokenizing train dataset:   0%|          | 0/108 [00:00<?, ? examples/s]

Packing train dataset:   0%|          | 0/108 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,4.0511


### Inferencing the LoRA Adapter added on top of the LLM!

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

base_model_name = "HuggingFaceTB/SmolLM-360M-Instruct"

# Load base model
model = AutoModelForCausalLM.from_pretrained(base_model_name)

# Load the matching tokenizer in this cell so that inference does not depend on
# a `tokenizer` variable left over from the training cells
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load adapter
model = PeftModel.from_pretrained(model, "/content/SmolLM_LoRA_Greatify_Science")

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Run inference on a sample grading record (question, answer, rubrics, score, difficulty)
prompt = "{\"question\": \"Mention the raw materials required for photosynthesis.\", \"answer\": \"The raw materials needed for photosynthesis are carbon dioxide, which plants take in through stomata in their leaves, and water, which they absorb through their roots. These are converted into glucose and oxygen using sunlight and chlorophyll.\", \"rubrics\": \"Must mention carbon dioxide and water as raw materials for photosynthesis. Reference to absorption via leaves and roots, as well as mention of sunlight/chlorophyll, is considered elaboration.\", \"score\": 2, \"difficulty\": \"easy\"}"
output = pipe(prompt, max_new_tokens=300, do_sample=True, temperature=0.7)

print(output[0]['generated_text'])


### Upload the adapters to Hugging Face (Optional)

In [None]:
from huggingface_hub import login, HfApi, create_repo, upload_folder

# Authenticate with the Hugging Face Hub (prompts for an access token)
login()

# Target model repositories on the Hub
repo_id_1 = "Vjay15/SmolLM_LoRA_Science_Grader"
repo_id_2 = "Vjay15/SmolLM_LoRA_Social_Grader"

# exist_ok=True keeps this cell re-runnable: without it, create_repo raises
# once the repositories already exist.
create_repo(repo_id=repo_id_1, private=False, exist_ok=True)
create_repo(repo_id=repo_id_2, private=False, exist_ok=True)

# Upload the Science adapter folder to the root of its repository
upload_folder(
    repo_id = repo_id_1,
    folder_path = "/content/SmolLM_LoRA_Greatify_Science",
    path_in_repo = ""
)

# Upload the Social adapter folder to the root of its repository
upload_folder(
    repo_id = repo_id_2,
    folder_path = "/content/SmolLM_LoRA_Greatify_Social",
    path_in_repo = ""
)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Deleted: Adapter Uploaded/README.md
Deleted: Adapter Uploaded/adapter_config.json
Deleted: Adapter Uploaded/adapter_model.safetensors
Deleted: Adapter Uploaded/chat_template.jinja
Deleted: Adapter Uploaded/checkpoint-12/README.md
Deleted: Adapter Uploaded/checkpoint-12/adapter_config.json
Deleted: Adapter Uploaded/checkpoint-12/adapter_model.safetensors
Deleted: Adapter Uploaded/checkpoint-12/chat_template.jinja
Deleted: Adapter Uploaded/checkpoint-12/merges.txt
Deleted: Adapter Uploaded/checkpoint-12/optimizer.pt
Deleted: Adapter Uploaded/checkpoint-12/rng_state.pth
Deleted: Adapter Uploaded/checkpoint-12/scheduler.pt
Deleted: Adapter Uploaded/checkpoint-12/special_tokens_map.json
Deleted: Adapter Uploaded/checkpoint-12/tokenizer.json
Deleted: Adapter Uploaded/checkpoint-12/tokenizer_config.json
Deleted: Adapter Uploaded/checkpoint-12/trainer_state.json
Deleted: Adapter Uploaded/checkpoint-12/training_args.bin
Deleted: Adapter Uploaded/checkpoint-12/vocab.json
Deleted: Adapter Uploade

CommitInfo(commit_url='https://huggingface.co/Vjay15/SmolLM_LoRA_Social_Grader/commit/b3b9a4296fe18af34ed0f561fcfc17f100ea76f7', commit_message='Upload folder using huggingface_hub', commit_description='', oid='b3b9a4296fe18af34ed0f561fcfc17f100ea76f7', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Vjay15/SmolLM_LoRA_Social_Grader', endpoint='https://huggingface.co', repo_type='model', repo_id='Vjay15/SmolLM_LoRA_Social_Grader'), pr_revision=None, pr_num=None)