# Instruction Finetuning with LoRA

In [1]:
import os
os.environ['WANDB_PROJECT'] = "lora_instruct_finetuning"

from enum import Enum
from functools import partial
import pandas as pd
import numpy as np

import torch
from transformers import AutoModelForCausalLM, LlamaTokenizer, TrainingArguments, set_seed
from datasets import load_dataset
from trl import SFTTrainer
from peft import get_peft_model, LoraConfig, TaskType

In [2]:
# Fixed seed handed to transformers' `set_seed` so repeated runs of the notebook are comparable.
seed = 0
set_seed(seed)

## Data Processing
The dataset used here is an instruction-tuning dataset in Hindi and Hinglish.

In [3]:
# Hub identifiers of the base checkpoint to fine-tune and of the instruction dataset.
model_name = "sarvamai/OpenHathi-7B-Hi-v0.1-Base"
dataset_name = "smangrul/hinglish_self_instruct_v0"
# This tokenizer is only needed to render conversations to text with the chat template below.
tokenizer = LlamaTokenizer.from_pretrained(model_name)
# Jinja chat template: every message is rendered as
#   <|im_start|>{role}\n{content}<|im_end|>\n
# and, when `add_generation_prompt` is set, `<|im_start|>assistant\n` is appended after the
# last message so the model continues with the assistant's reply.
template = """{% for message in messages %}\n{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% if loop.last and add_generation_prompt %}{{'<|im_start|>assistant\n' }}{% endif %}{% endfor %}"""
tokenizer.chat_template = template



In [4]:
def preprocess(samples):
    """Render each conversation of a batch into one chat-formatted string.

    Args:
        samples: Batch mapping whose "messages" entry holds one conversation
            (a sequence of message dicts) per example.

    Returns:
        A dict with the single key "content" holding the rendered strings,
        in the same order as the input conversations.
    """
    rendered = [
        tokenizer.apply_chat_template(chat, tokenize=False)
        for chat in samples["messages"]
    ]
    return {"content": rendered}

In [None]:
# Download the raw conversations from the Hub.
raw_dataset = load_dataset(dataset_name)

# Render every conversation to a single chat-formatted string stored in a "content" column;
# the original columns are dropped so only that column remains.
chat_dataset = raw_dataset.map(
    preprocess,
    batched=True,
    remove_columns=raw_dataset["train"].column_names,
)

# Split names are case-sensitive: the split is "train" (as used for `column_names` above),
# so indexing with "Train" raises a KeyError.
# Hold out 10% of the examples for evaluation; passing the seed makes the split itself
# reproducible regardless of how much global RNG state was consumed beforehand.
dataset = chat_dataset["train"].train_test_split(test_size=0.1, seed=seed)

## Create PEFT model

In [None]:
# Names of the sub-modules that receive LoRA adapters: the attention and MLP projections
# plus the input embeddings and the output head.
target_modules = [
    "gate_proj", "q_proj", "lm_head",
    "o_proj", "k_proj", "embed_tokens",
    "down_proj", "up_proj", "v_proj",
]

# LoRA settings for causal language modelling.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=target_modules,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

In [None]:
class ChatmlSpecialTokens(str, Enum):
    """ChatML-style marker strings registered with the tokenizer.

    Members are plain strings (the class mixes in ``str``), so they can be
    compared with and used as ordinary text.
    """

    # Headers that open a turn for each role.
    user = "<|im_start|>user"
    assistant = "<|im_start|>assistant"
    system = "<|im_start|>system"
    # Marker that closes a turn.
    eos_token = "<|im_end|>"
    bos_token = "<s>"
    pad_token = "<pad>"

    @classmethod
    def list(cls):
        """Return the string value of every member, in definition order."""
        return [token.value for token in cls]

In [None]:
# Rebuild the tokenizer from the base checkpoint, this time registering the ChatML markers as
# special tokens. The checkpoint must be loaded through `from_pretrained`: the bare
# `LlamaTokenizer(...)` constructor expects a local vocabulary file as its first argument,
# not a Hub model ID.
tokenizer = LlamaTokenizer.from_pretrained(
    model_name,
    pad_token=ChatmlSpecialTokens.pad_token.value,
    bos_token=ChatmlSpecialTokens.bos_token.value,
    eos_token=ChatmlSpecialTokens.eos_token.value,
    additional_special_tokens=ChatmlSpecialTokens.list(),
    trust_remote_code=True
)
tokenizer.chat_template = template

model = AutoModelForCausalLM.from_pretrained(model_name)
# The tokenizer may now hold more tokens than the checkpoint's vocabulary, so the embedding
# matrices are resized to match before the adapters are attached.
model.resize_token_embeddings(len(tokenizer))
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# cast non-trainable parameters to fp16
for p in model.parameters():
    if not p.requires_grad:
        p.data = p.to(torch.float16)

Using LoRA adapters, this is the amount of trainable parameters:<br>
trainable params: 20,823,120 || all params: 6,890,875,984 || trainable%: 0.3021839320334516

## Train

In [None]:
# Training hyperparameters. Each value must be a plain scalar: a trailing comma after the
# value (e.g. `x = 1,`) would silently create a one-element tuple instead.
output_dir = "openhathi_instruct"
per_device_train_batch_size = 1
per_device_eval_batch_size = 1
gradient_accumulation_steps = 8  # 1 sample x 8 accumulation steps per optimizer update, per device
logging_steps = 5
learning_rate = 5e-4
max_grad_norm = 1.0
# NOTE: `max_steps` is not passed to `TrainingArguments` below, so the training length is
# governed by `num_train_epochs`.
max_steps = 250
num_train_epochs = 10
warmup_ratio = 0.1
lr_scheduler_type = "cosine"
max_seq_length = 2048

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    # No intermediate checkpoints are written; evaluation runs once per epoch.
    save_strategy="no",
    evaluation_strategy="epoch",
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    weight_decay=0.1,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    fp16=True,
    report_to=["tensorboard", "wandb"],
    hub_private_repo=True,
    push_to_hub=True,
    num_train_epochs=num_train_epochs,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False}
)

In [None]:
# Supervised fine-tuning on the rendered "content" strings, packed into sequences of up to
# `max_seq_length` tokens.
trainer = SFTTrainer(
    # model and tokenizer
    model=model,
    tokenizer=tokenizer,
    # optimisation / logging settings
    args=training_arguments,
    # data
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="content",
    # sequence construction
    packing=True,
    max_seq_length=max_seq_length,
)

# Run the training loop, then save the resulting model.
trainer.train()
trainer.save_model()

## Load for inference

In [None]:
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer

In [None]:
# Hub repo ID or local directory that holds the trained adapter and its tokenizer.
# TODO: fill this in before running the cell.
peft_model_id = ""
device = "cuda"

# Fail early with an actionable message instead of an obscure loader error on an empty ID.
if not peft_model_id:
    raise ValueError(
        "Set `peft_model_id` to the Hub repo ID or local path of the trained adapter."
    )

# The adapter config records which base checkpoint the adapter was trained on.
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# Match the embedding size to the tokenizer before the adapter weights are loaded.
model.resize_token_embeddings(len(tokenizer))
model = PeftModel.from_pretrained(model, peft_model_id)

In [None]:
# Move the model to the configured device in half precision and switch to evaluation mode.
# `device` is used throughout so the target device is set in one place only.
model.to(device=device, dtype=torch.float16)
model.eval()

messages = [
    {
        "role": "user",
        "content": "bro, ye generative AI kya hai?" # this is a Hinglish expression
    }
]

# Render the conversation with the chat template and append the assistant header so that the
# model continues with the assistant's reply.
text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
inputs = tokenizer(text, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}

outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    top_p=0.95,
    temperature=0.2,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.eos_token_id
)
print(tokenizer.decode(outputs[0]))