# 🧠 Fine-Tuning an LLM with LoRA and PEFT (Custom Document)



## Load Model

In [None]:
import gc

import torch
from transformers import pipeline

model_name = "Qwen/Qwen2.5-3B-Instruct"

# Baseline check: ask the untouched model a question before any fine-tuning.
# The task is named explicitly, and the device falls back to CPU when no GPU
# is present instead of failing on a hard-coded "cuda".
ask_llm = pipeline(
    "text-generation",
    model=model_name,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

print(ask_llm("who mahatma")[0]["generated_text"])

# Release the baseline pipeline once the demo is printed. Keeping this copy of
# the model on the GPU leaves too little memory for the second copy loaded for
# training further down (likely cause of the CUDA out-of-memory error there).
del ask_llm
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()



### Data Format
If you'd like to design your own dataset, it must be a JSON file, where each object has precisely 2 keys:
- prompt
- completion

Ex:
```
{
    "prompt": "where Alpha Heman lives?",
    "completion": "Vancouver, BC"
}
{
    "prompt": "fact about Alpha Heman",
    "completion": "He lives in Vancouver, BC"
}
```


In [None]:
from datasets import load_dataset

# Read the prompt/completion records from the JSON file using the "json"
# loader; the result is displayed as the cell output.
raw_data = load_dataset(
    "json",
    data_files='sample_data/Alpha_Heman.json',
)
raw_data

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['prompt', 'completion'],
        num_rows: 236
    })
})

In [None]:
# Pull the first record of the train split to inspect its fields.
train_split = raw_data["train"]
sample = train_split[0]
sample


{'prompt': 'Who is  Alpha Heman ?',
 'completion': 'Alpha Heman  is a wise and powerful wizard of Middle-earth, known for her deep knowledge and leadership.'}

In [None]:
# Preview the training text for one record: prompt and completion joined by a
# real newline ("\n"). The previous '/n' inserted a literal slash and an "n".
# A separate name keeps `sample` a dict, so this cell can be re-run safely.
sample_text = sample["prompt"] + "\n" + sample["completion"]
sample_text

'Who is  Alpha Heman ?/nAlpha Heman  is a wise and powerful wizard of Middle-earth, known for her deep knowledge and leadership.'

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer published for the checkpoint named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess(rowdata, max_length=128):
    """Tokenize one prompt/completion record into a fixed-length training example.

    The prompt and completion are joined with a real newline and, when the
    tokenizer defines an EOS token, terminated with it so the example still
    contains an explicit end-of-answer marker once padding is masked out.

    Args:
        rowdata: Mapping with string values under "prompt" and "completion".
        max_length: Length every example is truncated or padded to.

    Returns:
        The tokenizer output with an added "labels" list: a copy of
        "input_ids" in which padding positions are replaced by -100.
    """
    # "\n" is the newline escape; the earlier "/n" put a literal slash and "n"
    # between the prompt and the completion.
    eos_text = getattr(tokenizer, "eos_token", None) or ""
    text = rowdata["prompt"] + "\n" + rowdata["completion"] + eos_text

    tokenized = tokenizer(
        text,
        max_length=max_length,
        truncation=True,
        padding="max_length",
        return_attention_mask=True,
    )

    # Copying input_ids verbatim would make the loss cover every padding
    # position. -100 is the label value the loss skips, so only real tokens
    # (attention_mask == 1) contribute.
    tokenized["labels"] = [
        token_id if is_real else -100
        for token_id, is_real in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

# Run `preprocess` over every record of every split in `raw_data`.
data = raw_data.map(preprocess)






Map:   0%|          | 0/236 [00:00<?, ? examples/s]

In [None]:
# Rows are padded to a fixed length, so show only the leading entries of the
# list-valued columns instead of printing the whole padded sequence.
first_row = data['train'][0]
{
    column: values[:16] if isinstance(values, list) else values
    for column, values in first_row.items()
}


{'prompt': 'Who is  Alpha Heman ?', 'completion': 'Alpha Heman  is a wise and powerful wizard of Middle-earth, known for her deep knowledge and leadership.', 'input_ids': [15191, 374, 220, 24708, 472, 15977, 937, 14, 77, 19384, 472, 15977, 220, 374, 264, 23335, 323, 7988, 33968, 315, 12592, 85087, 11, 3881, 369, 1059, 5538, 6540, 323, 11438, 13, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 1516

In [None]:
from peft import LoraConfig, get_peft_model, TaskType
from transformers import AutoModelForCausalLM
import torch

# Load the base causal-LM checkpoint in half precision. device_map="auto"
# leaves weight placement to the library. `dtype` replaces `torch_dtype`,
# which the installed transformers version reports as deprecated.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    dtype=torch.float16,
)

# LoRA configuration: adapters are attached only to the modules named
# q_proj, k_proj and v_proj. The imported TaskType enum is used instead of a
# bare string so a typo fails immediately.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj"],
)

# Wrap the base model so that only the adapter weights are trained, then
# report how many parameters that leaves trainable.
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Training / Fine Tuning

Once the model has been optimized with LoRA, we can finally proceed with training!
Please note:
- the following cell requires a lot of computing power; you may want to close other software that is running in the background (close your 50 tabs in Chrome, close Adobe Premiere, don't record the live process in OBS Studio in 4K resolution, etc.).
- it takes about 10 minutes on GPUs with 16GB of VRAM.
- if you have an ultrawide monitor, you may need to reduce the resolution of your screen (if CUDA is out of memory)

Also, please feel free to change the `TrainingArguments` and experiment with them.

In [None]:
from transformers import TrainingArguments, Trainer

# Training / fine-tuning configuration: three passes over the data, a
# learning rate of 1e-3, and a log entry every 25 steps.
training_args = TrainingArguments(
    logging_steps=25,
    learning_rate=1e-3,
    num_train_epochs=3,
)

# Bind the LoRA-wrapped model, the arguments above and the tokenized train split.
trainer = Trainer(model=model, args=training_args, train_dataset=data["train"])



The model is already on multiple devices. Skipping the move to device specified in `args`.


In [None]:
# Run the fine-tuning loop (roughly 10-12 minutes); the returned training
# summary is shown as the cell output.
training_output = trainer.train()
training_output

OutOfMemoryError: CUDA out of memory. Tried to allocate 22.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 10.12 MiB is free. Process 2742 has 14.73 GiB memory in use. Of the allocated memory 14.48 GiB is allocated by PyTorch, and 123.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

## Save Model on Disk
We must save the fine-tuned model to our file system, alongside its tokenizer. A new folder named `my_qwen` will be created in the current working directory.

In [None]:
# Write the fine-tuned model and the tokenizer files into the same folder;
# the tokenizer call's return value is shown as the cell output.
output_dir = "./my_qwen"
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

('./my_qwen/tokenizer_config.json',
 './my_qwen/special_tokens_map.json',
 './my_qwen/chat_template.jinja',
 './my_qwen/vocab.json',
 './my_qwen/merges.txt',
 './my_qwen/added_tokens.json',
 './my_qwen/tokenizer.json')

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# Reload from the folder the save step wrote to. The earlier absolute Google
# Drive path is never created by this notebook, so a fresh run could not
# find it.
path = "./my_qwen"

# The adapter config records which base checkpoint the adapter was trained on.
# trust_remote_code is dropped: the earlier cells load this same checkpoint
# without it, so there is no reason to allow repository code to execute.
config = PeftConfig.from_pretrained(path)
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(base_model, path)

tokenizer = AutoTokenizer.from_pretrained(path)

inputs = tokenizer("Who is alpha heman?", return_tensors="pt").to(model.device)

# An explicit generation budget: without one, the answer was cut off
# mid-sentence by the default length limit.
output = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=64,
)

# Strip special tokens (end-of-sequence / padding markers) from the printed text.
print(tokenizer.decode(output[0], skip_special_tokens=True))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Who is Mariya Sha? 
Mariya Sha  is a wise and powerful wizard of Middle-earth, known for her deep knowledge
