Fine-tuning template by Mohit Dulani: https://medium.com/@mohitdulani/fine-tune-any-llm-using-your-custom-dataset-f5e712eb6836

In [1]:
!pip install -qU accelerate peft bitsandbytes transformers trl dataset torch


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, PeftModel , prepare_model_for_kbit_training , get_peft_model
from trl import SFTTrainer
from dotenv import load_dotenv

# Load variables from a local .env file (if present) before reading the token.
load_dotenv()

# Hub access token passed to from_pretrained; None when ACCESS_TOKEN is not set.
access_token = os.getenv("ACCESS_TOKEN")

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [7]:
# Canonical Hub repo id (lower-case "v"), so the local cache entry and the
# base-model name recorded with saved adapters match the upstream spelling.
base_model_id = 'mistralai/Mistral-7B-v0.1'

# Compute in bfloat16 only when the GPU supports it; otherwise use float16.
compute_dtype = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported()
    else torch.float16
)

# 4-bit NF4 quantisation with double quantisation enabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="nf4",
)

# Load the quantised base model; device_map="auto" lets the library place the weights.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    token=access_token,
)

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [9]:
# Tokenizer for the base model. Truncation and padding both happen on the right,
# and BOS/EOS tokens are added automatically to every encoded sequence.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    truncation_side="right",
    padding_side="right",
    add_eos_token=True,
    add_bos_token=True,
    token=access_token,  # same credential as the model load, so a gated repo resolves here too
)
# Reuse EOS as the padding token.
tokenizer.pad_token = tokenizer.eos_token

ValueError: Cannot instantiate this tokenizer from a slow version. If it's based on sentencepiece, make sure you have sentencepiece installed.

In [None]:
SEED = 42  # fixed so the train/test partition is identical on every run

# NOTE(review): field="data" makes the JSON loader read records from a top-level
# "data" key; confirm outputs_copy.jsonl is actually shaped that way.
raw_ds = load_dataset("json", data_files='outputs_copy.jsonl', field="data", split="train")

# 90/10 split; `ds["train"]` and `ds["test"]` are consumed by the trainer.
ds = raw_ds.train_test_split(test_size=0.1, seed=SEED)

In [None]:
# Checkpoint activations during training to reduce memory use.
base_model.gradient_checkpointing_enable()

# Prepare the quantised model for training and bind it to `model`; the PEFT and
# trainer cells reference `model`, which was otherwise never defined (NameError).
model = prepare_model_for_kbit_training(base_model)

In [None]:
def createPrompt(example):
    """Format one dataset record as a single instruction-style training string.

    Args:
        example: mapping with an ``'input'`` entry (the user request) and an
            ``'output'`` entry (the expected answer).

    Returns:
        ``<s>[INST] <system text> \\n <input> [/INST]<output> </s>`` as one string.

    NOTE(review): the literal ``<s>`` / ``</s>`` markers are part of the returned
    text; the tokenizer in this notebook is also created with ``add_bos_token`` /
    ``add_eos_token``, so special tokens may end up duplicated -- confirm.
    """
    pieces = (
        '<s>',
        '[INST] You are a finance suggestion model and your role is to give finance related suggestions \n',
        f" {example['input']} [/INST]",
        f"{example['output']} </s>",
    )
    return "".join(pieces)

In [4]:
def printParameters(model):
    """Print how many of ``model``'s parameters are trainable.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where each ``param`` has ``numel()`` and
            a ``requires_grad`` flag.

    Prints the total element count, the count for parameters with
    ``requires_grad`` set, and the trainable share as a percentage.
    Raises ``ZeroDivisionError`` when the model has no parameters.
    """
    # (element count, is-trainable) for every parameter, gathered in one pass.
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    total_params = sum(count for count, _ in sizes)
    trainable_param = sum(count for count, trainable in sizes if trainable)

    print(f"Total params : {total_params} , trainable_params : {trainable_param} , trainable % : {100 * trainable_param / total_params} ")

In [5]:
# Names of the sub-modules that receive LoRA adapters: the attention projections,
# the MLP projections and the output head.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "lm_head",
]

# LoRA settings: rank 64, alpha 16, 10% adapter dropout, no bias terms trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=lora_target_modules,
)

In [6]:
# Attach the LoRA adapters exactly once. The isinstance guard makes the cell
# idempotent: re-running it no longer wraps an already-wrapped model again.
if not isinstance(model, PeftModel):
    model = get_peft_model(model, peft_config)
printParameters(model)

# With more than one GPU visible, flag the model as model-parallel.
# NOTE(review): presumably so the trainer treats the model as already spread
# across devices -- confirm against the transformers Trainer docs.
if torch.cuda.device_count() > 1:
    model.is_parallelizable = True
    model.model_parallel = True

# max_steps and num_train_epochs :
# 1 epoch = [ training_examples / (no_of_gpu * batch_size_per_device * gradient_accumulation_steps) ] steps

# Pick the mixed-precision mode from the hardware instead of hard-coding bf16,
# which TrainingArguments rejects on GPUs without bfloat16 support.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
use_fp16 = torch.cuda.is_available() and not use_bf16

args = TrainingArguments(
  output_dir = "outputs",  # checkpoints are written under this directory
  # num_train_epochs=1000,
  max_steps = 1000, # comment out this line if you want to train in epochs
  per_device_train_batch_size = 4,
  warmup_ratio = 0.03,
  gradient_accumulation_steps = 1,
  logging_steps=10,
  logging_strategy= "steps",
  save_strategy="steps",
  save_steps = 10,
  eval_strategy="steps",  # current name of the deprecated `evaluation_strategy` argument
  eval_steps=10, # comment out this line if you want to evaluate at the end of each epoch
  learning_rate=2.5e-5,
  bf16=use_bf16,
  fp16=use_fp16,
  logging_nan_inf_filter = False, # log nan/inf loss values as-is so a diverging run is visible
  # lr_scheduler_type='constant',
  save_safetensors = True,
)

# Gather the trainer settings first so each one can carry its own note.
# NOTE(review): these keyword names are trl-version sensitive -- confirm they
# match the installed trl release.
trainer_kwargs = dict(
    model=model,
    peft_config=peft_config,
    max_seq_length=350,            # sequence length used for the packed training samples
    tokenizer=tokenizer,
    packing=True,                  # pack several formatted examples into each sequence
    formatting_func=createPrompt,  # turns each train/test record into its prompt string
    args=args,
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
)
trainer = SFTTrainer(**trainer_kwargs)

# Switch the generation cache off on the model config before training starts.
model.config.use_cache = False
trainer.train()

NameError: name 'model' is not defined

In [None]:
# The training cell uses output_dir="outputs" with save_steps=10, so the first
# checkpoint is written to outputs/checkpoint-10.
# NOTE(review): if this kernel already ran get_peft_model on base_model, it may
# already carry adapter layers; reloading a fresh base model first is safer -- confirm.
ft_model = PeftModel.from_pretrained(base_model, 'outputs/checkpoint-10')  # replace with the actual checkpoint name

In [None]:
# Example question; replace with whatever you want to ask the fine-tuned model.
eval_question = "How should I start building an emergency fund?"

# Same layout createPrompt produces for training, stopping after [/INST] so the
# model writes the answer. (The original `eval_prompt = ` was a syntax error.)
eval_prompt = (
    "<s>[INST] You are a finance suggestion model and your role is to give finance related suggestions \n"
    f" {eval_question} [/INST]"
)

# add_special_tokens=False: the tokenizer was created with add_eos_token=True and
# would otherwise append </s> to the prompt; BOS is already written in the text.
# Tensors go to `device` rather than a hard-coded "cuda".
model_input = tokenizer(eval_prompt, return_tensors="pt", add_special_tokens=False).to(device)

ft_model.eval()
with torch.no_grad():  # inference only, no gradients needed
    generated = ft_model.generate(**model_input, max_new_tokens=150, repetition_penalty=1.15)

print(tokenizer.decode(generated[0], skip_special_tokens=True))