- source: https://medium.com/@codersama/fine-tuning-mistral-7b-in-google-colab-with-qlora-complete-guide-60e12d437cca

## Prerequisite

In [2]:
%%capture
!git clone 'https://github.com/ali7919/Enlighten-Instruct.git'
!pip install -U bitsandbytes
!pip install transformers==4.36.2
!pip install -U peft
!pip install -U accelerate
!pip install -U trl
!pip install datasets==2.16.0
!pip install sentencepiece

In [19]:
base_model = "mistralai/Mistral-7B-Instruct-v0.2"
new_model = "Enlighten_Instruct_Adipta"

test_path='/content/Enlighten-Instruct/Dataset/TestData.csv'
train_path='/content/Enlighten-Instruct/Dataset/TrainData.csv'

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch
from datasets import load_dataset
from trl import SFTTrainer
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
from datasets import Dataset
import re

## login hf

In [4]:
from google.colab import userdata
secret_hf = userdata.get('HUGGINGFACE_TOKEN')
!huggingface-cli login --token $secret_hf

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## construct dataset

In [14]:
df = pd.read_csv(train_path)

# build training dataset with the right format
df['text'] = '<s>[INST]@Enlighten. ' + df['Q'] +'[/INST]'+ df['A'] + '</s>'

# remove columns
df=df.drop(['Q','A','class'],axis=1)

df = df.head(100)

# convert to dataset object
dataset = ds.dataset(pa.Table.from_pandas(df).to_batches())
dataset = Dataset(pa.Table.from_pandas(df))

In [15]:
df.shape

(100, 1)

## load base model

In [7]:
# Load base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_4bit=True,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)

model.config.use_cache = False
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.bos_token, tokenizer.eos_token

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

('<s>', '</s>')

## PEFT Lora

In [8]:
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
)
model = get_peft_model(model, peft_config)

## parameter training

In [9]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=50,
    logging_steps=1,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)

In [17]:
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length= None,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)



Map:   0%|          | 0/100 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)


## train

In [18]:
trainer.train()



Step,Training Loss
1,1.9365
2,2.0243
3,2.0917
4,1.8399
5,1.7734
6,1.6195
7,1.443
8,1.8481
9,1.7258
10,1.6653


TrainOutput(global_step=25, training_loss=1.6165186834335328, metrics={'train_runtime': 143.8764, 'train_samples_per_second': 0.695, 'train_steps_per_second': 0.174, 'total_flos': 314105580060672.0, 'train_loss': 1.6165186834335328, 'epoch': 1.0})

In [20]:
trainer.model.save_pretrained(new_model)
model.config.use_cache = True
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer): Line

## save to hub

In [21]:
trainer.model.push_to_hub(new_model)

adapter_model.safetensors:   0%|          | 0.00/369M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Adipta/Enlighten_Instruct_Adipta/commit/88d01409a27bc559d89a3a8fd9579cecc3bb3728', commit_message='Upload model', commit_description='', oid='88d01409a27bc559d89a3a8fd9579cecc3bb3728', pr_url=None, pr_revision=None, pr_num=None)

## inference

In [22]:
logging.set_verbosity(logging.CRITICAL)
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

In [23]:
def build_prompt(question):
  prompt=f"<s>[INST]@Enlighten. {question} [/INST]"
  return prompt

In [24]:
question = "what is Light Gun?"
prompt = build_prompt(question)
result = pipe(prompt)

print(result[0]['generated_text'])

<s>[INST]@Enlighten. what is Light Gun? [/INST]Light Gun is a weapon that the player can use to kill the beast. It has a cooldown period before it can be used again. The player can aim at the beast and press the use key to shoot it. The beast will be killed instantly if the player hits it in the head. If the player hits it anywhere else, the beast will be stunned for a short period of time. The player can also use the light gun to kill the beast while it is in the killing animation. The light gun has infinite ammo.


In [27]:
question = "what is PEFT and LORA in context LLM fine tuning"
prompt = build_prompt(question)
result = pipe(prompt)

result[0]['generated_text']

'<s>[INST]@Enlighten. what is PEFT and LORA in context LLM fine tuning [/INST]PEFT stands for Proximal Policy Optimization with Entropy Bonus, and LORA stands for Latent Optimization with Replay Animation. Both are reinforcement learning algorithms used in the context of LLM fine tuning. PEFT is used to optimize the policy, while LORA is used to optimize the latent variable. The combination of the two algorithms allows for more effective fine tuning of the LLM.'

In [26]:
# base_model = "mistralai/Mistral-7B-Instruct-v0.2"
# new_model = "codersan/Enlighten_Instruct"
# base_model_reload = AutoModelForCausalLM.from_pretrained(
#         base_model,
#         torch_dtype=torch.bfloat16,
#         return_dict=True,
#         low_cpu_mem_usage=True,
#         device_map="auto",
#         trust_remote_code=True,
# )
# model = PeftModel.from_pretrained(base_model_reload, new_model)
# pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

## testing

In [28]:
df_test=pd.read_csv(test_path)

In [29]:
df_test.head()

Unnamed: 0,class,Question,a,b,c,d,Answer
0,BeastAttack.cs,What does the BeastAttack class do in the Unit...,Manages player movement,Generates sounds,Manages the beast attack behavior and player h...,Controls the camera,c
1,BeastAttack.cs,What does the gameObject that uses the BeastAt...,A Rigidbody,A Collider,A Cinemachine Impulse Source,All of the above,d
2,BeastAttack.cs,What happens when the player gets hit while in...,The player is pushed back,The player is pushed back and the beast enters...,The player is pushed back and the beast dies,The player is not affected,b
3,BeastKillsPlayerSequenceAnimator.cs,What is the purpose of the BeastKillsPlayerSeq...,To control player movement,To manage the beast killing player animation s...,To handle player combat actions,To store references to key player components,b
4,BeastKillsPlayerSequenceAnimator.cs,"In the BeastKillsPlayerSequenceAnimator class,...",Finds the farthest destination from the player,Activates and plays the animation sequence,Returns a random destination,Sets the player position,b


In [30]:
df_test.shape

(269, 7)

In [33]:
questionCounter=0
correct=0
promptEnding = "[/INST]"

# this must be >= 2
fail_limit=10

# chain of thought activator, model might run out of output tokens
USE_COT=True

#this comes before the question
testGuide='Answer the following question, at the end of your response write the answer like this: Answer:a or Answer:b or Answer:c or Answer:d \n'

for index, row in df_test.head().iterrows():
    print("#############################")
    questionCounter = questionCounter + 1

    #chain of thought activator
    if USE_COT:
        chainOfThoughtActivator='\nfirst think step by step\n'
    else:
        chainOfThoughtActivator='\n'

    #build the prompt
    question=testGuide + row['Question'] + '\na)' + row['a'] + '\nb)' + row['b'] + '\nc)' + row['c'] + '\nd)' + row['d'] + chainOfThoughtActivator
    print(question)

    #true answer
    truth=row['Answer']

    #use a loop, if llm stopped before reaching the answer. ask again
    index=-1
    failCounter=0
    while(index==-1):

        #build the prompt
        prompt = build_prompt(question)

        #generate answer
        result = pipe(prompt)
        llmAnswer = result[0]['generated_text']

        #remove our prompt from it
        index = llmAnswer.find(promptEnding)
        llmAnswer = llmAnswer[len(promptEnding)+index:]

        print("LLM Answer:")
        print(llmAnswer)

        #remove spaces
        llmAnswer=llmAnswer.replace(' ','')

        #find the option in response
        index = llmAnswer.find('Answer:')

        #edge case - llm stoped at the worst time
        if(index+len('Answer:')==len(llmAnswer)):
            index=-1

        #update question for the next try. remove chain of thought
        question=testGuide + row['Question'] + '\na)' + row['a'] + '\nb)' + row['b'] + '\nc)' + row['c'] + '\nd)' + row['d']

        #Don't get stock on a question
        failCounter=failCounter+1
        if failCounter==fail_limit:
            break

    if failCounter==fail_limit:
        continue

    #find and match the option
    next_char = llmAnswer[index+len('Answer:'):][0]
    if next_char in truth:
        correct=correct+1
        print('correct')
    else:
        print('wrong')

    #update accuracy
    accuracy=correct/questionCounter
    print(f"Progress: {questionCounter/len(df_test)}")
    print(f"Accuracy: {accuracy}")

#############################
Answer the following question, at the end of your response write the answer like this: Answer:a or Answer:b or Answer:c or Answer:d 
What does the BeastAttack class do in the Unity project?
a)Manages player movement
b)Generates sounds
c)Manages the beast attack behavior and player hit behavior
d)Controls the camera
first think step by step

LLM Answer:
The BeastAttack class manages the beast attack behavior and player hit behavior. It is responsible for activating the animation clip, deactivating the animation clip, and checking if the player is hit. It also plays the sound effect and sets the player's position to the player's death position.

Answer:c
correct
Progress: 0.0037174721189591076
Accuracy: 1.0
#############################
Answer the following question, at the end of your response write the answer like this: Answer:a or Answer:b or Answer:c or Answer:d 
What does the gameObject that uses the BeastAttack.cs script need to have on it?
a)A Rigidbo



LLM Answer:
The player is pushed back. Answer: a.
wrong
Progress: 0.011152416356877323
Accuracy: 0.3333333333333333
#############################
Answer the following question, at the end of your response write the answer like this: Answer:a or Answer:b or Answer:c or Answer:d 
What is the purpose of the BeastKillsPlayerSequenceAnimator class?
a)To control player movement
b)To manage the beast killing player animation sequence
c)To handle player combat actions
d)To store references to key player components
first think step by step

LLM Answer:
Answer:b 
The BeastKillsPlayerSequenceAnimator class is responsible for managing the beast killing player animation sequence. It controls the animation sequence and plays the animation clip when the beast kills the player.
correct
Progress: 0.01486988847583643
Accuracy: 0.5
#############################
Answer the following question, at the end of your response write the answer like this: Answer:a or Answer:b or Answer:c or Answer:d 
In the Beast