# Mistral Guanoca LoRA PEFT example

Source Article: https://www.datacamp.com/tutorial/mistral-7b-tutorial

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

torch.cuda.empty_cache()
model_name="mistralai/Mistral-7B-Instruct-v0.1"

bnb_config = BitsAndBytesConfig(  
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
model = AutoModelForCausalLM.from_pretrained(model_name, 
                                             quantization_config=bnb_config,
                                             torch_dtype=torch.bfloat16,
                                             device_map="auto",
                                             trust_remote_code=True
                                            )
tokenizer = AutoTokenizer.from_pretrained(model_name,
                                          trust_remote_code=True,
                                          device_map="auto"
                                         )
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
print(tokenizer.add_bos_token, tokenizer.add_eos_token)

model.config.use_cache = False # silence the warnings
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()


ImportError: Using `bitsandbytes` 8-bit quantization requires Accelerate: `pip install accelerate` and the latest version of bitsandbytes: `pip install -i https://pypi.org/simple/ bitsandbytes`

In [2]:
from datasets import load_dataset
import pandas as pd 

ds = load_dataset("csv",data_files={"train":'gemma10000.csv'})

In [3]:
ds['train']['rewrite_prompt'][0]

'Convert the text into a vintage circus poster announcement'

In [21]:
import json

orig_prefix = "Original Text: "

#mistral "response"
llm_response_for_rewrite = "Provide the modified text and I'll tell you a something general that changed about it.  I'll avoid any specifics though.  My tone will be neutral."

#modified text prefix
rewrite_prefix = "\nRewritten Text: "

#provided as start of Mistral response (anything after this is used as the prompt)
#providing this as the start of the response helps keep things relevant
response_start = "The request was: "

#We insert our detected prompt into this well-scoring baseline text
#thanks to: https://www.kaggle.com/code/rdxsun/lb-0-61
base_line = 'Refine the following passage by emulating the writing style of [insert desired style here], with a focus on enhancing its clarity, elegance, and overall impact. Preserve the essence and original meaning of the text, while meticulously adjusting its tone, vocabulary, and stylistic elements to resonate with the chosen style.Please improve the following text using the writing style of, maintaining the original meaning but altering the tone, diction, and stylistic elements to match the new style.Enhance the clarity, elegance, and impact of the following text by adopting the writing style of , ensuring the core message remains intact while transforming the tone, word choice, and stylistic features to align with the specified style.' 
base_line_swap_text = "[insert desired style here]"

#first example
example_text_1 = "Hey there! Just a heads up: our friendly dog may bark a bit, but don't worry, he's all bark and no bite!"
example_rewrite_1 = "Warning: Protective dog on premises. May exhibit aggressive behavior. Ensure personal safety by maintaining distance and avoiding direct contact."
example_prompt_1 = "Shift the narrative from offering an informal, comforting assurance to providing a more structured advisory note that emphasizes mindfulness and preparedness. This modification should mark a clear change in tone and purpose, aiming to responsibly inform and guide, while maintaining a positive and constructive approach."

#second example
example_text_2 = "A lunar eclipse happens when Earth casts its shadow on the moon during a full moon. The moon appears reddish because Earth's atmosphere scatters sunlight, some of which refracts onto the moon's surface. Total eclipses see the moon entirely in Earth's shadow; partial ones occur when only part of the moon is shadowed."
example_rewrite_2 = "Yo check it, when the Earth steps in, takes its place, casting shadows on the moon's face. It's a full moon night, the scene's set right, for a lunar eclipse, a celestial sight. The moon turns red, ain't no dread, it's just Earth's atmosphere playing with sunlight's thread, scattering colors, bending light, onto the moon's surface, making the night bright. Total eclipse, the moon's fully in the dark, covered by Earth's shadow, making its mark. But when it's partial, not all is shadowed, just a piece of the moon, slightly furrowed. So that's the rap, the lunar eclipse track, a dance of shadows, with no slack. Earth, moon, and sun, in a cosmic play, creating the spectacle we see today."
example_prompt_2 = "Transform your communication from an academic delivery to a dynamic, rhythm-infused presentation. Keep the essence of the information intact but weave in artistic elements, utilizing rhythm, rhyme, and a conversational style. This approach should make the content more relatable and enjoyable, aiming to both educate and entertain your audience."


# this function is used to output the right formate for each row in the dataset
def create_text_row(instruction, output, complete=True) -> str:
    if complete:
        text_row = f"""<s>[INST] {instruction} [/INST] \n {output} </s>"""
    else:
        text_row = f"""<s>[INST] {instruction} [/INST] \n {output} """
    return text_row


def get_prompt(orig_text, transformed_text, rewritten_prompt = None):
    message = create_text_row(f"{orig_prefix} {orig_text}", llm_response_for_rewrite)
    if rewritten_prompt is None:
        message = message + create_text_row(f"{rewrite_prefix} {transformed_text}", response_start, False)
    else:
        message = message + create_text_row(f"{rewrite_prefix} {transformed_text}", f"{response_start} {rewritten_prompt}")
    return message


def chat(messages):
    encodeds = tokenizer(messages, return_tensors="pt")
    
    model_inputs = encodeds.to(torch.cuda.current_device())
    
    generated_ids = model.generate(model_inputs, max_new_tokens=2000, do_sample=True)
    decoded = tokenizer.batch_decode(generated_ids)
    print(encodeds[0])
    print(decoded[0])
    return decoded[0]

chat(messages)


# interate over all the rows formate the dataset and store it in a jsonl file
def process_jsonl_file(output_file_path):
    with open(output_file_path, "w") as output_jsonl_file:
        i=0
        for item in ds["train"]:
            json_object = {
                "text": get_prompt(item["original_text"],item['rewritten_text'],item['rewrite_prompt'])
            }
            if i==0:
                chat(json_object)
            output_jsonl_file.write(json.dumps(json_object) + "\n")
            i+=1

# Provide the path where you want to save the formatted dataset
process_jsonl_file("./training_dataset.jsonl")




In [22]:
train_dataset = load_dataset('json', data_files='./training_dataset.jsonl' , split='train')

Generating train split: 9999 examples [00:00, 221916.27 examples/s]


In [27]:
train_dataset=train_dataset.train_test_split(test_size=0.1, seed=37)


In [28]:
from transformers import HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os
from datasets import load_dataset
from trl import SFTTrainer

2024-03-27 01:14:54.386645: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-27 01:14:54.386688: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-27 01:14:54.387664: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-27 01:14:54.391447: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [29]:
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "lm_head",]
)
model = get_peft_model(model, peft_config)


In [30]:
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    #optim="paged_adamw_32bit",
    save_total_limit = 2, 
    save_strategy = "steps", 
    load_best_model_at_end=False,
    save_steps=500,
    logging_steps=500,
    learning_rate=2e-5,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant"
)

In [31]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset['train'],
    eval_dataset=train_dataset['test'],
    peft_config=peft_config,
    max_seq_length= None,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False
)

Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8999/8999 [00:00<00:00, 9157.25 examples/s]
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 9150.46 examples/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [32]:
new_model='Mistral-7b-instruct-v0.1-qlora'
#new_model='Mistral-7b-v0.1-qlora'
trainer.train()
trainer.model.save_pretrained(new_model)



Step,Training Loss
500,1.5231
1000,1.4075
1500,1.3765
2000,1.3668


Checkpoint destination directory ./results/checkpoint-1500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-2000 already exists and is non-empty. Saving will proceed but saved results may be invalid.


In [33]:
# Merge the model with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16, #bfloat and float?
    device_map="auto",
)
merged_model= PeftModel.from_pretrained(base_model, new_model)
merged_model= merged_model.merge_and_unload()

# Save the merged model
merged_model.save_pretrained("merged_model",safe_serialization=True)
tokenizer.save_pretrained("merged_model")

Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:02<00:00,  1.44s/it]


('merged_model/tokenizer_config.json',
 'merged_model/special_tokens_map.json',
 'merged_model/tokenizer.model',
 'merged_model/added_tokens.json',
 'merged_model/tokenizer.json')

In [36]:
from random import randrange
import re


# def extract_text_after_response_start(full_text):
#     parts = full_text.rsplit(response_start, 1)  # Split from the right, ensuring only the last occurrence is considered
#     if len(parts) > 1:
#         return parts[1].strip()  # Return text after the last occurrence of response_start
#     else:
#         return full_text  # Return the original text if response_start is not found

eos_token="</s>"
base_line=base_line = 'Refine the following passage by emulating the writing style of [insert desired style here], with a focus on enhancing its clarity, elegance, and overall impact. Preserve the essence and original meaning of the text, while meticulously adjusting its tone, vocabulary, and stylistic elements to resonate with the chosen style.Please improve the following text using the writing style of, maintaining the original meaning but altering the tone, diction, and stylistic elements to match the new style.Enhance the clarity, elegance, and impact of the following text by adopting the writing style of , ensuring the core message remains intact while transforming the tone, word choice, and stylistic features to align with the specified style.' 

def extract_text_after_response_start(full_text):
    parts = re.search(f"{response_start}(.*?){eos_token}", full_text).groups()
    print(full_text)
    print(parts)
    if parts:
        return parts
    else:
        return base_line
        

def sample_pred(sample):
    prompt = get_prompt(sample["original_text"],sample['rewritten_text'])

    with torch.inference_mode():
        input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
        # with torch.inference_mode():
        # outputs = merged_model.generate(input_ids=input_ids, max_new_tokens=100, do_sample=True, top_p=0.9,temperature=0.5)
        outputs = merged_model.generate(input_ids=input_ids, max_new_tokens=1024, do_sample=False)
        
        rewrite_prompt_pred=extract_text_after_response_start(tokenizer.batch_decode(outputs.detach().cpu().numpy())[0])
    
    # print(f"Prompt:\n{prompt}\n")
    # print(f"\nGenerated instruction:\n{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]}")
    # print(f"\nGround truth:\n{sample['rewrite_prompt']}")
    
    # print(f"Prompt:\n{prompt}\n")
    # print("**********************")
    # print("**********************")
    # print(f"\nGenerated instruction:\n{rewrite_prompt_pred}")
    # print("**********************")
    # print("**********************")
    # print(f"\nGround truth:\n{sample['rewrite_prompt']}")

for i in range(3):
    sample = ds["train"][randrange(len(train_dataset ))]
    sample_pred(sample)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


<s><s> [INST] Original Text:  Albuquerque police have released lapel camera footage that shows an officer shooting at a fleeing suspect. In October 2013, officers were called about an erratic driver who was running drivers off the road and firing a weapon from his vehicle. Officers said the man, Joaquin Ortega, tried to carjack drivers after crashing his own car. The newly-released footage seems to contradict the story that police initially reported. Then-Interim Police Chief Allen Banks would not release more than a single frame of the lapel video after the incident, stating that he wasn’t going to allow the case to be tried in [/INST] 
 Provide the modified text and I'll tell you a something general that changed about it.  I'll avoid any specifics though.  My tone will be neutral. </s><s> [INST] 
Rewritten Text:  

**DANGER! POLICE SHOOTING AT SUSPECT**

Wanted: Joaquin Ortega
Age: 32
Last known: Driving erratically, firing weapon from vehicle.

Don't miss this thrilling spectacle! W

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


<s><s> [INST] Original Text:  Albuquerque police have released lapel camera footage that shows an officer shooting at a fleeing suspect. In October 2013, officers were called about an erratic driver who was running drivers off the road and firing a weapon from his vehicle. Officers said the man, Joaquin Ortega, tried to carjack drivers after crashing his own car. The newly-released footage seems to contradict the story that police initially reported. Then-Interim Police Chief Allen Banks would not release more than a single frame of the lapel video after the incident, stating that he wasn’t going to allow the case to be tried in [/INST] 
 Provide the modified text and I'll tell you a something general that changed about it.  I'll avoid any specifics though.  My tone will be neutral. </s><s> [INST] 
Rewritten Text:  

**DANGER! POLICE SHOOTING AT SUSPECT**

Wanted: Joaquin Ortega
Age: 32
Last known: Driving erratically, firing weapon from vehicle.

Don't miss this thrilling spectacle! W

In [None]:
x=tokenizer.batch_decode(outputs.detach().cpu().numpy())

In [20]:
print(x[0])

[INST] Original Text:  Albuquerque police have released lapel camera footage that shows an officer shooting at a fleeing suspect. In October 2013, officers were called about an erratic driver who was running drivers off the road and firing a weapon from his vehicle. Officers said the man, Joaquin Ortega, tried to carjack drivers after crashing his own car. The newly-released footage seems to contradict the story that police initially reported. Then-Interim Police Chief Allen Banks would not release more than a single frame of the lapel video after the incident, stating that he wasn’t going to allow the case to be tried in [/INST] 
 The request was:  Provide the modified text and I'll tell you a something general that changed about it.  I'll avoid any specifics though.  My tone will be neutral.  [INST] 
Rewritten Text:  

**DANGER! POLICE SHOOTING AT SUSPECT**

Wanted: Joaquin Ortega
Age: 32
Last known: Driving erratically, firing weapon from vehicle.

Don't miss this thrilling spectacl