<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/TransformerLibrary_Linkedin_Project_4_mistral_v2_7b_finetunning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://facebookresearch.github.io/xformers/_modules/xformers/ops/fmha/attn_bias.html

In [None]:
!pip install -U xformers trl peft accelerate bitsandbytes torch torchvision torchaudio datasets packaging transformers -q

In [None]:
!pip uninstall -y xformers
!pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu118 -q

In [4]:
import torch
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA L4. Max memory = 22.161 GB.
0.0 GB of memory reserved.


In [None]:
# --- Model Loading, Data Processing, Trainer Initialization ---

import torch
from datasets import load_dataset
from trl import SFTTrainer
from transformers import (
    TrainingArguments,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
)

max_seq_length = 2048
dtype = None
load_in_4bit = True

print("Loading model and tokenizer...")

# Configure 4-bit quantization using standard transformers and bitsandbytes
bnb_config = BitsAndBytesConfig(
    load_in_4bit=load_in_4bit,
    bnb_4bit_quant_type="nf4", # Or "fp4"
    # Use torch.float16 or torch.bfloat16 based on hardware support
    # torch.bfloat16 is preferred on newer GPUs (Ampere+)
    # torch.float16 is typically used on older GPUs (T4, V100)
    bnb_4bit_compute_dtype=torch.bfloat16, # Recommended if your GPU supports it
    bnb_4bit_use_double_quant=True,
)

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Load the model with quantization config
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Set padding token if it doesn't exist (common for some models)
if tokenizer.pad_token is None:
    # For instruct models, the EOS token is often used as pad token
    tokenizer.pad_token = tokenizer.eos_token


print("Configuring PEFT model...")

# Prepare the model for k-bit training (necessary for 4-bit)
model = prepare_model_for_kbit_training(model)

# Define LoRA configuration
lora_config = LoraConfig(
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    task_type = "CAUSAL_LM", # Important for Causal Language Modeling
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)

print("Loading and processing dataset...")
# Load the dataset
dataset = load_dataset("zefang-liu/phishing-email-dataset", split = "train")
# Removing unncessory columns
dataset = dataset.remove_columns(['Unnamed: 0'])

# Define the prompt template
phishing_prompt = """Below is an email body. Determine if the email is safe or phishing.

### Email Text:
{}

### Email Type:
{}"""

EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

# Define the formatting function
def formatting_prompts_func(examples):
    email_texts = examples["Email Text"]
    email_types = examples["Email Type"]
    texts = []
    for email_text, email_type in zip(email_texts, email_types):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = phishing_prompt.format(email_text, email_type) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

# Apply the formatting function to the dataset
dataset = dataset.map(formatting_prompts_func, batched=True)

In [6]:
# --- SFTTrainer Initialization ---

print("Initializing SFTTrainer...")

use_fp16 = True
use_bf16 = False

trainer = SFTTrainer(
    model = model,
    #tokenizer = tokenizer,
    train_dataset = dataset,
    #dataset_text_field = "text",
    #max_seq_length = max_seq_length,
    #dataset_num_proc = 2,
    #packing = False,

    # Training arguments
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 10,
        learning_rate = 2e-4,
        fp16 = use_fp16,
        bf16 = use_bf16,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
    ),
)

print("SFTTrainer initialized successfully.")

# --- Training Execution ---
print("Starting model training...")
trainer_stats = trainer.train()
print("Training completed.")

Initializing SFTTrainer...


Converting train dataset to ChatML:   0%|          | 0/18650 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/18650 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/18650 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/18650 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


SFTTrainer initialized successfully.
Starting model training...


  return fn(*args, **kwargs)


Step,Training Loss
1,2.7739
2,2.4212
3,2.3608
4,2.5918
5,2.793
6,2.4977
7,2.4175
8,1.8403
9,2.0656
10,2.4615


Training completed.


Final Memory State

In [7]:
!nvidia-smi

Mon Jun  2 17:42:49 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   69C    P0             34W /   72W |    7751MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [8]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

130.187 seconds used for training.
2.17 minutes used for training.
Peak reserved memory = 7.33 GB.
Peak reserved memory for training = 7.33 GB.
Peak reserved memory % of max memory = 33.076 %.
Peak reserved memory for training % of max memory = 33.076 %.


## Inference
Let's run the model! </br>
Give the mail body in the input box and it will show the Mail type whether it Safe mail or Phishing Mail.

In [None]:
!pip install unsloth -q

In [None]:
print("Loading model and tokenizer...")
from unsloth import FastLanguageModel

In [None]:
print(model)

In [15]:
# Define the phishing email prompt
phishing_prompt = """Below is an email body. Determine if the email is safe or phishing.

### Email Text:
{}

### Email Type:
{}"""

# Enable native 2x faster inference
FastLanguageModel.for_inference(model)

# Example email text to classify
email_text_example = input("Please Enter mail body to Check if it is phishing or not : ")

# Prepare the inputs for the model
inputs = tokenizer(
[
    phishing_prompt.format(
        email_text_example,  # email text
        ""  # email type - leave this blank for generation!
    )
], return_tensors="pt").to("cuda")

# Generate the outputs using the model
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
print(tokenizer.batch_decode(outputs))

# For streaming inference
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

Please Enter mail body to Check if it is phishing or not : Dear Customer,  Please find attached your monthly bank statement for June 2024. If you have any questions, please contact our support team.  Thank you for banking with us.  Best regards, Your Bank
['<s> Below is an email body. Determine if the email is safe or phishing.\n\n### Email Text:\nDear Customer,  Please find attached your monthly bank statement for June 2024. If you have any questions, please contact our support team.  Thank you for banking with us.  Best regards, Your Bank\n\n### Email Type:\nSafe Email</s>']
<s> Below is an email body. Determine if the email is safe or phishing.

### Email Text:
Dear Customer,  Please find attached your monthly bank statement for June 2024. If you have any questions, please contact our support team.  Thank you for banking with us.  Best regards, Your Bank

### Email Type:
Safe Email</s>


#Saving the finetuned model

In [16]:
model.save_pretrained("mistral_v3_phishing") # Local saving
tokenizer.save_pretrained("mistral_v3_phishing")

('mistral_v3_phishing/tokenizer_config.json',
 'mistral_v3_phishing/special_tokens_map.json',
 'mistral_v3_phishing/chat_template.jinja',
 'mistral_v3_phishing/tokenizer.model',
 'mistral_v3_phishing/added_tokens.json',
 'mistral_v3_phishing/tokenizer.json')

**Use saved model to generate inference**

In [17]:
if False:
  from unsloth import FastLanguageModel
  model, tokenizer = FastLanguageModel.from_pretrained(
      model_name = "mistral_v3_phishing", #model which i have saved.
      max_seq_length = max_seq_length,
      dtype = dtype,
      load_in_4bit = load_in_4bit,
  )

  FastLanguageModel.for_inference(model) #Enable 2x faster inference



In [18]:
#lets define the phishing prompt again

phishing_prompt = """ Below is an email body. Determine if the email is safe or phishing.

### Email Text:
{}

### Email Type:
{}"""

In [20]:
#Take the input of Example email to classify whether it is safe or phshing
email_text_example = input("Enter the Email body to classify whether it is safe or phishing: ")

#lets prepare the input for the model
inputs = tokenizer(
    [
        phishing_prompt.format(
            email_text_example,  #email text
            "" #email_type  leave this blank for generation.
        )
    ], return_tensors="pt").to("cuda")


#generate the output using the model
outputs = model.generate(**inputs,max_new_tokens=64,use_cache=True)
print(tokenizer.batch_decode(outputs))


# For streaming inference
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)

Enter the Email body to classify whether it is safe or phishing: Dear Valued Customer,  Thank you for your recent purchase with us. Your invoice is attached to this email. If you have any questions about your order, please contact our support team.  [Download Invoice](http://phishing-link.com)  We appreciate your business.  Best regards, Customer Support Team
['<s> Below is an email body. Determine if the email is safe or phishing.\n\n### Email Text:\nDear Valued Customer,  Thank you for your recent purchase with us. Your invoice is attached to this email. If you have any questions about your order, please contact our support team.  [Download Invoice](http://phishing-link.com)  We appreciate your business.  Best regards, Customer Support Team\n\n### Email Type:\nPhishing Email</s>']
<s> Below is an email body. Determine if the email is safe or phishing.

### Email Text:
Dear Valued Customer,  Thank you for your recent purchase with us. Your invoice is attached to this email. If you hav