# 0. Context

This notebook runs supervised fine-tuning (SFT) experiments on Qwen-3 series instruct models to generate phishing-template data, using PyTorch.

# 1. Basic Imports

In [1]:
# Import necessary libraries
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

import torch
from datasets import load_dataset, concatenate_datasets
from pathlib import Path
import pandas as pd
import json
from IPython.display import display, HTML

In [2]:
import plotly.express as px

In [3]:
# Relative location of the project root as seen from this notebook — presumably two levels up; TODO confirm.
_PATH_PROJECT = Path("../../")
import os

In [4]:
import sys

# Make the project's `src` directory importable (needed for the `prompts` module).
# The membership check keeps the cell idempotent: re-running it no longer appends
# the same directory to sys.path a second time.
_src_dir = os.path.abspath(os.path.join("../../", "src"))
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

In [5]:
from prompts import internal_template, ext_b2b_template, ext_b2c_template

## 2. Load Model & Tokenizer

In [6]:
# Pick the accelerator in order of preference: CUDA, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Checkpoint that is fine-tuned in this notebook.
model_name = "Qwen/Qwen3-1.7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# torch_dtype="auto" lets the checkpoint decide the weight precision.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
model = model.to(device)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# 3. Load Dataset

## 3.1. Utils

In [7]:
def format_dataset(row) -> dict:
    """
    Convert one raw dataset row into a ChatML-style training sample.

    The row's ``metadata`` column is a JSON string. It is decoded once and the
    fields used for later analysis are copied into the returned dictionary.

    System prompt selection:
        * ``rule_id_4`` -> ``ext_b2b_template``
        * ``rule_id_5`` -> ``ext_b2c_template``
        * any other rule id falls back to the row's ``TYPE``
          (only ``internal`` has a template registered)

    Args:
        row: Mapping with the keys ``metadata`` (JSON string), ``TYPE``,
            ``input_message`` and ``output_response_raw``.

    Returns:
        dict: ``messages`` (system / user / assistant turns) plus the
        ``attack_vector``, ``difficulty_level``, ``rule_id``, ``theme_category``
        and ``target_platform`` values read from the metadata
        (``target_platform`` is ``None`` when the key is absent).

    Raises:
        KeyError: If a required row column or metadata field is missing.
        ValueError: If neither the rule id nor the communication type maps to
            a system prompt template.
    """
    system_prompt_dict = {
        "internal": internal_template,
        "rule_id_4": ext_b2b_template,
        "rule_id_5": ext_b2c_template,
    }

    # Decode the JSON metadata once instead of once per field.
    metadata = json.loads(row["metadata"])
    rule_id = metadata["rule_id"]
    theme_category = metadata["theme_category"]
    comm_type = row["TYPE"]
    attack_vector = metadata["attack_vector"]
    difficulty_level = metadata["difficulty_level"]
    target_platform = metadata.get("target_platform", None)

    # The rule id takes precedence; the communication type is only a fallback.
    system_prompt = system_prompt_dict.get(rule_id, None)
    if system_prompt is None:
        system_prompt = system_prompt_dict.get(comm_type)
    if system_prompt is None:
        # Fail here rather than emit a sample whose system message is None,
        # which would only surface later when the chat template is rendered.
        raise ValueError(
            f"No system prompt template for rule_id={rule_id!r} / TYPE={comm_type!r}"
        )

    ## chatml format
    sample_converted = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": row["input_message"]},
        {"role": "assistant", "content": row["output_response_raw"]},
    ]

    return {
        "messages": sample_converted,
        "attack_vector": attack_vector,
        "difficulty_level": difficulty_level,
        "rule_id": rule_id,
        "theme_category": theme_category,
        "target_platform": target_platform,
    }

In [8]:
def convert_datasetdict_to_pandas(dataset) -> pd.DataFrame:
    """Flatten every split of a Hugging Face DatasetDict into a single DataFrame.

    Args:
        dataset (DatasetDict): Mapping of split name to dataset; each value must
            provide a ``to_pandas()`` method.

    Returns:
        pd.DataFrame: Rows of all splits stacked in the mapping's key order, with a
        ``split`` column naming the split each row came from and a fresh 0..n-1 index.
    """
    # One frame per split, each tagged with the split it originated from.
    frames = [
        dataset[name].to_pandas().assign(split=name) for name in list(dataset.keys())
    ]
    return pd.concat(frames, ignore_index=True)

## 3.2. Load DS

In [9]:
# CSV file list for each dataset split, keyed by split name (passed to load_dataset as data_files).
ds_split_args = dict(
    train=["../../data/finetuning_data/consolidated/train/data.csv"],
    test=["../../data/finetuning_data/consolidated/test/data.csv"],
    validation=["../../data/finetuning_data/consolidated/validation/data.csv"],
)

In [10]:
# Read the CSV files declared in ds_split_args; the result is keyed by split name.
dataset = load_dataset("csv", data_files=ds_split_args)

In [11]:
## Shuffle with a fixed seed so the row order is reproducible.
## NOTE(review): `dataset` is reassigned from itself, so re-running only this cell shuffles the
## already-shuffled data again and changes which rows the later `.select(range(n))` calls pick.
dataset = dataset.shuffle(seed=420)

## 3.3. Loading Data subset for which Validation Prompt was not required

To keep fine-tuning simple and reduce memory requirements, only the single-turn pair (user (pr_prompt) --> assistant) is used, instead of the two-turn exchange (user (pr_prompt) --> assistant --> user (validation_prompt) --> assistant).

In [12]:
# Keep only the rows whose `difficulty_validation_required` value equals False,
# i.e. the samples that consist of a single user -> assistant exchange.
dataset_no_validation_required = dataset.filter(
    lambda example: example["difficulty_validation_required"] == False
)

## 3.3. Format Dataset as per ChatML

In [13]:
# Apply format_dataset to every row (adds the chat `messages` column and the
# metadata fields it returns), then drop the raw columns that are no longer needed.
dataset_no_validation_required_form = dataset_no_validation_required.map(
    format_dataset
).remove_columns(
    [
        "difficulty_validation_required",
        "output_response_raw",
        "output_response_validation_raw",
    ]
)

In [14]:
# dataset_ext = dataset_ext.map(
#     format_dataset
# ).remove_columns(
#     [
#         "difficulty_validation_required",
#         "output_response_raw",
#         "output_response_validation_raw",
#     ]
# )

## 3.4. Assessing Distribution of PT Parameters

In [15]:
# Stack all splits into one DataFrame so the label distributions can be inspected with pandas.
df_selected_dataset = convert_datasetdict_to_pandas(dataset_no_validation_required_form)

In [16]:
# Sample count per communication type (`TYPE` column) across all splits.
df_selected_dataset["TYPE"].value_counts()

TYPE
internal    172
external    138
Name: count, dtype: int64

In [17]:
# Sample counts per attack vector and per difficulty level across all splits.
print(df_selected_dataset["attack_vector"].value_counts())
print(df_selected_dataset["difficulty_level"].value_counts())

attack_vector
url_link      77
reply_only    65
qrcode        61
callback      55
attachment    52
Name: count, dtype: int64
difficulty_level
moderate         152
advanced         123
sophisticated     35
Name: count, dtype: int64


In [18]:
# Show the formatted splits with their columns and row counts.
dataset_no_validation_required_form

DatasetDict({
    train: Dataset({
        features: ['input_message', 'metadata', 'TYPE', 'split', 'messages', 'attack_vector', 'difficulty_level', 'rule_id', 'theme_category', 'target_platform'],
        num_rows: 215
    })
    test: Dataset({
        features: ['input_message', 'metadata', 'TYPE', 'split', 'messages', 'attack_vector', 'difficulty_level', 'rule_id', 'theme_category', 'target_platform'],
        num_rows: 29
    })
    validation: Dataset({
        features: ['input_message', 'metadata', 'TYPE', 'split', 'messages', 'attack_vector', 'difficulty_level', 'rule_id', 'theme_category', 'target_platform'],
        num_rows: 66
    })
})

## 3.5. Sampling from Above Distribution

In [19]:
# Number of rows taken from the train / validation / test splits for this experiment.
_TRAIN_SAMPLES_ = 40
_VALIDATION_SAMPLES_ = 15
_TEST_SAMPLES_ = 5

In [20]:
# Take the first N rows of each (previously shuffled) split as the experiment subset.
dataset_req_train = dataset_no_validation_required_form["train"].select(
    range(_TRAIN_SAMPLES_)
)
dataset_req_validation = dataset_no_validation_required_form["validation"].select(
    range(_VALIDATION_SAMPLES_)
)
dataset_req_test = dataset_no_validation_required_form["test"].select(
    range(_TEST_SAMPLES_)
)

# The evaluation set handed to the trainer is the validation and test subsets combined.
# NOTE(review): test rows therefore influence the eval_loss-based checkpoint selection — confirm this is intended.
dataset_req_eval = concatenate_datasets([dataset_req_validation, dataset_req_test])

In [21]:
# dataset_req_train = dataset_ext["train"].select(
#     range(_TRAIN_SAMPLES_)
# )
# dataset_req_validation = dataset_ext["validation"].select(
#     range(_VALIDATION_SAMPLES_)
# )
# dataset_req_test = dataset_ext["test"].select(
#     range(_TEST_SAMPLES_)
# )

### 3.5.1. Training Distribution

In [22]:
# Attack-vector distribution within the selected training subset.
dataset_req_train.to_pandas()["attack_vector"].value_counts()

attack_vector
url_link      12
callback       9
reply_only     9
qrcode         7
attachment     3
Name: count, dtype: int64

In [23]:
# Difficulty-level distribution within the selected training subset.
dataset_req_train.to_pandas()["difficulty_level"].value_counts()

difficulty_level
moderate         23
advanced         14
sophisticated     3
Name: count, dtype: int64

In [24]:
# Communication-type (`TYPE`) distribution within the selected training subset.
dataset_req_train.to_pandas()["TYPE"].value_counts()

TYPE
internal    22
external    18
Name: count, dtype: int64

In [25]:
# Rule-id distribution within the selected training subset.
dataset_req_train.to_pandas()["rule_id"].value_counts()

rule_id
rule_id_5    12
rule_id_3    10
rule_id_1     7
rule_id_4     6
rule_id_2     5
Name: count, dtype: int64

In [26]:
# Target-platform distribution within the selected training subset (rows without a platform are not counted by value_counts).
dataset_req_train.to_pandas()["target_platform"].value_counts()

target_platform
AWS             2
OneDrive        2
Salesforce      2
LinkedIn        2
Spotify         2
Jira            2
DataDog         1
Instagram       1
Google Drive    1
BambooHR        1
Walmart         1
Slack           1
Name: count, dtype: int64

### 3.5.2. Eval Distribution

In [27]:
# Bar chart of how often each attack vector occurs in the evaluation set.
attack_vector_counts = dataset_req_eval.to_pandas()["attack_vector"].value_counts()
fig = px.bar(attack_vector_counts)

# Size and label the figure; update_layout returns the figure, so the cell renders it.
fig.update_layout(
    title="Attack Vector Distribution in Evaluation Dataset",
    xaxis_title="Attack Vector",
    yaxis_title="Count",
    width=800,
    height=400,
)

In [28]:
# Distribution of difficulty levels in the evaluation dataset.
difficulty_counts = dataset_req_eval.to_pandas()["difficulty_level"].value_counts()
fig_difficulty = px.bar(difficulty_counts)

# Size and label the figure; the returned figure is the cell output.
fig_difficulty.update_layout(
    title="Difficulty Level Distribution in Evaluation Dataset",
    xaxis_title="Difficulty Level",
    yaxis_title="Count",
    width=800,
    height=400,
)

In [29]:
## Distribution of email types (`TYPE` column) in the evaluation dataset.
email_type_counts = dataset_req_eval.to_pandas()["TYPE"].value_counts()
fig_email_type = px.bar(email_type_counts)

## Size and label the figure; the returned figure is the cell output.
fig_email_type.update_layout(
    title="Email Type Distribution in Evaluation Dataset",
    xaxis_title="Email Type",
    yaxis_title="Count",
    width=800,
    height=400,
)

In [30]:
# Distribution of target platforms in the evaluation dataset.
target_platform_counts = dataset_req_eval.to_pandas()["target_platform"].value_counts()
fig_target_platform = px.bar(target_platform_counts)

# Size and label the figure; the returned figure is the cell output.
fig_target_platform.update_layout(
    title="Target Platform Distribution in Evaluation Dataset",
    xaxis_title="Target Platform",
    yaxis_title="Count",
    width=800,
    height=400,
)

In [31]:
# Raw-input and analysis columns that are stripped before the datasets go to the trainer.
_COL_REM_ = [
    "input_message",
    "metadata",
    "TYPE",
    "split",
    "attack_vector",
    "difficulty_level",
    "rule_id",
    "theme_category",
    "target_platform",
]
dataset_train = dataset_req_train.remove_columns(
    [col for col in _COL_REM_ if col in dataset_req_train.column_names]
)
# `dataset_req_eval` is overwritten with its own stripped version. Only columns that
# are still present are removed, so re-running this cell is a no-op instead of failing
# on columns that an earlier run already dropped.
dataset_req_eval = dataset_req_eval.remove_columns(
    [col for col in _COL_REM_ if col in dataset_req_eval.column_names]
)

# 4. SFT

## 4.1. Utils

In [32]:
def generate(model, tokenizer, prompt, max_new_tokens=2000, skip_special_tokens=True):
    """Generate a continuation for an already-rendered prompt string.

    Args:
        model: Causal LM exposing ``device``, ``eval()`` and ``generate()``.
        tokenizer: Tokenizer used to encode ``prompt`` and decode the output.
        prompt: Prompt text. It is tokenized with ``add_special_tokens=False``,
            so any special tokens must already be part of the string
            (e.g. produced by ``apply_chat_template``).
        max_new_tokens: Upper bound on the number of generated tokens.
        skip_special_tokens: Forwarded to ``tokenizer.decode``. The previous
            implementation ignored this argument and always decoded with
            ``True``; the default is therefore ``True`` so existing calls keep
            producing the same text, while an explicit ``False`` is now honoured.

    Returns:
        str: The decoded newly generated tokens only (the prompt is cut off).

    Side effects:
        Puts ``model`` into evaluation mode.
    """
    tokenized_input = tokenizer(
        prompt, add_special_tokens=False, return_tensors="pt"
    ).to(model.device)
    # Number of prompt tokens; everything after this offset is newly generated.
    prompt_length = len(tokenized_input.input_ids[0])

    model.eval()
    generation_output = model.generate(
        **tokenized_input,
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=max_new_tokens,
    )

    # Only the first sequence is used, with the prompt stripped.
    output_ids = generation_output[0][prompt_length:].tolist()
    return tokenizer.decode(output_ids, skip_special_tokens=skip_special_tokens)

## 4.2. Fine-Tuning with SFT Trainer

### 4.2.1. Config

In [None]:
sft_config = SFTConfig(
    ## GROUP 1: Memory usage
    # These arguments will squeeze the most out of your GPU's RAM
    # Gradient checkpointing recomputes activations during the backward pass,
    # which saves a LOT of memory at the cost of extra compute
    gradient_checkpointing=True,
    # Non-reentrant checkpointing avoids exceptions in newer versions of PyTorch
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # Gradient Accumulation / Batch size
    # No accumulation: the batch used for each update is the same (1x) as the micro-batch
    gradient_accumulation_steps=1,
    # Micro-batch sizes for training and evaluation
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    # Automatic batch-size search is switched off; the sizes above are used as-is
    auto_find_batch_size=False,
    # Evaluate on the eval dataset at the end of every epoch
    eval_strategy="epoch",
    ## GROUP 2: Dataset-related
    # Maximum tokenized sequence length. `max_length` is the current SFTConfig name
    # for the deprecated `max_seq_length` argument.
    max_length=6000,
    # NOTE(review): the datasets passed to the trainer are conversational (`messages`
    # column only, no prompt/completion split) — confirm this flag really masks the
    # system/user turns for that format in the installed TRL version.
    completion_only_loss=True,
    # Dataset
    # Packing is disabled, so samples are not concatenated into shared sequences
    packing=False,
    ## GROUP 3: These are typical training parameters
    num_train_epochs=3,
    learning_rate=5e-5,
    ## GROUP 4: Logging parameters
    logging_strategy="epoch",
    logging_dir="./logs",
    # NOTE(review): the directory name mentions "neptune_noise_alpha_5", but no
    # NEFTune noise option is set in this config — confirm the name is still accurate.
    output_dir=f"./Qwen-3-1.7B_train_samples_{_TRAIN_SAMPLES_}_neptune_noise_alpha_5",
    report_to="none",
    # Checkpoint saving is driven by the best value of the metric below
    save_strategy="best",
    metric_for_best_model="eval_loss",
)

In [34]:
# Wire the model, tokenizer, training configuration and the train / eval subsets into the TRL trainer.
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    args=sft_config,
    train_dataset=dataset_train,
    eval_dataset=dataset_req_eval,
)

Truncating train dataset:   0%|          | 0/40 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/20 [00:00<?, ? examples/s]

In [None]:
import gc

import torch

# Drop unreferenced Python objects first so their accelerator memory can be released.
gc.collect()
# Call the cache-release API of the backend that is actually present, following the
# same CUDA -> MPS -> CPU preference used when the model was loaded. The previous
# unconditional torch.mps.empty_cache() was only appropriate on Apple MPS machines.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
elif torch.backends.mps.is_available():
    torch.mps.empty_cache()
# The generation KV cache is incompatible with gradient checkpointing, so disable it up front.
model.config.use_cache = False

In [None]:
# Inspect the evaluation dataset (columns and row count) before training starts.
dataset_req_eval

In [35]:
## Training
## Runs fine-tuning with `sft_config`; the value returned by train() is shown as the cell output.
trainer.train()


'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,0.2779,0.010829
2,0.011,0.010373
3,0.0086,0.0079



'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.


'pin_memory' argument is set as true but not supported on MPS now, then device pinned memory won't be used.



TrainOutput(global_step=120, training_loss=0.09914741044243176, metrics={'train_runtime': 108.8188, 'train_samples_per_second': 1.103, 'train_steps_per_second': 1.103, 'total_flos': 259782460047360.0, 'train_loss': 0.09914741044243176})

## 4.3. Generating Template for Eval Samples

### 4.3.1. Loading Finetuned Model

In [None]:
# Render the generation prompt from one evaluation sample (index 2 is hard-coded).
# Only the first two messages (system + user, see format_dataset) are included, so the
# assistant reply is left for the model to produce.
# enable_thinking=False is forwarded to the chat template — presumably it turns off
# Qwen3's thinking block; TODO confirm.
prompt = tokenizer.apply_chat_template(
    dataset_req_eval[2]["messages"][0:2],
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)

In [None]:
# Show the fully rendered prompt text.
print(prompt)

In [None]:
# Re-detect the accelerator: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# NOTE(review): absolute, machine-specific checkpoint path. It also points at a
# "Qwen-3-4B_train_samples_10" run, whereas this notebook trains Qwen/Qwen3-1.7B on
# 40 samples into a different output_dir — confirm this is the checkpoint to evaluate.
model_name = "/Users/niteshkumars/Desktop/Company Projects/ai-workbench/email_phishing/agent/finetuning_llm/notebooks/finetuning/Qwen-3-4B_train_samples_10_neptune_noise_alpha_5/checkpoint-20"
# Rebinds the global `tokenizer` to the one stored alongside the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model_best = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
model_best = model_best.to(device)

In [None]:
# Generate a response for `prompt` with the loaded fine-tuned checkpoint and print it.
print(generate(model_best, tokenizer, prompt))

In [None]:
chk_str = """<div style=\"font-family: Arial, sans-serif; max-width: 600px; margin: 0 auto;\">
    <div style=\"background-color: #FFF; padding: 20px; margin-bottom: 20px;\">
        <img src=\"https://upload.wikimedia.org/wikipedia/commons/thumb/0/01/Google_Docs_logo_%282014-2020%29.svg/1200px-Google_Docs_logo_%282014-2020%29.svg.png\" height=\"40\" width=\"auto\" alt=\"Google Docs logo\">
    </div>

    <div style=\"background-color: #F0F0F0; padding: 20px; margin-bottom: 20px;\">
        <p><small>Important: This message contains confidential information and is intended only for authorized Google Docs users.</small></p>
    </div>

    <div style=\"background-color: #FFF; padding: 20px; margin-bottom: 20px;\">
        <p>Dear [[display_name]],</p>

        <p><x-sei title=\"rf__urgency\">Our AI-powered document processing system is now available for immediate deployment across your organization.</x-sei></p>

        <p><x-sei title=\"rf__mimics_work\">As part of our ongoing AI integration efforts, we need to verify your organization's access level to the new Document Intelligence API.</x-sei></p>

        <p><x-sei title=\"rf__threatening_language\">Failure to confirm your organization's API access status will result in your current Document Intelligence features being suspended.</x-sei></p>

        <p><x-sei title=\"rf__distracting_detail\">Our recent AI model updates have shown a 47% increase in processing speed for large document sets, with minimal impact on accuracy.</x-sei></p>

        <p><x-sei title=\"rf__urgency\">Please verify your organization's access level by replying to this email with your confirmation.</x-sei></p>

        <p>Best regards,<br>
        Google Docs AI Team</p>
    </div>

    <div style=\"background-color: #F0F0F0; padding: 20px; margin-bottom: 20px;\">
        <p><small>© [[current_date]] Google LLC. All rights reserved.</small></p>
    </div>
</div>"""

In [None]:
# Render the stored HTML inline after stripping its newline characters.
display(HTML(chk_str.replace("\n", "")))

### 4.3.2. Base Model

In [None]:
# Re-detect the accelerator: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Untuned reference model used for comparison.
# NOTE(review): this is the 4B checkpoint, while the training section of this notebook
# fine-tunes Qwen/Qwen3-1.7B — confirm which base model the comparison should use.
model_name = "Qwen/Qwen3-4B"
# Rebinds the global `tokenizer` to the base model's tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model_old = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
model_old = model_old.to(device)

In [None]:
# Generate a response for the same `prompt` with the base (not fine-tuned) model and print it.
print(generate(model_old, tokenizer, prompt))

In [None]:
resp_base_model = """<div style=\"font-family: Arial, sans-serif; background-color: #f9f9f9; padding: 20px;\">\n  <div style=\"display: flex; align-items: center; gap: 20px;\">\n    <img src=\"https://upload.wikimedia.org/wikipedia/commons/f/fa/Apple_logo_black.svg\" height=\"40\" width=\"auto\" alt=\"Apple logo\">\n    <img src=\"https://upload.wikimedia.org/wikipedia/commons/f/fa/Apple_logo_black.svg\" height=\"40\" width=\"auto\" alt=\"Apple iCare logo\">\n  </div>\n  <h2 style=\"color: #333; margin-top: 20px;\">Apple iCare+ Membership Auto-Renewal Notice</h2>\n  <p style=\"color: #555; margin: 10px 0;\">Dear [[display_name]],</p>\n  <p style=\"color: #555; margin: 10px 0;\">We are writing to inform you that your Apple iCare+ membership is set to auto-renew on [[current_date]].</p>\n  <p style=\"color: #555; margin: 10px 0;\"><x-sei title=\"rf__threatening_language\">If you do not confirm your intent to continue your membership, your coverage will be terminated immediately.</x-sei></p>\n  <p style=\"color: #555; margin: 10px 0;\">Please scan the QR code below to confirm your membership continuation or visit the Apple iCare website directly.</p>\n  <div style=\"text-align: center; margin: 20px 0;\">\n    <img src=\"[[QR_CODE]]\" height=\"150\" width=\"150\" alt=\"QR Code\">\n  </div>\n  <p style=\"color: #555; margin: 10px 0;\">If you have any questions, please contact our support team at <a href=\"mailto:support@icare.apple.com\">support@icare.apple.com</a>.</p>\n  <p style=\"color: #555; margin: 10px 0;\">Thank you for being a valued Apple iCare customer.</p>\n  <p style=\"color: #555; margin: 10px 0;\">Best regards,</p>\n  <p style=\"color: #555; margin: 10px 0;\">Apple iCare Support Team</p>\n</div>"""

In [None]:
# Render the stored base-model HTML inline after stripping its newline characters.
display(HTML(resp_base_model.replace("\n", "")))