# Qwen-7B LoRA Fine-tuning for WAF Evasion (Kaggle)

**Setup**: Train Qwen-7B on 5000 WAF evasion samples using LoRA on Kaggle GPU.

**What you need to do**:
1. Upload `red_v26_phi3_evasion_expanded_100pct_en.jsonl` to Kaggle input directory
2. Run all cells sequentially
3. Download the adapter zip from output directory

**Execution time**: ~2-3 hours for 5000 samples on P100 GPU

## 1️⃣ Install Required Libraries and Dependencies

In [None]:
import subprocess
import sys
import os
import logging
from datetime import datetime

# üìä SETUP LOGGING
# Each run writes to its own timestamped log file under the Kaggle working dir.
log_file = f"/kaggle/working/training_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8 because the log messages contain non-ASCII symbols.
        logging.FileHandler(log_file, encoding="utf-8"),
        logging.StreamHandler(),
    ],
    # basicConfig() silently does nothing when the root logger already has
    # handlers (e.g. when this cell is re-run in the same kernel). force=True
    # replaces them so that `log_file` always names the file being written.
    force=True,
)
logger = logging.getLogger(__name__)
logger.info("üöÄ Starting Qwen-7B LoRA fine-tuning pipeline")

# HuggingFace token setup.
# The token is taken from the HF_TOKEN environment variable when present;
# otherwise the user is prompted for it. The value is exported back into
# os.environ so that later cells can read it with os.getenv("HF_TOKEN").
import getpass

logger.info("üîë Checking HuggingFace token...")
HF_TOKEN = os.getenv("HF_TOKEN", None)  # Environment variable takes precedence
if not HF_TOKEN:
    print("‚ö†Ô∏è HF_TOKEN not found in environment!")
    print("Option 1: Set via environment variable: export HF_TOKEN=your_token")
    print("Option 2: Run this cell with token:")
    print("   os.environ['HF_TOKEN'] = 'your_hf_token_here'\n")
    try:
        # getpass keeps the secret out of the saved notebook output, which a
        # plain input() prompt would echo.
        HF_TOKEN = getpass.getpass(
            "Enter your HuggingFace token (or press Enter to continue): "
        ).strip()
    except (EOFError, NotImplementedError, OSError) as prompt_error:
        # Non-interactive runs cannot answer a prompt; continue without a
        # token instead of aborting the whole notebook.
        logger.warning(f"Could not prompt for a HuggingFace token: {prompt_error}")
        HF_TOKEN = ""
    if HF_TOKEN:
        os.environ['HF_TOKEN'] = HF_TOKEN

if HF_TOKEN:
    logger.info(f"‚úÖ HuggingFace token set (length: {len(HF_TOKEN)} chars)")
    print(f"‚úÖ HuggingFace token set")
else:
    logger.warning("‚ö†Ô∏è No HF token provided. Model loading may fail.")
    print("‚ö†Ô∏è No HF token provided. Model loading may fail if authentication needed.\n")

# Upgrade transformers (plus safetensors and huggingface-hub) to the newest
# release. A forced reinstall is tried first; if that fails for any reason a
# plain `pip install --upgrade transformers` is attempted instead.
# NOTE(review): the packages pinned further down in this cell are installed
# after this step; confirm the pinned versions work with the newest
# transformers release.
logger.info("üì¶ Updating transformers to latest version...")
print("üì¶ Upgrading transformers (critical for Qwen2Tokenizer)...")

pip_install = [sys.executable, "-m", "pip", "install"]
forced_upgrade_cmd = pip_install + [
    "--upgrade", "--force-reinstall", "-q",
    "transformers",
    "safetensors",
    "huggingface-hub",
]
plain_upgrade_cmd = pip_install + ["--upgrade", "-q", "transformers"]

try:
    subprocess.check_call(forced_upgrade_cmd)
    logger.info("‚úÖ Transformers upgraded to latest")
    print("‚úÖ Transformers upgraded to latest")
except Exception as e:
    logger.warning(f"‚ö†Ô∏è Forced update failed: {e}, trying standard update...")
    subprocess.check_call(plain_upgrade_cmd)

# Pinned versions of the remaining dependencies, installed one at a time so
# that a failure names the exact package that broke.
packages = [
    "torch==2.1.2",               # tensor library / GPU runtime
    "peft==0.7.1",                # LoRA adapters
    "bitsandbytes==0.41.3.post2", # 4-bit quantization
    "datasets==2.14.5",           # dataset handling
    "accelerate==0.24.1",         # device placement
]
package_total = len(packages)

logger.info(f"üì¶ Installing {package_total} dependencies...")
print(f"üì¶ Installing other dependencies...")

for position, requirement in enumerate(packages, start=1):
    install_cmd = [sys.executable, "-m", "pip", "install", "-q", requirement]
    try:
        logger.info(f"  [{position}/{package_total}] Installing {requirement}...")
        print(f"  ‚Üí {requirement}")
        subprocess.check_call(install_cmd)
        logger.info(f"  ‚úì {requirement} installed")
    except Exception as e:
        # Log which requirement failed, then abort the cell.
        logger.error(f"‚ùå Failed to install {requirement}: {e}")
        raise

logger.info("‚úÖ All dependencies installed successfully!")
print("\n‚úÖ All dependencies installed successfully!")
print(f"üìä Logs saved to: {log_file}")

# Report which transformers version this kernel actually imports.
# NOTE(review): if transformers was already imported before the pip upgrade
# above, this may still be the previously loaded module - confirm by
# restarting the kernel if the version looks stale.
import transformers
logger.info(f"Transformers version: {transformers.__version__}")
print(f"\nüìã Transformers version: {transformers.__version__}")

# Probe for Qwen2 tokenizer support. A failed import is only reported here;
# the actual fallback to Qwen-7B-Chat happens when the tokenizer is loaded.
try:
    from transformers.models.qwen2 import Qwen2Tokenizer
    logger.info("‚úÖ Qwen2Tokenizer is available")
    print("‚úÖ Qwen2 support verified")
except ImportError:
    logger.warning("‚ö†Ô∏è Qwen2Tokenizer not found - will use fallback model (Qwen-7B-Chat)")
    print("‚ö†Ô∏è Will use Qwen-7B-Chat (stable) instead of Qwen2-7B-Instruct")


## 2️⃣ Download and Load Qwen-7B Model

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
import logging
import subprocess
import sys

logger = logging.getLogger(__name__)

logger.info("üîß Loading Qwen-7B model with API compatibility checks...")
print("üîß Loading Qwen-7B model...")

# Pick the compute device: CUDA when a GPU is visible, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
logger.info(f"Device: {device}")
print(f"Device: {device}")

# Choose a per-device batch size from the amount of GPU memory reported.
if device != "cuda":
    logger.warning("‚ö†Ô∏è CPU mode detected - training will be VERY slow!")
    print("‚ö†Ô∏è CPU mode detected - training will be VERY slow!")
    AUTO_BATCH_SIZE = 2
else:
    gpu_name = torch.cuda.get_device_name(0)
    # total_memory is in bytes; dividing by 1e9 gives decimal gigabytes.
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    logger.info(f"GPU: {gpu_name} | VRAM: {vram_gb:.2f} GB")
    print(f"GPU: {gpu_name}")
    print(f"VRAM: {vram_gb:.2f} GB")

    # (minimum VRAM in GB, batch size), checked from largest to smallest;
    # anything below the last threshold falls back to a batch size of 4.
    vram_tiers = ((40, 32), (24, 16), (16, 8))
    AUTO_BATCH_SIZE = next(
        (batch for min_gb, batch in vram_tiers if vram_gb >= min_gb),
        4,
    )

    logger.info(f"Auto-detected batch size: {AUTO_BATCH_SIZE}")
    print(f"Auto-detected batch size: {AUTO_BATCH_SIZE}\n")

# 4-bit NF4 quantization config with double quantization.
# bfloat16 is only used as the compute dtype when the visible GPU supports it;
# GPUs without bf16 support fall back to float16. Without a GPU the original
# bfloat16 choice is kept.
logger.info("‚öôÔ∏è Configuring 4-bit quantization (nf4)...")
_gpu_without_bf16 = torch.cuda.is_available() and not torch.cuda.is_bf16_supported()
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16 if _gpu_without_bf16 else torch.bfloat16,
)
logger.info("‚úì BitsAndBytes config set")

# Read the HuggingFace token exported by the setup cell (None when absent).
hf_token = os.getenv("HF_TOKEN", None)

# Import the Qwen2 tokenizer classes so that a missing or outdated
# transformers install is reported here rather than during model loading.
logger.info("üìå Fixing Qwen2Tokenizer registry issue...")
try:
    from transformers.models.qwen2 import Qwen2Tokenizer, Qwen2TokenizerFast
    logger.info("‚úì Qwen2 tokenizers registered")
except ImportError as e:
    logger.warning(f"‚ö†Ô∏è Qwen2 import failed: {e}")
    logger.info("Re-registering Qwen2 modules...")
    try:
        # Best-effort diagnostic: retry the import in a fresh interpreter.
        # This cannot change what the current process has imported; it only
        # shows whether the installed package itself is usable.
        subprocess.check_call([
            sys.executable, "-c",
            "from transformers.models.qwen2 import Qwen2Tokenizer; print('Qwen2Tokenizer registered')"
        ])
    except (subprocess.CalledProcessError, OSError) as probe_error:
        # Still non-fatal, but no longer silent, and KeyboardInterrupt /
        # SystemExit are no longer swallowed as they were by a bare except.
        logger.warning(f"Qwen2 import also failed in a fresh interpreter: {probe_error}")

# Load the tokenizer, preferring Qwen2-7B-Instruct and falling back to
# Qwen-7B-Chat when the first load raises ValueError.
logger.info("üì• Loading tokenizer...")
print("üì• Loading tokenizer...")

model_id = "Qwen/Qwen2-7B-Instruct"
hf_token = os.getenv("HF_TOKEN", None)

try:
    # First choice: Qwen2-7B-Instruct
    logger.info(f"Attempting to load: {model_id}")
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        trust_remote_code=True,
        token=hf_token,
    )
    logger.info(f"‚úÖ Tokenizer loaded: Qwen2-7B-Instruct")

except ValueError as e:
    # Second choice: the original Qwen-7B-Chat checkpoint
    logger.warning(f"‚ö†Ô∏è Qwen2-7B failed: {str(e)[:80]}")
    logger.info("üìå Falling back to Qwen-7B-Chat (stable version)...")
    print("‚ö†Ô∏è Falling back to Qwen-7B-Chat...")

    model_id = "Qwen/Qwen-7B-Chat"
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            trust_remote_code=True,
            token=hf_token,
        )
        logger.info(f"‚úÖ Tokenizer loaded: Qwen-7B-Chat")
    except Exception as e2:
        logger.error(f"‚ùå Both models failed: {e2}")
        raise

# Padding to max_length needs a pad token. Reuse EOS when the tokenizer has
# one (unchanged behaviour); otherwise avoid overwriting the pad token with
# None, which would make the later padding step fail.
if tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token
elif tokenizer.pad_token is None and getattr(tokenizer, "eod_id", None) is not None:
    # NOTE(review): assumes the Qwen-7B-Chat remote-code tokenizer exposes
    # `eod_id` instead of an eos_token - confirm against that tokenizer.
    tokenizer.pad_token_id = tokenizer.eod_id
if tokenizer.pad_token is None:
    logger.warning("Tokenizer has no pad token; padded tokenization will fail")

logger.info(f"‚úÖ Tokenizer ready: {tokenizer.__class__.__name__} | Vocab: {tokenizer.vocab_size}")
print(f"‚úÖ Tokenizer loaded: {tokenizer.__class__.__name__}")

# Load the causal LM with 4-bit quantization. FlashAttention-2 is tried first;
# any failure falls back to the default attention implementation.
logger.info(f"üì• Loading model {model_id} with 4-bit quantization...")
print("üì• Loading model (this may take 2-3 minutes)...")

# bfloat16 needs hardware support. On a GPU without it, load the
# non-quantized weights as float16 instead; without a GPU the original
# bfloat16 choice is kept.
_gpu_without_bf16 = torch.cuda.is_available() and not torch.cuda.is_bf16_supported()
model_dtype = torch.float16 if _gpu_without_bf16 else torch.bfloat16

# Arguments shared by both load attempts.
model_load_kwargs = dict(
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=model_dtype,
    token=hf_token,
)

try:
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        attn_implementation="flash_attention_2",
        **model_load_kwargs,
    )
    logger.info(f"‚úÖ Model loaded with flash_attention_2")

except Exception as e:
    logger.warning(f"‚ö†Ô∏è Flash attention 2 not available: {str(e)[:80]}")
    logger.info("Falling back to default attention...")
    print("‚ö†Ô∏è Using default attention...")

    try:
        model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_kwargs)
        logger.info(f"‚úÖ Model loaded with default attention")
    except Exception as e2:
        logger.error(f"‚ùå Model loading failed: {e2}")
        raise

# Summary of what was loaded.
total_params = sum(p.numel() for p in model.parameters())
logger.info(f"‚úÖ Model loaded successfully")
logger.info(f"  ‚Ä¢ Model: {model_id}")
logger.info(f"  ‚Ä¢ Parameters: {total_params / 1e9:.2f}B")
logger.info(f"  ‚Ä¢ Dtype: {model.dtype}")
logger.info(f"  ‚Ä¢ Device: {next(model.parameters()).device}")
logger.info(f"  ‚Ä¢ Quantization: 4-bit NF4")
print(f"‚úÖ Model loaded: {model_id}")
print(f"Model parameters: {total_params / 1e9:.2f}B")
print(f"Model dtype: {model.dtype}")
print(f"Auto batch size: {AUTO_BATCH_SIZE} (can override later)")


## 3️⃣ Load and Prepare Dataset

In [None]:
import json
import random
from datasets import Dataset
from pathlib import Path
import logging

logger = logging.getLogger(__name__)

logger.info("üìÇ Scanning for WAF dataset...")
print("üìÇ Scanning for WAF dataset...\n")

# File name expected in the Kaggle input area, and the preferred directory.
DEFAULT_DATASET = "red_v26_phi3_evasion_expanded_100pct_en.jsonl"
input_dir = Path("/kaggle/input/waf-dataset")

# Widen the search to every attached input when the preferred directory
# is missing.
if not input_dir.exists():
    logger.warning(f"Directory not found: {input_dir}")
    print(f"‚ö†Ô∏è Directory not found: {input_dir}")
    print("Searching in /kaggle/input/ instead...\n")
    input_dir = Path("/kaggle/input")
    logger.info(f"Searching in {input_dir}")

# Take the first recursive match for the default file name, if there is one.
dataset_path = next(input_dir.rglob(DEFAULT_DATASET), None)
if dataset_path is not None:
    logger.info(f"‚úÖ Found default dataset: {dataset_path.name}")
    print(f"‚úÖ Found default dataset: {dataset_path.name}\n")

# Fallback: when the default file is absent, list every .jsonl file under the
# search directory and pick the one with the most lines.
if dataset_path is None:
    logger.warning(f"Default dataset '{DEFAULT_DATASET}' not found")
    jsonl_files = sorted(input_dir.rglob("*.jsonl"))

    if not jsonl_files:
        logger.error("‚ùå No .jsonl dataset found in /kaggle/input/waf-dataset/")
        raise FileNotFoundError("‚ùå No .jsonl dataset found in /kaggle/input/waf-dataset/")

    logger.info(f"Found {len(jsonl_files)} JSONL files")
    print(f"‚ö†Ô∏è Default dataset '{DEFAULT_DATASET}' not found")
    print(f"Available JSONL files ({len(jsonl_files)}):\n")

    file_info = []
    for number, candidate in enumerate(jsonl_files, start=1):
        # One line per record, so the line count is the record count.
        with open(candidate, 'r', encoding='utf-8') as handle:
            line_total = sum(1 for _ in handle)

        megabytes = candidate.stat().st_size / 1e6
        file_info.append({
            'idx': number,
            'path': candidate,
            'records': line_total,
            'size_mb': megabytes,
        })

        logger.info(f"  [{number}] {candidate.name}: {line_total:,} records, {megabytes:.2f} MB")
        print(f"[{number}] {candidate.name} ({line_total:,} records, {megabytes:.2f} MB)")

    # max() keeps the first entry on ties, i.e. the alphabetically earliest.
    selected_idx, largest = max(
        enumerate(file_info),
        key=lambda pair: pair[1]['records'],
    )
    dataset_path = largest['path']
    logger.info(f"‚úÖ Auto-selected: {dataset_path.name}")
    print(f"\n‚úÖ Selected: {dataset_path.name}")

# Read the JSONL file once, counting every line and keeping only the lines
# that parse to a JSON object. Blank, malformed and non-object lines are
# skipped, and the number skipped is reported instead of being dropped
# silently.
print(f"üì• Loading dataset: {dataset_path.name}")

total_records_available = 0
skipped_lines = 0
records = []
with open(dataset_path, 'r', encoding='utf-8') as f:
    for line in f:
        total_records_available += 1
        try:
            parsed = json.loads(line)
        except json.JSONDecodeError:
            skipped_lines += 1
            continue
        if not isinstance(parsed, dict):
            # Later cells call .keys() on each record and look up "messages".
            skipped_lines += 1
            continue
        records.append(parsed)

logger.info(f"üì• Loading dataset: {dataset_path.name} ({total_records_available:,} records)")
print(f"Total records available: {total_records_available:,}\n")

if skipped_lines:
    logger.warning(f"Skipped {skipped_lines:,} blank, malformed or non-object lines")

logger.info(f"‚úÖ Loaded {len(records):,} valid records")
print(f"‚úÖ Loaded {len(records):,} valid records")

# Sample size configuration.
SAMPLE_SIZE = 5000  # edit this value to change how many records are used
SAMPLE_SEED = 42    # same seed as the training arguments, for repeatable sampling

logger.info(f"‚è±Ô∏è Training time estimates on Qwen-7B, 2 epochs, P100 GPU:")
print(f"\n‚è±Ô∏è Training time estimate (Qwen-7B, 2 epochs on P100):")
print(f"   1,000 samples  ‚Üí ~20 min")
print(f"   5,000 samples  ‚Üí ~1.5 hours (DEFAULT)")
print(f"   10,000 samples ‚Üí ~3 hours")
print(f"   20,000 samples ‚Üí ~6 hours")
print(f"   50,000 samples ‚Üí ~15 hours\n")

# Fail with a clear message instead of an IndexError on records[0] below.
if not records:
    raise ValueError("No valid JSON records were loaded; cannot build a training set")

if len(records) > SAMPLE_SIZE:
    logger.info(f"üé≤ Sampling {SAMPLE_SIZE:,} from {len(records):,} records...")
    print(f"üé≤ Sampling {SAMPLE_SIZE:,} from {len(records):,} records...")
    # A dedicated seeded generator makes the subset reproducible without
    # touching the global random state.
    records = random.Random(SAMPLE_SEED).sample(records, SAMPLE_SIZE)
    logger.info(f"‚úÖ Using {len(records):,} samples for training")
    print(f"‚úÖ Using {len(records):,} samples for training")
    print(f"‚è∞ Estimated training time: {1.5 * (SAMPLE_SIZE/5000):.1f} hours")
    logger.info(f"‚è∞ Estimated training time: {1.5 * (SAMPLE_SIZE/5000):.1f} hours")
else:
    # Only warn when there really are fewer records than requested; an exact
    # match used to be reported as "less than".
    if len(records) < SAMPLE_SIZE:
        logger.warning(f"Only {len(records):,} records available (less than {SAMPLE_SIZE:,})")
        print(f"‚ö†Ô∏è Only {len(records):,} records available (less than {SAMPLE_SIZE:,})")
    print(f"‚úÖ Using all {len(records):,} samples")
    print(f"‚è∞ Estimated training time: {1.5 * (len(records)/5000):.1f} hours")
    logger.info(f"‚è∞ Using all {len(records):,} samples | Est. time: {1.5 * (len(records)/5000):.1f} hours")

# Show the shape of one record so the expected keys can be checked by eye.
logger.info(f"Record structure: {list(records[0].keys())}")
print(f"\nüìã Record structure:")
print(f"   Keys: {list(records[0].keys())}")
print(f"\nüìù Sample record (first 500 chars):")
sample_text = json.dumps(records[0], indent=2)[:500]
print(sample_text + "...")


In [None]:
import logging
# Logger used by the conversion code in this cell.
logger = logging.getLogger(__name__)

def convert_to_qwen_format(record):
    """Render one dataset record as a single ChatML-style training string.

    The output starts with a fixed system block, followed by one block per
    usable message, each laid out as::

        <|im_start|>{role}
        {content}
        <|im_end|>

    Only messages whose role is "user" or "assistant" and whose content is a
    non-empty string are kept; any other message (including system messages
    carried by the record) is ignored.

    NOTE(review): this layout puts a newline before ``<|im_end|>``. The
    inference cell builds its prompt the same way, so the two are consistent
    with each other; confirm this is the intended template for the model.

    Args:
        record: A dict expected to hold a "messages" list of
            ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The formatted conversation with surrounding whitespace stripped, or
        "" when the record is not a dict, has fewer than two messages, or
        contains no usable user/assistant message.
    """
    system_block = (
        "<|im_start|>system\nYou are a helpful cybersecurity assistant specialized in WAF evasion techniques and SQL injection/XSS payload generation.\n<|im_end|>\n"
    )

    if not isinstance(record, dict):
        return ""

    messages = record.get("messages")
    if not isinstance(messages, (list, tuple)) or len(messages) < 2:
        return ""

    turns = []
    for msg in messages:
        if not isinstance(msg, dict):
            continue

        # Tolerate a missing or None role instead of raising AttributeError.
        role = str(msg.get("role") or "").lower()
        if role not in ("user", "assistant"):
            continue

        content = msg.get("content")
        if not isinstance(content, str):
            continue
        content = content.strip()
        if not content:
            continue

        turns.append(f"<|im_start|>{role}\n{content}\n<|im_end|>\n")

    # A sample made of the system prompt alone carries no training signal, so
    # it is reported as invalid and counted as skipped by the caller.
    if not turns:
        return ""

    return (system_block + "".join(turns)).strip()

# Convert every record to its formatted text; records that convert to an
# empty string are counted as skipped.
logger.info("üîÑ Converting dataset to Qwen-7B format...")
print("üîÑ Converting dataset to Qwen format...")
processed_data = []
skipped = 0

for record in records:
    text = convert_to_qwen_format(record)
    if text:
        processed_data.append({"text": text})
    else:
        skipped += 1

logger.info(f"‚úÖ Converted {len(processed_data)} records, skipped {skipped}")
print(f"‚úÖ Converted {len(processed_data)} records")
if skipped > 0:
    logger.warning(f"‚ö†Ô∏è Skipped {skipped} invalid records")
    print(f"‚ö†Ô∏è Skipped {skipped} invalid records")

# Stop with a clear message rather than an IndexError or ZeroDivisionError
# below when not a single record could be converted.
if not processed_data:
    raise ValueError(
        f"No records could be converted ({skipped} skipped); "
        "check that each record has a 'messages' list"
    )

logger.info(f"üìã Sample formatted text (first 700 chars)")
print(f"\nüìã Sample formatted text (Qwen format):")
sample = processed_data[0]['text']
print("=" * 60)
print(sample[:700])
print("=" * 60)

# Build the Hugging Face dataset with a single "text" column.
dataset = Dataset.from_dict({"text": [r["text"] for r in processed_data]})
# Whitespace-separated word count: a rough size indicator, not a token count.
avg_words = sum(len(r['text'].split()) for r in processed_data) // len(processed_data)
logger.info(f"‚úÖ Dataset created: {len(dataset)} samples | Avg {avg_words} words/sample")
print(f"\n‚úÖ Dataset created: {len(dataset)} samples")
print(f"Avg words per sample: ~{avg_words}")


In [None]:
import logging
# Logger used by the tokenization code in this cell.
logger = logging.getLogger(__name__)

def preprocess_function(examples):
    """Tokenize a batch of formatted texts and attach causal-LM labels.

    Every text is truncated or padded to 1024 tokens. Labels copy the input
    ids, except that padding positions are set to -100 so they are excluded
    from the loss. Padding is identified from the attention mask rather than
    from the pad token id, because the pad token is set to the EOS token and
    real end-of-sequence tokens must stay in the loss.

    Args:
        examples: Mapping with a "text" entry, either a list of strings (the
            batched form used by ``dataset.map``) or a single string.

    Returns:
        The tokenizer output with an added "labels" entry shaped like
        "input_ids".
    """
    max_seq_length = 1024  # fixed sequence length after truncation/padding
    ignore_index = -100    # label value skipped by the cross-entropy loss

    tokenized = tokenizer(
        examples["text"],
        truncation=True,
        max_length=max_seq_length,
        padding="max_length",
        return_tensors=None,
    )

    input_ids = tokenized["input_ids"]

    if "attention_mask" not in tokenized:
        # No mask available: keep the original behaviour (labels = input ids).
        tokenized["labels"] = input_ids.copy()
        return tokenized

    attention_mask = tokenized["attention_mask"]

    def mask_padding(ids, mask):
        return [tok if keep else ignore_index for tok, keep in zip(ids, mask)]

    if input_ids and isinstance(input_ids[0], (list, tuple)):
        # Batched call: one id list per example.
        tokenized["labels"] = [
            mask_padding(ids, mask) for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        # Single example: a flat list of ids.
        tokenized["labels"] = mask_padding(input_ids, attention_mask)

    return tokenized

# Tokenize the whole dataset in batches of 32 and drop the raw "text" column
# so that only the tokenizer outputs and labels remain.
logger.info("‚öôÔ∏è Tokenizing dataset...")
print("‚öôÔ∏è Tokenizing dataset...")
map_options = {
    "batched": True,
    "batch_size": 32,
    "remove_columns": ["text"],
    "desc": "Tokenizing",
}
tokenized_dataset = dataset.map(preprocess_function, **map_options)

sample_total = len(tokenized_dataset)
logger.info(f"‚úÖ Tokenization complete | Samples: {sample_total} | Max seq: 1024")
print(f"‚úÖ Tokenization complete")
first_example = tokenized_dataset[0]
print(f"Sample token ids length: {len(first_example['input_ids'])}")
print(f"Dataset ready: {sample_total} samples")


## 4️⃣ Configure LoRA Adapter

In [None]:
from peft import LoraConfig, get_peft_model, TaskType
import logging

logger = logging.getLogger(__name__)

logger.info("‚öôÔ∏è Setting up adapter method...")
print("‚öôÔ∏è Setting up adapter method...\n")

# Adapter backend switch: False = standard PEFT, True = Unsloth.
USE_UNSLOTH = False  # change to True for faster training

if USE_UNSLOTH:
    try:
        logger.info("‚ö° Attempting to use UNSLOTH...")
        from unsloth import FastLanguageModel

        logger.info("‚ö° Using UNSLOTH for fast training (2-3x speedup)")
        print("‚ö° Using UNSLOTH for fast training...\n")
        print("Expected speedup: 2-3x faster")
        # The baseline estimate is 1.5 hours per 5,000 samples; convert it to
        # minutes before applying the assumed 2.5x speedup (the old
        # expression printed a number of hours labelled as minutes).
        expected_minutes = int(1.5 * 60 * (SAMPLE_SIZE / 5000) / 2.5)
        print(f"Expected time: ~{expected_minutes} minutes for {SAMPLE_SIZE:,} samples\n")

        # get_peft_model returns the wrapped model only; unpacking it into
        # (model, tokenizer) would fail and would clobber the tokenizer.
        # NOTE(review): Unsloth normally expects the base model to come from
        # FastLanguageModel.from_pretrained - confirm that a model loaded
        # with AutoModelForCausalLM is accepted here.
        model = FastLanguageModel.get_peft_model(
            model,
            r=16,
            lora_alpha=32,
            lora_dropout=0.05,
            target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
            bias="none",
            use_gradient_checkpointing=True,
            use_rslora=False,
        )
        logger.info("‚úÖ Unsloth adapter configured")
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        logger.info(f"Trainable: {trainable / 1e6:.2f}M params")
        print("‚úÖ Unsloth adapter configured")
        print(f"Trainable parameters: {trainable / 1e6:.2f}M")

    except ImportError as e:
        # Unsloth is optional: fall through to the PEFT path below.
        logger.warning(f"‚ö†Ô∏è Unsloth not available: {e}")
        logger.warning("Falling back to PEFT...")
        print("‚ö†Ô∏è Unsloth not installed, falling back to PEFT...")
        USE_UNSLOTH = False

if not USE_UNSLOTH:
    logger.info("üìå Using standard PEFT LoRA (compatible, slightly slower)")
    print("üìå Using standard PEFT\n")

    # LoRA on all attention and MLP projection layers.
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=[
            "q_proj",      # Query projection (Attention)
            "v_proj",      # Value projection (Attention)
            "k_proj",      # Key projection (Attention)
            "o_proj",      # Output projection (Attention)
            "gate_proj",   # Gate projection (MLP)
            "up_proj",     # Up projection (MLP)
            "down_proj",   # Down projection (MLP)
        ],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
    )

    # Check which requested target names occur in the model's parameter
    # names, and report the ones that do not instead of logging "verified"
    # unconditionally.
    logger.info("Verifying LoRA target modules for Qwen-7B...")
    requested_targets = set(lora_config.target_modules)
    model_params = set()
    for name, _ in model.named_parameters():
        for target in requested_targets - model_params:
            if target in name:
                model_params.add(target)
        if model_params == requested_targets:
            break  # every target has been seen; no need to scan further

    logger.info(f"‚úì Verified target modules in model: {sorted(model_params)}")
    missing_targets = sorted(requested_targets - model_params)
    if missing_targets:
        logger.warning(f"LoRA target modules not found in model: {missing_targets}")

    # A quantized base model has to be prepared before adapters are attached:
    # with gradient checkpointing and a frozen base, the inputs otherwise
    # carry no gradient and the backward pass fails.
    if getattr(model, "is_loaded_in_4bit", False) or getattr(model, "is_loaded_in_8bit", False):
        from peft import prepare_model_for_kbit_training

        model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
        logger.info("Model prepared for k-bit training")

    # Wrap model with LoRA
    model = get_peft_model(model, lora_config)

    logger.info("‚úÖ LoRA adapter configured for Qwen-7B")
    logger.info(f"  ‚Ä¢ LoRA Rank: {lora_config.r}")
    logger.info(f"  ‚Ä¢ Alpha: {lora_config.lora_alpha}")
    logger.info(f"  ‚Ä¢ Dropout: {lora_config.lora_dropout}")
    logger.info(f"  ‚Ä¢ Target modules: {len(lora_config.target_modules)} (Attention: 4, MLP: 3)")
    logger.info(f"  ‚Ä¢ Task type: CAUSAL_LM")

    print(f"LoRA Configuration (Qwen-7B optimized):")
    print(f"  ‚Ä¢ Rank (r): {lora_config.r}")
    print(f"  ‚Ä¢ Alpha: {lora_config.lora_alpha}")
    print(f"  ‚Ä¢ Dropout: {lora_config.lora_dropout}")
    print(f"  ‚Ä¢ Target modules: {len(lora_config.target_modules)} layers")
    print(f"    - Attention: q_proj, v_proj, k_proj, o_proj")
    print(f"    - MLP: gate_proj, up_proj, down_proj\n")

    # Share of parameters that will actually receive gradients.
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    trainable_pct = trainable / total * 100

    logger.info(f"‚úÖ Model wrapped with LoRA")
    logger.info(f"  ‚Ä¢ Trainable: {trainable / 1e6:.2f}M ({trainable_pct:.2f}%)")
    logger.info(f"  ‚Ä¢ Total: {total / 1e9:.2f}B")

    print(f"‚úÖ LoRA adapter configured")
    print(f"Trainable parameters: {trainable / 1e6:.2f}M ({trainable_pct:.2f}%)")
    print(f"Total parameters: {total / 1e9:.2f}B")


### ⚡ Optional: Install Unsloth for 2-3x Faster Training

Run this **before cell 4** if you want to use Unsloth (2-3x speedup, 40% less VRAM):

```python
!pip install -q "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
```

Then in cell 4, change: `USE_UNSLOTH = True`

**Comparison**:
- PEFT: 1.5h training (5K samples) → 1h saving + checkpoint
- Unsloth: 30min training (5K samples) → Much faster! ⚡

Choose based on your time: if you want quick test, use Unsloth!

## 5️⃣ Fine-tune Model with Training Loop

In [None]:
from transformers import TrainingArguments, Trainer
from datetime import datetime
import logging

# This cell uses os and torch; import them here so it does not depend on an
# earlier cell having done so.
import os
import torch

logger = logging.getLogger(__name__)

# Adapter and checkpoint output directory.
output_dir = "/kaggle/working/qwen7b_waf_adapter"
os.makedirs(output_dir, exist_ok=True)

# Training configuration - customize here.
NUM_EPOCHS = 2  # raise to 3-4 for better quality (slower), lower to 1 for a quick test
TRAIN_BATCH_SIZE = AUTO_BATCH_SIZE  # auto-detected; may be set manually (4, 8, 16, 32)
GRADIENT_ACCUMULATION = 2
LEARNING_RATE = 1.5e-4

# Mixed-precision mode. TrainingArguments rejects bf16=True on GPUs without
# bfloat16 support, so fall back to fp16 there. Without a GPU the original
# bf16 setting is kept.
USE_BF16 = (not torch.cuda.is_available()) or torch.cuda.is_bf16_supported()
USE_FP16 = not USE_BF16
precision_label = (
    "BF16 (optimal for Qwen)" if USE_BF16 else "FP16 (GPU lacks bfloat16 support)"
)

logger.info("="*60)
logger.info("üîß TRAINING CONFIGURATION")
logger.info("="*60)
logger.info(f"  ‚Ä¢ Epochs: {NUM_EPOCHS}")
logger.info(f"  ‚Ä¢ Per-device batch: {TRAIN_BATCH_SIZE}")
logger.info(f"  ‚Ä¢ Gradient accumulation: {GRADIENT_ACCUMULATION}")
logger.info(f"  ‚Ä¢ Effective batch: {TRAIN_BATCH_SIZE * GRADIENT_ACCUMULATION}")
logger.info(f"  ‚Ä¢ Learning rate: {LEARNING_RATE}")
logger.info(f"  ‚Ä¢ Output dir: {output_dir}")
logger.info(f"  ‚Ä¢ Samples: {len(tokenized_dataset)}")
logger.info(f"  ‚Ä¢ Model: Qwen-7B-Instruct (4-bit quantized)")
logger.info(f"  ‚Ä¢ Precision: {precision_label}")
logger.info("="*60)

print("üîß Training Configuration:")
print(f"  ‚Ä¢ Epochs: {NUM_EPOCHS}")
print(f"  ‚Ä¢ Batch size: {TRAIN_BATCH_SIZE}")
print(f"  ‚Ä¢ Gradient accumulation: {GRADIENT_ACCUMULATION}")
print(f"  ‚Ä¢ Effective batch: {TRAIN_BATCH_SIZE * GRADIENT_ACCUMULATION}")
print(f"  ‚Ä¢ Learning rate: {LEARNING_RATE}\n")

# Training arguments for a single Kaggle GPU.
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_steps=50,
    weight_decay=0.01,
    max_grad_norm=1.0,
    logging_steps=10,
    save_strategy="epoch",      # one checkpoint per epoch ...
    save_total_limit=2,         # ... keeping at most the last two
    seed=42,
    fp16=USE_FP16,
    bf16=USE_BF16,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    report_to=[],               # no external experiment trackers
    remove_unused_columns=True,
)

# Build the Trainer, report the expected number of optimizer steps, and run
# the fine-tuning loop. The result object is kept in `train_result`.
banner = "=" * 60

logger.info(f"üöÄ Initializing trainer...")
print("üöÄ Starting training...")
print(f"Output directory: {output_dir}")
print(f"Samples: {len(tokenized_dataset)}")

# Estimate used for display only: samples divided by the effective batch.
effective_batch = TRAIN_BATCH_SIZE * GRADIENT_ACCUMULATION
steps_per_epoch = len(tokenized_dataset) // effective_batch
print(f"Steps per epoch: {steps_per_epoch}")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)

total_steps = steps_per_epoch * NUM_EPOCHS
logger.info(f"‚úÖ Trainer initialized")
logger.info(f"  ‚Ä¢ Steps per epoch: {steps_per_epoch}")
logger.info(f"  ‚Ä¢ Total training steps: {total_steps}")

for log_line in (banner, "‚è≥ TRAINING IN PROGRESS...", banner):
    logger.info(log_line)
print("\n" + banner)
print("‚è≥ Training in progress...")
print(banner + "\n")

train_result = trainer.train()

# Report the outcome of the run.
# The object returned by trainer.train() exposes global_step, training_loss
# and a metrics dict; it has no `training_steps` attribute, so the wall-clock
# time is taken from metrics["train_runtime"] (seconds).
train_minutes = float(train_result.metrics.get("train_runtime", 0.0)) / 60

logger.info("="*60)
logger.info("‚úÖ TRAINING COMPLETED!")
logger.info("="*60)
logger.info(f"Training loss: {train_result.training_loss:.4f}")
logger.info(f"Training steps: {train_result.global_step}")
logger.info(f"Training time: {train_minutes:.1f} minutes")
# NOTE(review): training_loss is already an average over the run, so dividing
# it by the epoch count is probably not a meaningful number - confirm intent.
logger.info(f"Average loss per epoch: {train_result.training_loss / NUM_EPOCHS:.4f}")
logger.info(f"Samples per second: {train_result.metrics.get('train_samples_per_second', 'N/A')}")

print("\n" + "="*60)
print("‚úÖ Training completed!")
print("="*60)
print(f"\nTraining loss: {train_result.training_loss:.4f}")
print(f"Training steps: {train_result.global_step}")
print(f"Training time: {train_minutes:.1f} minutes")
print(f"Average loss per epoch: {train_result.training_loss / NUM_EPOCHS:.4f}")


## 6️⃣ Save and Compress LoRA Adapter

In [None]:
import shutil
from datetime import datetime
import logging

logger = logging.getLogger(__name__)

logger.info("üíæ Saving LoRA adapter...")
print("üíæ Saving LoRA adapter...")

# Write the adapter weights and the tokenizer files side by side.
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)

logger.info(f"‚úÖ Adapter saved to: {output_dir}")
print(f"‚úÖ Adapter saved to: {output_dir}")

# List the regular files at the top level of the output directory with their
# sizes; sub-directories are not listed.
logger.info("üìÅ Files saved:")
print("\nüìÅ Files saved:")
for entry_name in os.listdir(output_dir):
    entry_path = os.path.join(output_dir, entry_name)
    if not os.path.isfile(entry_path):
        continue
    entry_mb = os.path.getsize(entry_path) / 1e6
    logger.info(f"  ‚Ä¢ {entry_name} ({entry_mb:.2f} MB)")
    print(f"  ‚Ä¢ {entry_name} ({entry_mb:.2f} MB)")

import zipfile

logger.info("\nüì¶ Compressing adapter...")
print("\nüì¶ Compressing adapter...")

# Zip the adapter directory into /kaggle/working. Trainer checkpoint folders
# ("checkpoint-<step>") live in the same directory; they are left out so the
# download contains the final adapter and tokenizer only.
zip_name = f"qwen7b_waf_adapter_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
zip_path = f"/kaggle/working/{zip_name}.zip"
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
    for current_dir, subdirs, filenames in os.walk(output_dir):
        # Prune in place so os.walk does not descend into checkpoint folders.
        subdirs[:] = [d for d in subdirs if not d.startswith("checkpoint-")]
        for filename in filenames:
            full_path = os.path.join(current_dir, filename)
            # Store paths relative to output_dir, as make_archive did.
            archive.write(full_path, os.path.relpath(full_path, output_dir))

zip_size_mb = os.path.getsize(zip_path) / 1e6
logger.info(f"‚úÖ Compressed: {zip_name}.zip ({zip_size_mb:.2f} MB)")
print(f"‚úÖ Compressed to: {zip_name}.zip ({zip_size_mb:.2f} MB)")

# The archive already sits in the Kaggle output directory.
output_zip = f"/kaggle/working/{zip_name}.zip"
logger.info(f"üì• Adapter ready: {output_zip}")
print(f"\nüì• Adapter ready for download at: {output_zip}")

# Final summary. Model name and epoch count are read from the variables the
# earlier cells actually used, so the report stays correct when the fallback
# model was loaded or NUM_EPOCHS was edited.
logger.info("="*60)
logger.info("üéâ TRAINING COMPLETE!")
logger.info("="*60)
logger.info(f"Adapter file: {zip_name}.zip | Size: {zip_size_mb:.2f} MB")
logger.info(f"Model: {model_id} | Samples: {len(tokenized_dataset)} | Epochs: {NUM_EPOCHS}")

print("\n" + "="*60)
print("üéâ TRAINING COMPLETE!")
print("="*60)
print(f"\nAdapter file: {zip_name}.zip")
print(f"Size: {zip_size_mb:.2f} MB")
print(f"Model: {model_id}")
print(f"Samples trained: {len(tokenized_dataset)}")
print(f"Epochs: {NUM_EPOCHS}")
print(f"\nDownload the zip file and extract it with your Qwen model!")
print("="*60)

# The f prefix was missing, so the literal text "{log_file}" was logged.
logger.info(f"\nüìä Logs saved to: {log_file}")


## 🧪 (Optional) Quick Test Inference

Test the adapter with a sample prompt before downloading.

In [None]:
import logging
logger = logging.getLogger(__name__)

logger.info("üß™ Starting inference test with trained adapter...")
print("üß™ Testing inference with trained adapter...\n")

# Test prompt, laid out like the training samples: a system block, one user
# block, then an open assistant block for the model to complete. The trailing
# empty item makes the prompt end with a newline.
prompt_lines = [
    "<|im_start|>system",
    "You are a helpful cybersecurity assistant specialized in WAF evasion techniques and SQL injection/XSS payload generation.",
    "<|im_end|>",
    "<|im_start|>user",
    "Generate a SQL injection payload that bypasses WAF detection by using comments and encoding.",
    "Target: SELECT * FROM users WHERE id=1",
    "WAF blocking: UNION, SELECT, --, /*",
    "<|im_end|>",
    "<|im_start|>assistant",
    "",
]
test_prompt = "\n".join(prompt_lines)

divider = "=" * 60
logger.info(f"Test prompt length: {len(test_prompt)} chars")
print(f"üìù Test prompt (Qwen format):")
print(divider)
print(test_prompt)
print(divider)
print("\n‚è≥ Generating response...\n")

# Generate a completion for the test prompt with the fine-tuned model.
# Any failure is logged with its traceback, reported, and re-raised.
try:
    import torch  # used below; do not rely on an earlier cell's import

    # Training leaves the model in train mode, where LoRA dropout is still
    # active; switch to eval mode so generation is not perturbed by it.
    model.eval()

    logger.info("Tokenizing test prompt...")
    # Send the inputs to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
    logger.info(f"Input tokens: {inputs['input_ids'].shape}")

    logger.info("Generating response with trained model...")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    logger.info(f"Generated tokens: {outputs.shape}")
    # The decoded text contains the prompt followed by the completion.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    logger.info("Response generated successfully")

    print(f"‚úÖ Generated response:")
    print("=" * 60)
    print(response)
    print("=" * 60 + "\n")

    logger.info("‚úÖ Adapter trained and tested successfully!")
    print("üéâ Adapter trained and tested successfully!")
    print(f"Format: ‚úì Qwen-7B exact format")
    logger.info(f"Model dtype: {model.dtype}")
    logger.info(f"Response length: {len(response)} chars")

except Exception as e:
    logger.error(f"‚ùå Inference failed: {e}", exc_info=True)
    print(f"‚ùå Inference failed: {e}")
    print("This is expected if model hasn't been trained yet.")
    raise
