In [None]:
# Importing Required Packages
import torch
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, pipeline, logging
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import json

In [None]:
# Hugging Face Hub id of the base checkpoint that gets fine-tuned
model_name = "meta-llama/Llama-2-7b-chat-hf"

# bitsandbytes settings: load the base weights in 4-bit NF4 with double
# quantization enabled (the QLoRA setup used by the rest of this notebook)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    # bfloat16 compute dtype: recommended for newer GPUs (A100, H100) or
    # high-end consumer cards
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Training parameters (consumed by the TrainingArguments / SFTTrainer / save cells below)
new_model = "llama-2-7b-summarization-qlora"  # name passed to save_pretrained() for the adapters and tokenizer
learning_rate = 2e-4
batch_size = 4  # passed as per_device_train_batch_size
num_train_epochs = 3
max_seq_length = 1024 # Max sequence length for data (Llama 2 supports 4096, but 1024 is safer for initial fine-tuning)
output_dir = "./results"  # passed as TrainingArguments(output_dir=...)

In [None]:
# NOTE(review): absolute, machine-specific path -- the notebook only runs elsewhere
# once this points at a local copy of Modified_Data.json.
json_file = r"C:\Users\Webbies\Jupyter_Notebooks\Assessli_LBM\Modified_Data.json"

# Decode explicitly as UTF-8. Without `encoding`, open() uses the locale's preferred
# encoding (often a legacy code page on Windows), which can mis-decode or reject
# non-ASCII characters in the documents and summaries.
with open(json_file, "r", encoding="utf-8") as f:
    data = json.load(f)

print("[DEBUG] JSON Data Loaded")

In [None]:
# Turn one raw record into a single Llama 2 chat-formatted training string
def format_data_to_llama_prompt(example):
    """Render a raw data point in the Llama 2 chat instruction layout.

    Args:
        example: Mapping with the keys ``'user_profile'`` (itself a mapping
            holding ``'tone_preference'``, ``'summary_length_preference'`` and
            ``'focus_area'``), ``'document_text'`` and
            ``'user_generated_summary'``.

    Returns:
        dict: ``{"text": "<s>[INST] <instruction> [/INST] <summary> </s>"}``
        where the instruction and the summary have surrounding whitespace
        stripped.

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    # Pull the three top-level fields first, in this order.
    prefs, doc_text, target = (
        example[key]
        for key in ('user_profile', 'document_text', 'user_generated_summary')
    )

    # The leading 4/8-space indents below are part of the emitted prompt text.
    # NOTE(review): they look like leftover source indentation; kept verbatim so
    # the training text is unchanged -- confirm whether they are intended.
    instruction_lines = [
        "You are an expert summarization model. Summarize the following document based on the user's preferences.",
        "    User Preferences:",
        f"        - Tone: {prefs['tone_preference']}",
        f"        - Length: {prefs['summary_length_preference']}",
        f"        - Focus: {prefs['focus_area']}",
        "",
        "    Document:",
        f"        {doc_text}",
    ]
    instruction = "\n".join(instruction_lines).strip()
    response = target.strip()

    # Llama 2 chat template: <s>[INST] Instruction [/INST] Response </s>
    # NOTE(review): "<s>" is written literally; if the tokenizer also prepends a
    # BOS token on its own this may produce two of them -- confirm.
    return {"text": "<s>[INST] " + instruction + " [/INST] " + response + " </s>"}

In [None]:
# Convert list of dicts to Hugging Face Dataset
# NOTE(review): assumes the JSON root loaded into `data` is a list of record dicts -- confirm.
raw_dataset = Dataset.from_list(data)

In [None]:
# Apply the formatting function to the dataset. remove_columns is given every
# original column name, so the raw fields are dropped and the "text" value
# returned by the formatter is what remains for training.
dataset = raw_dataset.map(format_data_to_llama_prompt, remove_columns=raw_dataset.column_names)

In [None]:
# Load the Llama 2 model with the 4-bit quantization config
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto" # Auto-distribute across available GPUs
)
# Turn off use_cache on the model config before training
# (presumably only useful for generation -- TODO confirm).
model.config.use_cache = False
model.config.pretraining_tp = 1 # Set to 1 for 7b models

In [None]:
# Load the tokenizer that belongs to the base checkpoint.
# trust_remote_code is left at its default: the model itself is loaded without it,
# so the tokenizer should not opt in to executing repository-supplied code either.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse EOS as the padding token (Llama 2 uses EOS as pad token).
# NOTE(review): with pad == EOS, a collator that masks pad positions in the labels
# would also mask genuine EOS tokens -- confirm the model still learns to stop.
tokenizer.pad_token = tokenizer.eos_token

# Pad on the right for training. SFTTrainer warns when padding_side is not "right"
# because of possible overflow issues in half-precision training.
tokenizer.padding_side = "right"

In [None]:
# LoRA adapter configuration
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,              # LoRA attention dimension (rank)
    lora_alpha=16,     # scaling factor for the LoRA weights
    lora_dropout=0.1,  # dropout probability
    bias="none",
    # Target all linear projection layers for better performance:
    # attention (q/k/v/o) and feed-forward (gate/up/down)
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

In [None]:
# Training arguments
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit", # Optimized for QLoRA
    save_steps=25,
    logging_steps=25,
    learning_rate=learning_rate,
    weight_decay=0.001,
    fp16=False,
    bf16=True, # Use bfloat16 for faster training (matches bnb_4bit_compute_dtype)
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    # The plain "constant" schedule takes no warmup steps, so warmup_ratio above
    # would be silently ignored. "constant_with_warmup" ramps the learning rate up
    # over the first 3% of steps and then holds it constant.
    lr_scheduler_type="constant_with_warmup",
    report_to="none" # Remove if you use a logger like wandb
)

In [None]:
# Initialize the Supervised Fine-Tuning Trainer (SFTTrainer)
# Passing peft_config lets the trainer attach the LoRA adapters to `model`.
# NOTE(review): dataset_text_field / max_seq_length / tokenizer are given directly to
# SFTTrainer here; newer TRL releases reportedly expect some of these on SFTConfig
# (and `processing_class` instead of `tokenizer`) -- confirm against the installed trl.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text", # The column containing the formatted Llama prompt
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

In [None]:
# Start fine-tuning (runs the SFTTrainer configured in the previous cell)
print("Starting Llama 2 QLoRA fine-tuning...")
trainer.train()

In [None]:
# Save the fine-tuned model (LoRA adapters) and the tokenizer side by side,
# both under the name held in `new_model`
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

print(f"Fine-tuning complete. LoRA adapters saved to: {new_model}")