In [None]:
# -*- coding: utf-8 -*-
"""
2_finetune_adapter.ipynb

This notebook demonstrates how to fine-tune the LLaMA 3 model using the Adapter method (specifically, IA3) for text summarization.
It reuses the 4-bit quantized base model and applies IA3 adapters on top.
"""

# Import necessary libraries
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import IA3Config, get_peft_model, prepare_model_for_kbit_training
from datasets import Dataset
import torch
import pandas as pd
import os

# Define model name and paths
MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
BASE_MODEL_DIR = "models/llama3_base/"  # where the previous step saved the tokenizer
ADAPTER_MODEL_DIR = "models/llama3_finetuned_adapter/"  # output location for the IA3 adapter
DATA_DIR = "data/"  # holds train.csv and validation.csv

# Ensure directories exist
os.makedirs(ADAPTER_MODEL_DIR, exist_ok=True)

# Load tokenizer (saved from previous step)
print(f"Loading tokenizer from {BASE_MODEL_DIR}...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_DIR)
# The stock LLaMA 3 tokenizer ships without a padding token, and preprocessing
# pads every example to a fixed length, which raises if no pad token is set.
# Fall back to EOS only when the saved tokenizer does not already define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
print("Tokenizer loaded successfully.")

# 4-bit NF4 quantization settings with double quantization and bfloat16 compute
# (redeclared here so this notebook can run on its own)
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the base checkpoint by name, quantizing it on load; device placement is
# delegated to the "auto" device map.
print(f"Loading base model {MODEL_NAME} with 4-bit quantization...")
model_load_kwargs = {
    "quantization_config": bnb_config,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_load_kwargs)

# Training-time config tweaks: turn off the generation KV cache and pin
# pretraining_tp to 1.
base_model_config = model.config
base_model_config.use_cache = False
base_model_config.pretraining_tp = 1
print("Base model loaded and quantized successfully.")

# Prepare model for k-bit training: enable gradient checkpointing first, then
# let PEFT apply its k-bit training preparation to the quantized model.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# IA3 (Adapter) configuration
# Every attention and MLP projection is targeted; the MLP projections are
# additionally declared as feedforward modules.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]
ia3_config = IA3Config(
    task_type="CAUSAL_LM",
    target_modules=attention_projections + mlp_projections,
    feedforward_modules=list(mlp_projections),
)

# Wrap the prepared base model with the IA3 adapter and report how many
# parameters are trainable.
model = get_peft_model(model, ia3_config)
print("IA3 (Adapter) model prepared successfully.")
model.print_trainable_parameters()

# Load dataset: read the train/validation CSV files from DATA_DIR and wrap
# each DataFrame in a `datasets.Dataset`.
print("Loading datasets...")
train_csv_path = os.path.join(DATA_DIR, "train.csv")
val_csv_path = os.path.join(DATA_DIR, "validation.csv")
train_df, val_df = pd.read_csv(train_csv_path), pd.read_csv(val_csv_path)

train_dataset, val_dataset = (
    Dataset.from_pandas(frame) for frame in (train_df, val_df)
)

print("Datasets loaded successfully.")

# Preprocess data
MAX_LENGTH = 512

def preprocess_function(examples):
    """Convert a batch of article/summary pairs into fixed-length causal-LM features.

    Each pair is rendered as a two-turn chat (user instruction + assistant
    summary) with the tokenizer's chat template, then tokenized, truncated and
    padded to ``MAX_LENGTH`` tokens.

    Args:
        examples: Batched mapping with parallel ``"article"`` and ``"summary"``
            sequences, as passed by ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output with an added ``"labels"`` entry. Labels mirror
        ``input_ids`` on real tokens and are ``-100`` on padding positions so
        that padding does not contribute to the loss.

    Note:
        Truncation cuts from the end of the sequence, so for articles longer
        than ``MAX_LENGTH`` tokens part or all of the summary may be dropped.
    """
    ignore_index = -100  # label value ignored by the cross-entropy loss

    chat_texts = [
        tokenizer.apply_chat_template(
            [
                {"role": "user", "content": f"Summarize the following article: {article}"},
                {"role": "assistant", "content": summary},
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        for article, summary in zip(examples['article'], examples['summary'])
    ]

    # The rendered chat text already carries the template's special tokens
    # (including BOS for the LLaMA 3 template); letting the tokenizer add its
    # own again would put a duplicate BOS at the start of every example.
    model_inputs = tokenizer(
        chat_texts,
        max_length=MAX_LENGTH,
        truncation=True,
        padding="max_length",
        add_special_tokens=False,
    )

    # Mask padding via the attention mask rather than the pad token id: the pad
    # token may be the same id as EOS, and real EOS tokens must stay supervised.
    model_inputs["labels"] = [
        [token_id if attended else ignore_index for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

# Tokenize both splits. All raw CSV columns are dropped as part of the map
# call (not just "article"/"summary"): the Trainer is configured with
# remove_unused_columns=False, so any leftover non-tensor column would be
# handed to the data collator and fail there.
print("Tokenizing and preprocessing datasets...")
tokenized_train_dataset = train_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=train_dataset.column_names,
)
tokenized_val_dataset = val_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=val_dataset.column_names,
)
print("Datasets tokenized and preprocessed successfully.")

# Set up training arguments and trainer
# The quantized model computes in bfloat16 (see bnb_config), so use bf16 mixed
# precision whenever the GPU supports it; fall back to fp16 on older hardware.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir=ADAPTER_MODEL_DIR, # Output directory for Adapter model
    num_train_epochs=1, # Number of training epochs (for demo)
    per_device_train_batch_size=2, # Batch size per GPU for training
    per_device_eval_batch_size=2, # Batch size per GPU for evaluation
    gradient_accumulation_steps=4, # Micro-batches accumulated per optimizer step (effective batch = 2 * 4 per device)
    evaluation_strategy="epoch", # Evaluate at the end of every epoch (NOTE: newer transformers releases name this `eval_strategy`)
    save_strategy="epoch", # Save a checkpoint at the end of every epoch
    logging_dir="./logs_adapter", # Directory for storing logs
    logging_strategy="steps",
    logging_steps=10, # Log every N update steps
    learning_rate=2e-4, # Learning rate
    bf16=use_bf16, # bfloat16 mixed precision, matching the 4-bit compute dtype
    fp16=not use_bf16, # float16 mixed precision only when bf16 is unavailable
    seed=42, # Random seed for reproducibility
    report_to="none", # Disable reporting to external services like Weights & Biases
    remove_unused_columns=False, # Important for PEFT
)

# Build the Trainer around the IA3-wrapped model and the tokenized splits
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": tokenized_train_dataset,
    "eval_dataset": tokenized_val_dataset,
    "tokenizer": tokenizer,
}
trainer = Trainer(**trainer_kwargs)

# Run fine-tuning
print("Starting Adapter fine-tuning (IA3)...")
trainer.train()
print("Adapter fine-tuning (IA3) complete.")

# Persist the trained model (via its own save_pretrained) into the adapter directory
print(f"Saving IA3 adapter to {ADAPTER_MODEL_DIR}...")
finetuned_model = trainer.model
finetuned_model.save_pretrained(ADAPTER_MODEL_DIR)
print("IA3 adapter saved successfully.")
