# P-Tuning v2 Example

This notebook demonstrates P-Tuning v2, which learns continuous prompts at every transformer layer.

**Key Features:**
- Learn virtual tokens instead of discrete prompts
- Freeze all model weights
- Insert trainable prompts at multiple layers
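- Extremely parameter efficient (roughly 0.1%–3% of parameters are trainable, depending on prompt length and whether the MLP reparameterization is used)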

In [None]:
# Install required libraries
!pip install -q transformers datasets accelerate peft evaluate

In [None]:
import inspect

import numpy as np
import torch
from datasets import load_dataset
from evaluate import load
from peft import (
    get_peft_model,
    PromptTuningConfig,
    TaskType,
    PromptTuningInit,
    PrefixTuningConfig
)
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    default_data_collator
)

# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise
has_cuda = torch.cuda.is_available()
device = torch.device("cuda") if has_cuda else torch.device("cpu")
print(f"Using device: {device}")
if has_cuda:
    # Report the name of the first visible GPU
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 1. Load Dataset

In [None]:
# Fetch the AG News text-classification dataset
dataset = load_dataset("ag_news")

# Keep the run small: shuffle each split with a fixed seed, then take a slice
shuffled_train = dataset["train"].shuffle(seed=42)
shuffled_test = dataset["test"].shuffle(seed=42)
train_dataset = shuffled_train.select(range(2000))
eval_dataset = shuffled_test.select(range(500))

# Human-readable class names, indexed by the integer label
label_list = ["World", "Sports", "Business", "Sci/Tech"]
num_labels = len(label_list)

print(f"Training samples: {len(train_dataset)}")
print(f"Evaluation samples: {len(eval_dataset)}")
print(f"Number of labels: {num_labels}")

# Preview the first training example (text cut to 100 characters)
first_example = train_dataset[0]
print("\nExample:")
print(f"Text: {first_example['text'][:100]}...")
print(f"Label: {label_list[first_example['label']]}")

## 2. Load Pre-trained Model

In [None]:
model_name = "bert-base-uncased"

# Tokenizer: if the checkpoint defines no pad token, reuse its EOS token
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Lookup tables between class ids and class names, stored on the model config
id2label = dict(enumerate(label_list))
label2id = {name: idx for idx, name in id2label.items()}

# Sequence-classification model with one output per label
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

total_params = sum(param.numel() for param in model.parameters())
print(f"Model loaded: {model_name}")
print(f"Total parameters: {total_params:,}")

## 3. Configure P-Tuning v2 (Prefix Tuning)

In [None]:
# In PEFT, P-Tuning v2 is configured through the Prefix Tuning config class
ptuning_config = PrefixTuningConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    num_virtual_tokens=20,    # how many virtual tokens are prepended
    prefix_projection=True,   # reparameterize the prefix through an MLP
    encoder_hidden_size=128,  # hidden size of the encoder
)

# Wrap the model with the prefix-tuning adapter and place it on the chosen device
model = get_peft_model(model, ptuning_config).to(device)

# Show how many parameters are trainable versus the total
model.print_trainable_parameters()

## 4. Tokenize Dataset

In [None]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of a batch to fixed-length inputs.

    Args:
        examples: Mapping with a ``"text"`` entry; this function is passed to
            ``Dataset.map(..., batched=True)`` below.

    Returns:
        The tokenizer's output for those texts, truncated and padded to
        exactly 128 tokens.
    """
    tokenizer_kwargs = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(examples["text"], **tokenizer_kwargs)

# Apply the tokenizer to both splits in batches; the raw "text" column is
# dropped afterwards.
map_kwargs = {"batched": True, "remove_columns": ["text"]}
tokenized_train = train_dataset.map(tokenize_function, **map_kwargs)
tokenized_eval = eval_dataset.map(tokenize_function, **map_kwargs)

print("Datasets tokenized successfully")

## 5. Setup Training

In [None]:
# Training arguments
#
# transformers renamed `evaluation_strategy` to `eval_strategy` and later
# removed the old spelling. Pick whichever keyword the installed version
# accepts so this cell runs on both old and new releases.
_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./ptuning_results",
    learning_rate=1e-3,  # Higher LR for prompt tuning
    num_train_epochs=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    warmup_steps=100,
    logging_steps=50,
    eval_steps=100,
    save_strategy="steps",
    save_steps=200,  # must stay a multiple of eval_steps for load_best_model_at_end
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="none",
    # Mixed precision needs a CUDA GPU; hard-coding True raises on CPU-only
    # machines, which the device check at the top explicitly allows for.
    fp16=torch.cuda.is_available(),
    **{_eval_strategy_key: "steps"},
)

# Accuracy metric loaded through the `evaluate` library
accuracy = load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy for an evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. The predicted class is
            taken as the argmax of ``predictions`` along axis 1.

    Returns:
        The result of ``accuracy.compute`` for the predicted classes against
        the reference labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)

# Steps per epoch is ceil(samples / batch size): the trailing partial batch is
# still processed unless `dataloader_drop_last` is enabled, so floor division
# under-counts. Assumes a single device and no gradient accumulation.
steps_per_epoch = -(-len(tokenized_train) // training_args.per_device_train_batch_size)
print(f"Training steps: {steps_per_epoch * training_args.num_train_epochs}")

## 6. Train Model

In [None]:
# Initialize trainer
#
# Newer transformers releases take the tokenizer through `processing_class`
# (the `tokenizer` keyword is deprecated); older releases only know
# `tokenizer`. Use whichever keyword the installed version accepts.
_trainer_arg_names = inspect.signature(Trainer.__init__).parameters
_tokenizer_key = (
    "processing_class" if "processing_class" in _trainer_arg_names else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=default_data_collator,
    compute_metrics=compute_metrics,
    **{_tokenizer_key: tokenizer},
)

# Train
print("Starting P-Tuning v2 training...")
trainer.train()

# Save the trained adapter to disk
model.save_pretrained("./ptuning_adapter")
print("\nP-Tuning adapter saved!")

## 7. Evaluate Model

In [None]:
# Run evaluation on the trainer's evaluation dataset and report the key metrics
eval_results = trainer.evaluate()

print("Evaluation Results:")
for title, metric_key in (("Loss", "eval_loss"), ("Accuracy", "eval_accuracy")):
    print(f"{title}: {eval_results[metric_key]:.4f}")

## 8. Test Predictions

In [None]:
def predict(text):
    """Classify a single text with the tuned model.

    Args:
        text: The input string. It is truncated to 128 tokens.

    Returns:
        A ``(label, confidence)`` tuple: the predicted class name taken from
        ``model.config.id2label`` and its softmax probability as a float.
    """
    # Make sure dropout is disabled; do not rely on an earlier cell (e.g. the
    # evaluation cell) having left the model in eval mode.
    model.eval()

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(predictions, dim=-1).item()

    label = model.config.id2label[predicted_class]
    # Batch size is 1, so row 0 holds the probabilities for this text
    confidence = predictions[0][predicted_class].item()

    return label, confidence

# A handful of hand-written headlines to sanity-check the classifier
test_texts = [
    "The stock market reached new highs today as investors showed confidence.",
    "Scientists discovered a new exoplanet that might support life.",
    "The championship game ended in overtime with a thrilling victory.",
    "New smartphone features revolutionary AI capabilities.",
    "Peace talks continue between the two nations."
]

for sample in test_texts:
    predicted_label, score = predict(sample)
    # One record per sample: preview (first 60 chars), prediction, blank line
    print(
        f"Text: {sample[:60]}...",
        f"Prediction: {predicted_label} (Confidence: {score:.2%})",
        "",
        sep="\n",
    )

## 9. Compare with Prompt Tuning (Basic)

In [None]:
# Configuration for basic Prompt Tuning (soft prompts at the input only).
# It is built here for comparison and is not applied to the model in this cell.
prompt_tuning_config = PromptTuningConfig(
    task_type=TaskType.SEQ_CLS,
    prompt_tuning_init=PromptTuningInit.RANDOM,
    num_virtual_tokens=8,  # Fewer tokens at input only
    tokenizer_name_or_path=model_name,
)

# Side-by-side summary of the two approaches
comparison_lines = [
    "Comparison:",
    "\nPrompt Tuning (Basic):",
    "- Adds soft prompts only at input",
    "- Fewer parameters",
    "- Good for simple tasks",
    "\nP-Tuning v2 (Prefix Tuning):",
    "- Adds prompts at every layer",
    "- More capacity",
    "- Better for complex tasks",
    "- Matches full fine-tuning on many benchmarks",
]
for line in comparison_lines:
    print(line)

## 10. Multi-Task Learning with P-Tuning

In [None]:
# Overview of how P-Tuning supports multi-task setups: one shared frozen base
# model plus a small set of prompt parameters per task.
overview_lines = [
    "Multi-Task Learning with P-Tuning:",
    "=" * 50,
    "\n1. Train separate prompt embeddings for each task",
    "2. Keep base model frozen and shared",
    "3. Switch tasks by swapping prompt embeddings",
    "\nExample:",
    "- Task 1: News Classification (20 virtual tokens)",
    "- Task 2: Sentiment Analysis (20 virtual tokens)",
    "- Task 3: Topic Classification (20 virtual tokens)",
    "\nMemory:",
    "- Base Model: 440MB (shared)",
    "- Each Task: ~1MB (prompts only)",
    "- Total for 3 tasks: 440MB + 3MB = 443MB",
    "- vs Full Fine-tuning: 440MB × 3 = 1,320MB",
]
print("\n".join(overview_lines))

# Illustrative pseudo-code for the multi-task workflow (printed, not executed)
print("\nCode structure:")
print("""```python
# Load base model once
base_model = AutoModel.from_pretrained(model_name)

# Load task-specific prompts
news_prompts = load_prompts("news_task")
sentiment_prompts = load_prompts("sentiment_task")

# Switch tasks dynamically
model.set_prompts(news_prompts)  # For news classification
model.set_prompts(sentiment_prompts)  # For sentiment analysis
```""")

## Key Takeaways

### P-Tuning v2 Benefits
1. **Ultra-efficient**: Only about 0.1%–3% of the parameters are trainable (see the `print_trainable_parameters()` output above for this configuration)
2. **No model updates**: Base model completely frozen
3. **Multi-task ready**: One model, many task prompts
4. **Fast switching**: Change tasks by swapping prompts
5. **Matches performance**: Often equals full fine-tuning

### Best Practices
- Use 10-20 virtual tokens for most tasks
- Higher learning rates (1e-3) than full fine-tuning
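- Prefix projection (MLP reparameterization) can improve performance, but the gain is task-dependent, so compare runs with and without it
- Works across model scales, including BERT-sized models like the one used here; by contrast, input-only prompt tuning typically needs models with billions of parameters (>1B) to match full fine-tuning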

### When to Use P-Tuning
- Multiple tasks on same model
- Extremely limited memory
- Need to preserve base model exactly
- Rapid task switching required