# Fine-tuning CodeGen for FastAPI Code Generation using LoRA

This notebook performs parameter-efficient fine-tuning of the CodeGen-350M model using LoRA (Low-Rank Adaptation).

## Google Colab Setup
First, mount Google Drive, check that a GPU is available, and install the required packages.

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Check GPU availability
!nvidia-smi

# Install required packages
!pip install torch transformers datasets numpy peft accelerate

# Setup paths
from pathlib import Path

# Set project paths
BASE_PATH = Path('/content/drive/MyDrive/fastapi-codegen')
PROCESSED_DATA_PATH = BASE_PATH / 'data/processed'
MODEL_PATH = BASE_PATH / 'models'

# Create directories
MODEL_PATH.mkdir(parents=True, exist_ok=True)
(MODEL_PATH / 'checkpoints').mkdir(exist_ok=True)

print(f'Project directory: {BASE_PATH}')
print(f'Processed data directory: {PROCESSED_DATA_PATH}')
print(f'Model directory: {MODEL_PATH}')

In [None]:
# from pathlib import Path
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    default_data_collator
)
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType
)
from datasets import load_from_disk

# Release cached GPU memory before loading the model; nothing to do on a CPU-only runtime
gpu_present = torch.cuda.is_available()
if gpu_present:
    torch.cuda.empty_cache()

In [None]:
# Load the three dataset splits saved under PROCESSED_DATA_PATH (set in the setup cell).
# The generator is consumed in order, so the splits are read as train, test, dev.
train_dataset, test_dataset, dev_dataset = (
    load_from_disk(PROCESSED_DATA_PATH / split_dir)
    for split_dir in ('train', 'test', 'dev')
)

# The tokenizer is stored next to the splits
tokenizer_dir = PROCESSED_DATA_PATH / 'tokenizer'
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

# Quick sanity check on split sizes
print(f'Train size: {len(train_dataset)}')
print(f'Test size: {len(test_dataset)}')
print(f'Dev size: {len(dev_dataset)}')

In [None]:
# Initialize model
MODEL_NAME = 'Salesforce/codegen-350M-mono'

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map='auto',  # weights are placed on the available device(s) at load time
    pad_token_id=tokenizer.eos_token_id  # use the EOS token id for padding
)

# Configure LoRA
# NOTE: target_modules must name layers that exist in the CodeGen architecture.
# The GPT-2 names (c_attn / c_proj / c_fc) do not occur in CodeGen, so with them
# only "wte" matched and every attention/MLP layer was left without an adapter.
lora_config = LoraConfig(
    r=8,  # rank of update matrices
    lora_alpha=32,  # scaling factor
    target_modules=[
        "wte",  # token embeddings
        "qkv_proj",  # fused query/key/value attention projection
        "out_proj",  # attention output projection
        "fc_in",  # feed-forward input layer
        "fc_out"  # feed-forward output layer
    ],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

# Add LoRA adapters to model
model = get_peft_model(model, lora_config)

# `device_map='auto'` has already placed the model, so it is not moved again with
# `.to(device)`; that call is redundant and can fail when modules were offloaded.
# `device` is still defined so other code can place tensors on the same device type.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Print trainable parameters info (should now include attention and MLP adapters)
model.print_trainable_parameters()

In [None]:
# Training arguments optimized for LoRA
training_args = TrainingArguments(
    output_dir=str(MODEL_PATH / 'checkpoints'),
    # `evaluation_strategy` was renamed to `eval_strategy`; recent transformers
    # releases (which the unpinned install pulls in) reject the old keyword.
    eval_strategy='steps',
    eval_steps=50,
    # Checkpoints are written on the same schedule as evaluation, which
    # load_best_model_at_end relies on to find the best checkpoint.
    save_strategy='steps',
    save_steps=50,
    learning_rate=5e-4,  # Higher learning rate for LoRA
    num_train_epochs=3,
    weight_decay=0.01,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,  # effective batch size per device: 4 * 4 = 16
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
    save_total_limit=2,  # Keep only the last 2 checkpoints
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss'
)

In [None]:
# Bundle the LoRA-wrapped model, the hyper-parameters and the train/dev splits.
# The dev split is what the periodic evaluation during training runs on.
trainer_kwargs = {
    'model': model,
    'args': training_args,
    'train_dataset': train_dataset,
    'eval_dataset': dev_dataset,
    'data_collator': default_data_collator,
}
trainer = Trainer(**trainer_kwargs)

# Run the fine-tuning loop
print('Starting training...')
trainer.train()

print('\nTraining completed!')

In [None]:
# Score the trained model on the held-out test split (not used during training)
test_results = trainer.evaluate(eval_dataset=test_dataset)
print('Test results:', test_results)

In [None]:
# Persist the fine-tuned result under MODEL_PATH / 'final'.
# `model` is the PEFT wrapper returned by get_peft_model.
final_dir = MODEL_PATH / 'final'
model.save_pretrained(str(final_dir))
print('Saved final model with LoRA weights')