# Fine-Tuning Falcon-7B with LoRA for E-Commerce FAQ

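This notebook fine-tunes `tiiuae/falcon-7b` to answer e-commerce customer-support questions. The base model is loaded with 4-bit (NF4) quantization and only LoRA adapters are trained (parameter-efficient fine-tuning), using the Bitext customer-support dataset.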

**Requirements**: Google Colab Pro with A100/V100 GPU

In [None]:
!pip install -q torch transformers datasets peft bitsandbytes accelerate trl einops scipy

In [None]:
import os
import shutil

import matplotlib.pyplot as plt
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from trl import SFTTrainer

In [None]:
# Report the PyTorch build and, when a CUDA device is visible, its name and total memory.
print(f"PyTorch version: {torch.__version__}")
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU Memory: {gpu_props.total_memory / 1e9:.2f} GB")

## 1. Configuration

In [None]:
# --- Model and output location ---
MODEL_NAME = "tiiuae/falcon-7b"
OUTPUT_DIR = "./falcon-7b-ecommerce-lora"

# --- Optimisation schedule ---
NUM_EPOCHS = 3
LEARNING_RATE = 2e-4
BATCH_SIZE = 4             # per-device batch size, used for both training and evaluation
GRADIENT_ACCUMULATION = 4  # passed to the trainer as gradient_accumulation_steps
MAX_LENGTH = 512           # passed to the trainer as max_seq_length

# --- LoRA adapter ---
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05

## 2. Load and Prepare Dataset

In [None]:
# Fetch the Bitext customer-support dataset and report how many rows its 'train' split has.
dataset = load_dataset("bitext/Bitext-customer-support-llm-chatbot-training-dataset")
train_rows = len(dataset['train'])
print(f"Dataset size: {train_rows}")

In [None]:
def format_instruction(example):
    """Render one dataset row as a single instruction-style training prompt.

    Args:
        example: Mapping with an ``'instruction'`` entry (placed under the
            "Customer Query" header) and a ``'response'`` entry (placed under
            the "Response" header).

    Returns:
        dict: ``{"text": prompt}``, where ``prompt`` contains the fixed
        assistant instruction, the customer query and the response, each
        under its own ``###`` header.
    """
    template = (
        "### Instruction:\n"
        "You are a helpful e-commerce customer support assistant. "
        "Answer the customer's question professionally and helpfully.\n"
        "\n"
        "### Customer Query:\n"
        "{query}\n"
        "\n"
        "### Response:\n"
        "{answer}"
    )
    prompt = template.format(query=example['instruction'], answer=example['response'])
    return {"text": prompt}

# Add the formatted "text" column to the dataset, then preview the start of the first training row.
dataset = dataset.map(format_instruction)
print("Sample formatted text:")
first_text = dataset['train'][0]['text']
print(first_text[:500])

In [None]:
# Hold out 10% of the formatted 'train' split for validation (fixed seed, so the
# split is reproducible).
# The result is bound to its own name instead of overwriting `dataset`: re-running
# this cell then always splits the full data, rather than re-splitting an already
# reduced 'train' subset and silently shrinking the training set.
split_dataset = dataset['train'].train_test_split(test_size=0.1, seed=42)
train_dataset = split_dataset['train']
val_dataset = split_dataset['test']
print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

## 3. Load Model with 4-bit Quantization

In [None]:
# 4-bit NF4 quantization with double quantization enabled; compute dtype is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

# `trust_remote_code` is intentionally not passed: Falcon is implemented natively in
# transformers, so there is no need to download and execute Python code from the
# model repository.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print(f"Model loaded: {MODEL_NAME}")

## 4. Configure LoRA

In [None]:
# Run PEFT's k-bit training preparation on the quantized model before adding adapters.
model = prepare_model_for_kbit_training(model)

# LoRA adapters are attached to the modules named "query_key_value";
# rank, alpha and dropout come from the configuration cell.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

model = get_peft_model(model, lora_config)
# Print how many parameters are trainable after wrapping.
model.print_trainable_parameters()

## 5. Training Configuration

In [None]:
# Trainer hyperparameters, grouped by concern; tunable values come from the configuration cell.
training_args = TrainingArguments(
    # Output and checkpointing: one checkpoint per epoch, at most two kept.
    output_dir=OUTPUT_DIR,
    save_strategy="epoch",
    save_total_limit=2,
    # Evaluation and logging.
    evaluation_strategy="epoch",
    logging_steps=25,
    report_to="none",
    # Batching.
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION,
    # Optimizer and learning-rate schedule.
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    weight_decay=0.01,
    optim="paged_adamw_8bit",
    # Precision.
    fp16=True,
)

## 6. Initialize Trainer

In [None]:
# Supervised fine-tuning trainer: reads the "text" column of each split and
# receives MAX_LENGTH as its max_seq_length.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    dataset_text_field="text",
    max_seq_length=MAX_LENGTH,
)

## 7. Train Model

In [None]:
# Run fine-tuning; the value returned by trainer.train() is kept and shown as the cell result.
print("Starting training...")
train_result = trainer.train()
train_result

In [None]:
# Split the trainer's log history into training entries (which carry 'loss')
# and evaluation entries (which carry 'eval_loss').
logs = trainer.state.log_history
train_logs = [entry for entry in logs if 'loss' in entry]
eval_logs = [entry for entry in logs if 'eval_loss' in entry]
train_loss = [entry['loss'] for entry in train_logs]
eval_loss = [entry['eval_loss'] for entry in eval_logs]

fig, (ax_train, ax_eval) = plt.subplots(1, 2, figsize=(10, 4))

# Plot against the recorded step / epoch instead of the list index: training loss
# is only logged every `logging_steps` steps, so the index is not a step count.
ax_train.plot([entry['step'] for entry in train_logs], train_loss)
ax_train.set(title='Training Loss', xlabel='Steps', ylabel='Loss')

# Markers keep the curve visible even when there is a single evaluation point.
ax_eval.plot([entry['epoch'] for entry in eval_logs], eval_loss, marker='o')
ax_eval.set(title='Validation Loss', xlabel='Epoch', ylabel='Loss')

fig.tight_layout()
fig.savefig('training_curves.png')
plt.show()

## 8. Save Model

In [None]:
# Write the LoRA-wrapped model and the tokenizer into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(OUTPUT_DIR)
print(f"Model saved to {OUTPUT_DIR}")

In [None]:
# Imported here rather than in the import cell because it only exists on Colab.
from google.colab import drive

drive.mount('/content/drive')

# Copy with shutil instead of `!cp`: a failed copy raises here, so the success
# message below is printed only when the files actually reached Drive, and
# OUTPUT_DIR is never interpolated into a shell command line.
# The target keeps the directory's own name under MyDrive, matching `cp -r SRC MyDrive/`.
drive_target = os.path.join(
    '/content/drive/MyDrive',
    os.path.basename(os.path.normpath(OUTPUT_DIR)),
)
shutil.copytree(OUTPUT_DIR, drive_target, dirs_exist_ok=True)
print("Model copied to Google Drive")

## 9. Test Inference

In [None]:
def generate_response(query, max_new_tokens=200):
    """Generate a support answer for ``query`` with the fine-tuned model.

    The prompt uses the same instruction / customer-query / response layout as
    the training text, ending at the "### Response:" header so the model
    writes the answer.

    Args:
        query: Customer question inserted under the "Customer Query" header.
        max_new_tokens: Upper bound on the number of generated tokens
            (defaults to 200).

    Returns:
        str: The decoded continuation only (prompt tokens excluded), stripped
        of surrounding whitespace.

    Side effects:
        Switches the global ``model`` to evaluation mode.
    """
    prompt = f"""### Instruction:
You are a helpful e-commerce customer support assistant. Answer the customer's question professionally and helpfully.

### Customer Query:
{query}

### Response:"""

    # NOTE(review): some Falcon tokenizer configs emit `token_type_ids`, which
    # `generate` rejects as an unused model kwarg; disabling it is harmless otherwise.
    inputs = tokenizer(
        prompt, return_tensors="pt", return_token_type_ids=False
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Training leaves the model in train mode, where LoRA dropout would still be
    # applied; switch to eval mode so generation uses the learned weights as-is.
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens. Splitting the full text on
    # "### Response:" would drop the answer whenever the model repeats that header.
    completion_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return response.strip()

In [None]:
# A few representative customer questions used as a qualitative smoke test.
test_queries = [
    "Where is my order? I placed it 5 days ago.",
    "How can I return a product?",
    "I want to cancel my subscription",
    "The product I received is damaged",
]

separator = "-" * 50
for query in test_queries:
    print(f"Query: {query}")
    answer = generate_response(query)
    print(f"Response: {answer}")
    print(separator)

## Summary

Training complete. The LoRA adapters have been saved and can be loaded for inference or further evaluation.