# Method 3: Fine-tuning LLM for NER Extraction

This notebook demonstrates named-entity extraction with an LLM that is fine-tuned using QLoRA.

## Overview
- **Approach**: Supervised fine-tuning with labeled NER data
- **Technique**: QLoRA (Quantized Low-Rank Adaptation)
- **Model**: Meta-Llama-3.1-8B-Instruct
- **Advantages**: 
  - Highest accuracy for domain-specific patterns
  - Learns Vietnamese/English entity patterns
  - Consistent and stable predictions
- **Disadvantages**:
  - Requires training time and GPU resources
  - Needs labeled training data
  - More complex setup

## 1. Setup and Imports

In [None]:
import sys
sys.path.append('..')

from src.config import NERConfig, PROCESSED_DATA_DIR, RESULTS_DIR, CHECKPOINTS_DIR
from src.data_loader import NERDataLoader
from src.finetuning import FineTunedNERExtractor
from src.evaluation import NEREvaluator
from src.benchmark import NERBenchmark

import json
from pathlib import Path
import torch

# Report the CUDA status; the device name and total memory are only queried
# when PyTorch can actually see a GPU.
cuda_available = torch.cuda.is_available()
print(f"GPU Available: {cuda_available}")
if cuda_available:
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    # total_memory is reported in bytes; dividing by 1e9 prints decimal gigabytes
    print(f"GPU Memory: {gpu_props.total_memory / 1e9:.2f} GB")

## 2. Load Configuration

In [None]:
# Hyperparameters for the fine-tuning run, collected in a single NERConfig.
# How each field is consumed is defined in src.config / src.finetuning.
config = NERConfig(
    model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
    # Optimisation settings
    learning_rate=1e-4,
    num_epochs=3,
    batch_size=4,
    gradient_accumulation_steps=4,
    # LoRA settings
    lora_r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    # Remaining settings
    temperature=0.1,
    max_length=2048
)

# Echo the values most relevant to training so the run is self-describing
config_summary = (
    ("Model", config.model_name),
    ("Learning Rate", config.learning_rate),
    ("Epochs", config.num_epochs),
    ("Batch Size", config.batch_size),
    ("Gradient Accumulation", config.gradient_accumulation_steps),
    ("LoRA r", config.lora_r),
    ("LoRA alpha", config.lora_alpha),
)

print("Fine-tuning Configuration:")
for label, value in config_summary:
    print(f"  {label}: {value}")

## 3. Load Dataset

In [None]:
# Read the three JSON splits from PROCESSED_DATA_DIR (train -> validation -> test)
train_dataset, val_dataset, test_dataset = (
    NERDataLoader.load_json_dataset(PROCESSED_DATA_DIR / f"{split}.json")
    for split in ("train", "validation", "test")
)

# Report how many samples each split contains
for split_label, split_data in (
    ("Training", train_dataset),
    ("Validation", val_dataset),
    ("Test", test_dataset),
):
    print(f"{split_label} set size: {len(split_data)}")

# Peek at the first training sample: the first 200 characters of its text and its labels
print("\nExample training sample:")
first_sample = train_dataset[0]
print(f"Text: {first_sample['text'][:200]}...")
print(f"Entities: {first_sample['entities']}")

## 4. Initialize Fine-tuning Pipeline

In [None]:
# Build the extractor from the shared config; the cells below reuse this same
# instance for data formatting, training, inference, evaluation and benchmarking.
extractor = FineTunedNERExtractor(config=config)
print("Fine-tuning pipeline initialized!")

## 5. Preview Training Data Format

In [None]:
# Run a single training sample through the extractor's formatting step so the
# exact text the model will be trained on can be inspected before training.
preview_batch = [train_dataset[0]]
sample_formatted = extractor.prepare_training_data(preview_batch)

print("Formatted training example:")
formatted_record = sample_formatted[0]
print(formatted_record['text'])

## 6. Fine-tune the Model

In [None]:
# Launch fine-tuning. Runtime depends on dataset size and on the available GPU.
output_dir = CHECKPOINTS_DIR / "finetuned_ner"

# Everything the trainer needs: both splits plus the output directory
training_inputs = dict(
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    output_dir=output_dir,
)

print("Starting fine-tuning...\n")
extractor.train(**training_inputs)
print(f"\nModel saved to {output_dir}")

## 7. Load Fine-tuned Model for Inference

In [None]:
# Switch between "use the model trained in section 6" and "reuse a saved model".
# Leave it False after running the training cell in this session. Set it to True
# on a fresh kernel to skip training and load the model previously saved under
# CHECKPOINTS_DIR / "finetuned_ner" instead.
LOAD_FROM_CHECKPOINT = False

if LOAD_FROM_CHECKPOINT:
    # Rebuild the extractor from the config, then load the saved model from disk
    extractor = FineTunedNERExtractor(config=config)
    extractor.load_model(CHECKPOINTS_DIR / "finetuned_ner")

print("Model ready for inference!")

## 8. Test on Sample Examples

In [None]:
# Qualitative spot-check: show labels and predictions side by side for the
# first few validation samples.
num_examples = 3
banner = '=' * 80


def _as_json(payload):
    """Serialise *payload* as indented JSON without escaping non-ASCII characters."""
    return json.dumps(payload, indent=2, ensure_ascii=False)


for position, sample in enumerate(val_dataset[:num_examples], start=1):
    print(f"\n{banner}")
    print(f"Example {position}")
    print(banner)

    text = sample['text']
    expected_entities = sample['entities']

    # Only the first 300 characters are shown to keep the output readable
    print(f"\nText: {text[:300]}...\n")

    # Inference runs on the full text, not on the truncated preview
    predicted_entities = extractor.extract_entities(text)

    print("Ground Truth:")
    print(_as_json(expected_entities))

    print("\nPredicted:")
    print(_as_json(predicted_entities))

## 9. Evaluate on Validation Set

In [None]:
# Score the model on the whole validation split
print("Running evaluation on validation set...")
predictions, ground_truth = extractor.evaluate_on_dataset(val_dataset)

# Compute the metrics and display them.
# NOTE(review): `entity_types` is not set in the config cell above, so it
# presumably comes from NERConfig's defaults -- confirm in src.config.
evaluator = NEREvaluator(entity_types=config.entity_types)
results = evaluator.evaluate_all(predictions, ground_truth)
evaluator.print_results(results)

# Persist the validation metrics under RESULTS_DIR
results_path = RESULTS_DIR / "finetuning_validation.json"
evaluator.save_results(results, results_path)
print(f"Results saved to {results_path}")

## 10. Run Benchmark on Test Set

In [None]:
# Benchmark the extractor on the held-out test split
benchmark = NERBenchmark(config=config)

benchmark_args = dict(
    method_name="Fine-tuning",
    extractor=extractor,
    test_dataset=test_dataset,
    verbose=True,
)
test_results = benchmark.run_benchmark(**benchmark_args)

# Write the benchmark results to the method-specific results location
benchmark.save_results(RESULTS_DIR / "finetuning")

## 11. Analysis and Insights

In [None]:
# Headline numbers taken from the test-set benchmark results
macro_f1 = test_results['partial_match_metrics']['macro_avg']['f1']

print("\nKey Insights:")
print(f"  - Exact Match Accuracy: {test_results['exact_match_accuracy']:.2%}")
print(f"  - Macro F1 Score: {macro_f1:.2%}")
print(f"  - Inference Speed: {test_results['samples_per_second']:.2f} samples/second")

# Static qualitative notes about the approach (not derived from the metrics above)
qualitative_notes = (
    ("Strengths", (
        "Highest accuracy for domain-specific extraction",
        "Learns patterns from training data",
        "Consistent and reliable predictions",
        "Better with Vietnamese/domain-specific names",
    )),
    ("Weaknesses", (
        "Requires training time and GPU resources",
        "Needs labeled training data",
        "More complex to set up and maintain",
    )),
)

for heading, bullets in qualitative_notes:
    print(f"\n{heading}:")
    for bullet in bullets:
        print(f"  - {bullet}")

## 12. Save Predictions for Analysis

In [None]:
# Persist the model predictions for later error analysis.
#
# NOTE: `predictions` was produced in section 9 by
# `extractor.evaluate_on_dataset(val_dataset)`, so these are VALIDATION-set
# predictions, not predictions for the test set benchmarked in section 10.
# The file lands in the same folder as the benchmark results, so the printed
# message below states the provenance explicitly.
predictions_dir = RESULTS_DIR / "finetuning"
predictions_path = predictions_dir / "predictions.json"

# Create the target folder up front so saving does not depend on an earlier
# cell having created it (assumes RESULTS_DIR is a pathlib.Path, as its use
# with the `/` operator suggests).
predictions_dir.mkdir(parents=True, exist_ok=True)

benchmark.save_predictions(
    method_name="Fine-tuning",
    predictions=predictions,
    output_path=predictions_path
)

print(f"Validation-set predictions saved to {predictions_path}")
print("\nExperiment complete!")
print(f"Results saved to {predictions_dir}")