In [None]:
# Amharic NER Model Fine-tuning

This notebook demonstrates how to fine-tune a transformer model for Amharic Named Entity Recognition (NER).

## Overview

In this notebook, we will:

1. Load the labeled data from the previous step
2. Prepare the data for training, validation, and testing
3. Fine-tune a pre-trained transformer model for Amharic NER
4. Evaluate the model's performance
5. Save the fine-tuned model for further use


In [None]:
# Import required libraries
import os
import sys
import json
from pathlib import Path
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import torch
from sklearn.model_selection import train_test_split

# Locate the project root and make it importable.
#
# The notebook is expected to live one level below the repository root, but the
# kernel's working directory is not always the notebook's folder (for example
# when the notebook is executed from the repository root). Searching upward for
# the module imported below keeps both layouts working.
def find_project_root(start: Path) -> Path:
    """
    Return the directory that holds the project's ``src`` package.

    Args:
        start: Directory to start searching from (normally the working directory)

    Returns:
        The nearest directory at or above ``start`` that contains
        ``src/models/model_trainer.py``; ``start.parent`` when no such
        directory is found (the original one-level-up behavior).
    """
    for candidate in (start, *start.parents):
        if (candidate / "src" / "models" / "model_trainer.py").is_file():
            return candidate
    return start.parent


project_root = find_project_root(Path().resolve())
# Append only once so that re-running the cell does not grow sys.path
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import the AmharicNERTrainer class from our custom module
from src.models.model_trainer import AmharicNERTrainer

# Report the installed PyTorch build and whether a CUDA device is usable
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)
# The device name is only queried (for device index 0) when CUDA is usable
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")


In [None]:
# Locations under <project_root>/data: labeled input and trained-model output
labeled_data_dir = project_root.joinpath("data", "labeled")
models_dir = project_root.joinpath("data", "models")

# Make sure the model output directory (and any missing parents) exists;
# an already existing directory is not an error
models_dir.mkdir(parents=True, exist_ok=True)

print(
    f"Labeled data directory: {labeled_data_dir}",
    f"Models directory: {models_dir}",
    sep="\n",
)


In [None]:
# Load the labeled data
def load_labeled_data():
    """
    Load the labeled data from the CoNLL file and create the trainer used to read it.

    The file ``labeled_data.conll`` is looked up in ``labeled_data_dir``. The
    trainer is created for the ``xlm-roberta-base`` checkpoint with
    ``models_dir`` as its output directory, and a short preview of the first
    example is printed.

    Returns:
        Tuple ``(examples, trainer)``: the examples returned by
        ``trainer.load_conll_data`` and the ``AmharicNERTrainer`` instance.
        When the CoNLL file does not exist a message is printed and
        ``(None, None)`` is returned, so the result can always be unpacked
        into two names.
    """
    conll_path = labeled_data_dir / "labeled_data.conll"

    if not conll_path.exists():
        print(f"Labeled data not found at {conll_path}")
        # Same arity as the success path: `examples, trainer = load_labeled_data()`
        # must not fail with a TypeError when the file is missing.
        return None, None

    # Initialize the trainer
    trainer = AmharicNERTrainer(
        model_name="xlm-roberta-base",
        output_dir=str(models_dir)
    )

    # Load the labeled data
    examples = trainer.load_conll_data(conll_path)

    print(f"Loaded {len(examples)} examples from {conll_path}")

    # Display a sample example (first 10 tokens/labels of the first example)
    if examples:
        print("\nSample example:")
        sample = examples[0]
        print(f"Tokens: {sample['tokens'][:10]}...")
        print(f"Labels: {sample['labels'][:10]}...")

    return examples, trainer

# Load the labeled data
# Note: This will only work after manual labeling is complete
# examples, trainer = load_labeled_data()


In [None]:
# Prepare the data for training
def prepare_data(examples, trainer, test_size=0.2, val_size=0.1):
    """
    Split the labeled examples into the datasets used for training.

    The split itself is delegated to ``trainer.prepare_dataset``; this wrapper
    only guards against missing input and prints the size of each split.

    Args:
        examples: List of examples with tokens and labels
        trainer: AmharicNERTrainer instance
        test_size: Proportion of data to use for testing
        val_size: Proportion of data to use for validation

    Returns:
        The object returned by ``trainer.prepare_dataset`` (indexed here by
        ``'train'``, ``'validation'`` and ``'test'``), or None when
        ``examples`` is empty or None
    """
    if examples:
        splits = trainer.prepare_dataset(examples, test_size=test_size, val_size=val_size)

        # One summary line per split, in train / validation / test order
        print("Prepared datasets:")
        for heading, split_key in (
            ("Training", "train"),
            ("Validation", "validation"),
            ("Testing", "test"),
        ):
            print(f"- {heading}: {len(splits[split_key])} examples")

        return splits

    print("No examples to prepare")
    return None

# Prepare the data for training
# datasets = prepare_data(examples, trainer)


In [None]:
# Fine-tune the model
def train_model(datasets, trainer, learning_rate=2e-5, batch_size=16, num_epochs=3):
    """
    Run fine-tuning through the trainer and report the resulting metrics.

    Args:
        datasets: Dictionary with train, validation, and test datasets
        trainer: AmharicNERTrainer instance
        learning_rate: Learning rate for training
        batch_size: Batch size for training
        num_epochs: Number of epochs to train for

    Returns:
        The metrics mapping returned by ``trainer.train``, or None when
        ``datasets`` is empty or None
    """
    if not datasets:
        print("No datasets to train on")
        return None

    # Hyperparameters, keyed by the keyword names trainer.train() receives
    hyperparams = {
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "num_epochs": num_epochs,
    }
    headings = ("Learning rate", "Batch size", "Number of epochs")

    print("Starting model fine-tuning...")
    for heading, setting in zip(headings, hyperparams.values()):
        print(f"- {heading}: {setting}")

    test_results = trainer.train(datasets, **hyperparams)

    print("\nTraining complete!")
    print("\nTest results:")
    # Every metric value is formatted with four decimals
    for name, score in test_results.items():
        print(f"- {name}: {score:.4f}")

    return test_results

# Fine-tune the model
# Note: This will take some time to run, especially without a GPU
# test_results = train_model(datasets, trainer)


In [None]:
# Compare different models
def compare_models(models=None, learning_rate=2e-5, batch_size=16, num_epochs=3):
    """
    Fine-tune several transformer checkpoints on the labeled data and collect their test results.

    For every model a fresh ``AmharicNERTrainer`` is created with its own
    output directory under ``models_dir`` (any ``/`` in the model name is
    replaced by ``-``), the CoNLL file is loaded and split through that
    trainer, and the model is trained with the given hyperparameters.

    Args:
        models: Model names to compare; a single name may be given as a
            string. Defaults to ``xlm-roberta-base``,
            ``bert-base-multilingual-cased`` and
            ``distilbert-base-multilingual-cased``.
        learning_rate: Learning rate used for every model
        batch_size: Batch size used for every model
        num_epochs: Number of epochs used for every model

    Returns:
        Dictionary with model names and their test results, or None when the
        labeled CoNLL file does not exist
    """
    if models is None:
        # Default candidates; pass `models=[...]` to compare other checkpoints
        models = [
            "xlm-roberta-base",
            "bert-base-multilingual-cased",
            "distilbert-base-multilingual-cased",
        ]
    elif isinstance(models, str):
        # A bare string would otherwise be iterated character by character
        models = [models]

    # Locate the labeled data
    conll_path = labeled_data_dir / "labeled_data.conll"

    if not conll_path.exists():
        print(f"Labeled data not found at {conll_path}")
        return None

    results = {}

    for model_name in models:
        print(f"\n{'=' * 50}")
        print(f"Training model: {model_name}")
        print(f"{'=' * 50}")

        # Each model gets its own trainer and output directory
        trainer = AmharicNERTrainer(
            model_name=model_name,
            output_dir=str(models_dir / model_name.replace("/", "-"))
        )

        # Load the labeled data through this model's trainer
        examples = trainer.load_conll_data(conll_path)

        # NOTE(review): the split is recreated for every model; whether
        # prepare_dataset uses a fixed seed is not visible here - confirm that
        # all models are evaluated on the same test split.
        datasets = trainer.prepare_dataset(examples)

        # Train the model
        test_results = trainer.train(
            datasets,
            learning_rate=learning_rate,
            batch_size=batch_size,
            num_epochs=num_epochs
        )

        # Store the results
        results[model_name] = test_results

        print(f"\nResults for {model_name}:")
        for metric, value in test_results.items():
            print(f"- {metric}: {value:.4f}")

    return results

# Compare different models
# Note: This will take a long time to run
# model_comparison = compare_models()


In [None]:
# Visualize model comparison results
def visualize_results(results):
    """
    Show the model comparison as a table and as one bar chart per metric.

    Args:
        results: Dictionary with model names and their test results
            (a mapping of metric name to value for every model)

    Returns:
        None. Prints a message and returns early when ``results`` is empty
        or None, or when no metric has a numeric value to plot.
    """
    if not results:
        print("No results to visualize")
        return

    import matplotlib.pyplot as plt
    import seaborn as sns

    # One row per model, one column per metric. Building the frame in a single
    # step lets pandas infer numeric dtypes (filling an empty frame cell by
    # cell leaves object columns) and includes metrics that only some models
    # report; those are NaN for the other models.
    df_results = pd.DataFrame(list(results.values()), index=list(results.keys()))

    # Display the results
    print("Model comparison results:")
    display(df_results)

    # Only values that can be read as numbers can be drawn as bars
    numeric_results = df_results.apply(pd.to_numeric, errors="coerce")
    metrics = [
        metric for metric in numeric_results.columns
        if numeric_results[metric].notna().any()
    ]
    if not metrics:
        print("No numeric metrics to plot")
        return

    # squeeze=False keeps `axes` two-dimensional even for a single metric
    fig, axes = plt.subplots(1, len(metrics), figsize=(12, 6), squeeze=False)

    for ax, metric in zip(axes[0], metrics):
        scores = numeric_results[metric]
        sns.barplot(x=numeric_results.index, y=scores, ax=ax)
        ax.set_title(f"{metric}")
        ax.tick_params(axis="x", labelrotation=45)
        # Keep the fixed 0-1 scale when every value fits in it, so that models
        # are easy to compare; otherwise let the axis autoscale so that bars
        # for larger-valued metrics are not clipped.
        if scores.dropna().between(0, 1).all():
            ax.set_ylim(0, 1)

    fig.tight_layout()
    plt.show()

# Visualize the model comparison results
# visualize_results(model_comparison)


In [None]:
## Summary and Next Steps

In this notebook, we have:

1. Set up the environment for fine-tuning transformer models
2. Created functions to load and prepare labeled data
3. Implemented a function to fine-tune a single model
4. Developed a framework for comparing different models
5. Added visualization tools for model comparison results

To use this notebook:

1. Complete the manual labeling in the previous notebook
2. Uncomment and run the `load_labeled_data()` call to load the labeled data and create the trainer
3. Uncomment and run the `prepare_data()` call to split the data into training, validation, and test sets
4. Uncomment and run the `train_model()` call to fine-tune a single model
5. Alternatively, uncomment and run the `compare_models()` call to fine-tune and compare multiple models
6. Uncomment and run the `visualize_results()` call to plot the comparison results

In the next notebook, we will explore model interpretability using SHAP and LIME to understand how our fine-tuned model makes predictions.
