In [None]:
# Install required packages
%pip install torch transformers datasets shap lime matplotlib seaborn plotly --quiet
import torch
import json
import matplotlib.pyplot as plt
import seaborn as sns

# Transformers
from transformers import (
    AutoTokenizer, 
    AutoModelForTokenClassification,
)

import warnings
# Blanket-silence every warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# torch/transformers; consider narrowing the filter if output noise allows.
warnings.filterwarnings('ignore')

# Set up plotting
# Reset matplotlib to its stock style, then switch seaborn's colour cycle to
# the "husl" palette for any figures drawn later.
plt.style.use('default')
sns.set_palette("husl")

# Prefer a CUDA GPU when PyTorch can see one, otherwise fall back to CPU.
# NOTE(review): in the cells visible here `device` is only printed; nothing is
# moved onto it, so inference runs wherever the model was loaded.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


In [None]:
# Load the best performing model (assuming from Task 4)
def load_trained_model(model_path="../models/xlm-roberta-amharic-ner-final"):
    """Load a trained token-classification model, its tokenizer and label maps.

    Args:
        model_path: Directory holding the saved model/tokenizer files and a
            ``label_mappings.json`` file.

    Returns:
        tuple: ``(model, tokenizer, label_mappings)`` on success, where
        ``label_mappings`` is the parsed content of ``label_mappings.json``.
        ``(None, None, None)`` if anything fails while loading; the cause is
        printed so the failure is not silent.

    Raises:
        KeyboardInterrupt, SystemExit: deliberately *not* swallowed, so the
        user can still interrupt a slow load and the kernel can shut down.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = AutoModelForTokenClassification.from_pretrained(model_path)

        # Load label mappings
        with open(f"{model_path}/label_mappings.json", 'r', encoding='utf-8') as f:
            label_mappings = json.load(f)
    except Exception as exc:
        # Keep the notebook's best-effort contract (return Nones, let later
        # cells check ``model is not None``) but report what actually went
        # wrong: a missing directory, a corrupt JSON file and a missing
        # dependency all need different remedies.
        print(f"Could not load model from '{model_path}': "
              f"{type(exc).__name__}: {exc}")
        print("If the model has not been trained yet, please run Task 3 first.")
        return None, None, None

    return model, tokenizer, label_mappings

# Load the model, tokenizer and label mappings from the default path
model, tokenizer, label_mappings = load_trained_model()

# Report the outcome; the loader hands back None when nothing could be loaded.
if model is None:
    print("Please train a model first using Task 3 notebook")
else:
    print("Model loaded successfully!")
    print(f"Labels: {list(label_mappings['label2id'].keys())}")


In [None]:
def predict_entities(text, model, tokenizer):
    """Predict an entity label for every sub-word token of ``text``.

    Args:
        text: The string to tag.
        model: Token-classification model. It is called as ``model(**inputs)``
            and must expose ``.logits`` on its output and
            ``.config.id2label``. May be ``None``.
        tokenizer: Tokenizer matching ``model``; called with
            ``return_tensors="pt"`` and used to map ids back to tokens.

    Returns:
        list[tuple[str, str]]: ``(token, label)`` pairs in input order with
        special tokens removed. Tokens are the tokenizer's sub-word pieces,
        not whole words. An empty list when ``model`` is ``None``.
    """
    if model is None:
        print("No model loaded. Please run Task 3 first.")
        return []

    # Tokenize the text
    tokens = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # The input tensors must sit on the same device as the model weights,
    # otherwise the forward pass fails as soon as the model lives on a GPU.
    model_device = getattr(model, "device", None)
    if model_device is not None:
        tokens = {name: tensor.to(model_device) for name, tensor in tokens.items()}

    # Get predictions. Softmax is monotonic, so taking argmax directly on the
    # logits selects the same class without the extra pass.
    with torch.no_grad():
        logits = model(**tokens).logits
    predicted_ids = logits.argmax(dim=-1)[0].tolist()

    # Convert to labels
    id2label = model.config.id2label
    predicted_labels = [id2label[class_id] for class_id in predicted_ids]

    # Get tokens (plain ints so this works wherever the tensor lives)
    tokens_list = tokenizer.convert_ids_to_tokens(tokens['input_ids'][0].tolist())

    # Filter out special tokens. Keep the original XLM-R markers and add
    # whatever the tokenizer itself declares, so other tokenizer families
    # (e.g. [CLS]/[SEP]) do not leak their markers into the result.
    special_tokens = {'<s>', '</s>', '<pad>', '<unk>'}
    special_tokens.update(getattr(tokenizer, "all_special_tokens", None) or ())

    return [
        (token, label)
        for token, label in zip(tokens_list, predicted_labels)
        if token not in special_tokens
    ]

# Sample Amharic/English product messages used to eyeball the predictions
test_sentences = [
    "ቦርሳ በጣም ጥሩ! ዋጋ 5000 ብር። ቦሌ ውስጥ ይገኛል።",
    "cream በጣም ጥሩ! ዋጋ 1200 ብር። መርካቶ ውስጥ ይገኛል።",
    "ጫማ እና ሻርቶች ዋጋ 2500 ብር። አዲስ አበባ ውስጥ ይገኛል።",
    "iPhone በጣም ቆንጆ! ዋጋ 45000 ብር። ፒያሳ ውስጥ ይገኛል።"
]

if model is None:
    print("Model not available. Please run Task 3 first to train a model.")
else:
    print("Testing entity prediction on sample texts:")
    for i, sentence in enumerate(test_sentences):
        print(f"\nSentence {i+1}: {sentence}")
        predictions = predict_entities(sentence, model, tokenizer)
        print("Predicted entities:")
        for token, label in predictions:
            # Only tokens tagged as part of an entity are worth listing.
            if label == 'O':
                continue
            print(f"  {token:<15} -> {label}")
