In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os 
import sys 
# Put the parent of the working directory on the import path; presumably this is
# what lets the `scripts` package imported further down resolve — confirm layout.
sys.path.append(os.path.abspath('..'))

In [None]:
# Install huggingface_hub via the %pip magic.
# NOTE(review): the version is unpinned and none of the visible cells import
# huggingface_hub directly — confirm the install is needed, then pin it.
%pip install huggingface_hub

import os

import scripts.parseconll as parseconll
from scripts.parseconll import parse_conll_file
from datasets import Dataset

# Relative path to the CoNLL file; it is resolved against the kernel's current
# working directory, so fail early with a readable message if it is missing.
conll_path = "ner_auto_labels.conll"
if not os.path.exists(conll_path):
    raise FileNotFoundError(f"File '{conll_path}' not found. Please check the path or make sure the file exists.")

# Parse the file into rows, then wrap the rows in a `datasets.Dataset`.
data = parse_conll_file(conll_path)
dataset = Dataset.from_list(data)

# Sorted list of every distinct tag that appears under 'ner_tags' in any row.
unique_labels = sorted(set().union(*(row['ner_tags'] for row in data)))

In [None]:
# Model identifiers to compare; the evaluation loop passes each one to
# `evaluate_model` in this order.
models = [
    "Davlan/afro-xlmr-base",
    "xlm-roberta-base",
    "rasyosef/bert-tiny-amharic",
    "bert-base-multilingual-cased",
]

In [None]:
def evaluate_model(model_name, dataset, label_list):
    """Return a metrics record for ``model_name``.

    NOTE: this is a placeholder. No model is loaded or run; every metric is
    an independent draw from ``np.random.rand()`` (uniform on [0, 1)), and
    ``dataset`` and ``label_list`` are accepted but never read.

    Args:
        model_name: Identifier stored under the ``"model"`` key of the result.
        dataset: Unused.
        label_list: Unused.

    Returns:
        dict with keys ``"model"``, ``"accuracy"``, ``"f1"``, ``"precision"``
        and ``"recall"``, in that order.
    """
    record = {"model": model_name}
    # Draw in a fixed order so that a seeded global RNG yields the same record.
    for metric in ("accuracy", "f1", "precision", "recall"):
        record[metric] = np.random.rand()
    return record

# Evaluate every candidate model and collect one metrics record per model.
# The label inventory is `unique_labels`, built in the data-loading cell;
# `label_list` exists only as a parameter name inside evaluate_model, so
# passing it here raised NameError on a fresh kernel.
results = []
for model_name in models:
    print(f"Evaluating: {model_name}")
    result = evaluate_model(model_name, dataset, unique_labels)
    results.append(result)

# Build the comparison table in one step, persist it next to the notebook,
# and leave it as the cell's last expression so it is displayed.
results_df = pd.DataFrame(results)
results_df.to_csv("evaluation_results.csv", index=False)
results_df

In [None]:
import matplotlib.pyplot as plt

# Pull the model name and F1 value out of each evaluation record, in order.
model_names = [record['model'] for record in results]
f1_scores = [record['f1'] for record in results]

# Horizontal bar chart: one bar per model, bar length is its F1 value.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(model_names, f1_scores, color='skyblue')
ax.set_xlabel("F1 Score")
ax.set_title("NER Model Comparison")
ax.grid(True)
plt.show()