# Evaluation on FLEURS (Kinyarwanda)

In [None]:
from datasets import load_dataset
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import jiwer
import librosa
import numpy as np

# --- Data -------------------------------------------------------------------
# Fetch the FLEURS corpus for the requested config and keep its "test" split.
# NOTE(review): FLEURS configs are normally locale codes rather than language
# names -- confirm that "kinyarwanda" is an accepted config for google/fleurs.
fleurs = load_dataset("google/fleurs", "kinyarwanda")
test_set = fleurs["test"]

# --- Model ------------------------------------------------------------------
# The CTC model and its processor are restored from the same local checkpoint
# directory (point this at your own fine-tuned checkpoint).
MODEL_DIR = "./models/wav2vec2-base"
model = Wav2Vec2ForCTC.from_pretrained(MODEL_DIR)
processor = Wav2Vec2Processor.from_pretrained(MODEL_DIR)
# Put the model in evaluation mode before running inference.
model.eval()

def transcribe(batch, *, sampling_rate=16000):
    """Transcribe one dataset example with greedy (argmax) CTC decoding.

    Args:
        batch: A single dataset row. ``batch["audio"]["path"]`` is read as the
            location of the audio file to load.
        sampling_rate: Rate in Hz that the audio is resampled to on load and
            that is declared to the processor. Defaults to 16000.

    Returns:
        The same ``batch`` mapping, with the decoded hypothesis stored under
        the ``"pred_text"`` key.
    """
    # Load (and resample) the waveform at the target rate.
    speech, _ = librosa.load(batch["audio"]["path"], sr=sampling_rate)
    # The rate declared to the processor must be the rate the waveform was
    # actually loaded at; a single variable keeps the two from drifting apart.
    inputs = processor(
        speech,
        sampling_rate=sampling_rate,
        return_tensors="pt",
        padding=True,
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits
    # Greedy decoding: most likely token id at every frame.
    pred_ids = torch.argmax(logits, dim=-1)
    # A single utterance was passed in, so take the only decoded string.
    batch["pred_text"] = processor.batch_decode(pred_ids)[0]
    return batch

# Evaluate on the first 50 test utterances only, attaching a "pred_text"
# column to each row via transcribe().
sampled = test_set.select(range(50))
sampled = sampled.map(transcribe)

# References come from the "raw_transcription" column; hypotheses from the
# column written by transcribe().
# NOTE(review): presumably "raw_transcription" keeps casing/punctuation --
# confirm it matches the text normalization the model was trained with.
refs = sampled["raw_transcription"]
preds = sampled["pred_text"]

# Word- and character-level error rates over the sampled subset.
word_error_rate = jiwer.wer(refs, preds)
print(f"WER: {word_error_rate}")
char_error_rate = jiwer.cer(refs, preds)
print(f"CER: {char_error_rate}")