In [None]:
# !pip install evaluate
# !pip install jiwer

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6


In [None]:
from evaluate import load

def parse_conll(path):
    """Collect reference sentences from a CoNLL-style file.

    The file is scanned line by line. A ``# id:`` comment sets the current
    sentence id; a ``# text TAT:`` comment supplies the sentence text for
    that id. Entries with an empty id or empty text are skipped.

    Args:
        path: Path to the UTF-8 encoded CoNLL file.

    Returns:
        dict mapping sentence id -> reference text (whitespace-stripped).
    """
    id_marker = "# id:"
    text_marker = "# text TAT:"

    references = {}
    sentence_id = None
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if stripped.startswith(id_marker):
                sentence_id = stripped.split(":", 1)[1].strip()
                continue
            if not stripped.startswith(text_marker):
                continue
            # The marker is known to sit at position 0, so slicing it off
            # is the same as removing its first occurrence.
            sentence = stripped[len(text_marker):].strip()
            if sentence_id and sentence:
                references[sentence_id] = sentence
    return references

def parse_soyle(path):
    """Collect recognised transcripts from a results text file.

    A ``# id:`` line announces an utterance id; the first non-empty line
    that follows is stored as its transcript. After a transcript has been
    stored the id is cleared, so further lines are ignored until the next
    ``# id:`` line.

    Args:
        path: Path to the UTF-8 encoded results file.

    Returns:
        dict mapping utterance id -> transcript line (whitespace-stripped).
    """
    predictions = {}
    pending_id = None
    with open(path, "r", encoding="utf-8") as handle:
        for stripped in map(str.strip, handle):
            if stripped.startswith("# id:"):
                _, _, tail = stripped.partition(":")
                pending_id = tail.strip()
            elif stripped and pending_id:
                predictions[pending_id] = stripped
                # Only the first line after the id counts as the transcript.
                pending_id = None
    return predictions

def compute_metrics(references, predictions):
    """Score predictions against references with WER and CER.

    Only ids present in both mappings are scored. Texts are lower-cased
    before scoring; no other normalisation is applied.

    Args:
        references: dict mapping id -> reference text.
        predictions: dict mapping id -> predicted text.

    Returns:
        Tuple ``(wer, cer, ids, refs, preds)`` where ``ids`` is the sorted
        list of shared ids and ``refs`` / ``preds`` are the lower-cased
        texts in that same order.
    """
    wer_metric = load("wer")
    cer_metric = load("cer")

    shared_ids = sorted(set(references).intersection(predictions))
    ref_texts = []
    pred_texts = []
    for uid in shared_ids:
        ref_texts.append(references[uid].lower())
    for uid in shared_ids:
        pred_texts.append(predictions[uid].lower())

    scores = {
        name: metric.compute(references=ref_texts, predictions=pred_texts)
        for name, metric in (("wer", wer_metric), ("cer", cer_metric))
    }
    return scores["wer"], scores["cer"], shared_ids, ref_texts, pred_texts


# Driver for this cell: parse the reference and prediction files, score them,
# and print the overall WER/CER followed by a few side-by-side examples.
if __name__ == "__main__":
    # Input files: CoNLL references and the recogniser's results file.
    conll_file = "/content/drive/MyDrive/Курсовая_3_курс/Голосовые/Текст/tt.test_final.conll"
    soyle_file = "/content/drive/MyDrive/Курсовая_3_курс/Голосовые/Текст/soyle_results_full.txt"

    # Both parsers return dicts keyed by utterance id.
    refs = parse_conll(conll_file)
    preds = parse_soyle(soyle_file)

    # compute_metrics scores only ids present in both dicts and returns the
    # lower-cased texts in sorted-id order.
    wer, cer, ids, refs_list, preds_list = compute_metrics(refs, preds)

    print(f"WER: {wer:.4f}")
    print(f"CER: {cer:.4f}")
    print("\nПримеры сравнения:")
    for i, r, p in zip(ids[:10], refs_list[:10], preds_list[:10]):  # first 10 as examples
        print(f"{i}:\n  REF: {r}\n  PRD: {p}")


Downloading builder script: 0.00B [00:00, ?B/s]

WER: 0.2478
CER: 0.0777

Примеры сравнения:
test_1:
  REF: бөтен хәтерләтүләрне күрсәт
  PRD: бөтен хәтерләтүләрне күрсәт
test_10:
  REF: бөтен будильникларны күрсәт .
  PRD: бөтен бу дип никларны күрсәт
test_100:
  REF: бүген нинди температура булачак ?
  PRD: бүген нинди температура булачак
test_101:
  REF: бүген кызу булырмы ?
  PRD: бүген кызу булырмы
test_102:
  REF: иртәгә яңгыр булырмы ?
  PRD: иртәгә яңгыр булырмы
test_103:
  REF: чираттагы будильникны бетер
  PRD: чираттагы будильникны бетер
test_104:
  REF: бүген кибеткә барырга искә төшер
  PRD: бүген кибеткә барырга искә төшер
test_105:
  REF: киләсе атнада минем искә төшерүләрне бетер
  PRD: киләсе аныңда минем искә төшерүләрне бетер
test_106:
  REF: сишәмбе көнне көндезге сәгать өчтә этне грумерга алып барырга исемә төшер
  PRD: сишәмбе көнне көндезгә сәгать өчте этне гомергә алып барырга исемә төшер
test_107:
  REF: егерме сигезенче көнне эльвирага еллыкка чәчәкләр алырга исемә төшер.
  PRD: 28 нче көнне эльвира гыеллыкк

In [None]:
import os
import re
import pandas as pd
from evaluate import load

# Load the WER and CER metrics once at module level; evaluate_file() reads
# these two globals when it computes scores.
wer_metric = load("wer")
cer_metric = load("cer")

# Text normalization
def normalize_text(text: str) -> str:
    """Normalise a transcript for WER/CER comparison.

    Steps, in order:
      1. drop every character that is neither a regex word character
         (letters, digits, underscore) nor whitespace;
      2. lower-case the result;
      3. collapse whitespace runs to a single space and trim both ends.

    Args:
        text: Raw transcript.

    Returns:
        The normalised transcript.
    """
    without_punct = re.sub(r"[^\w\s]", "", text, flags=re.UNICODE)
    lowered = without_punct.lower()
    collapsed = re.sub(r"\s+", " ", lowered)
    return collapsed.strip()


def load_references(conll_path):
    """Read reference sentences from a CoNLL-style file.

    A ``# id:`` comment sets the current sentence id; a ``# text TAT:``
    comment supplies the text for that id. Each text is stored both as
    read and after ``normalize_text``.

    Args:
        conll_path: Path to the UTF-8 encoded CoNLL file.

    Returns:
        dict mapping sentence id -> ``{"raw": str, "norm": str}``.
    """
    text_pattern = re.compile(r"# text TAT:\s*(.*)")

    refs = {}
    sentence_id = None
    with open(conll_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if stripped.startswith("# id:"):
                sentence_id = stripped.split(":", 1)[1].strip()
                continue
            if not stripped.startswith("# text TAT:"):
                continue
            found = text_pattern.match(stripped)
            if not (found and sentence_id):
                continue
            raw_text = found.group(1).strip()
            entry = {"raw": raw_text}
            entry["norm"] = normalize_text(raw_text)
            refs[sentence_id] = entry
    return refs


def load_predictions(txt_path):
    """Read recognised transcripts from a results text file.

    A ``# id:`` line sets the current utterance id. The id has any ".wav"
    removed and, if it does not already start with "test_", is rebuilt as
    "test_" plus the part after its last underscore. Every following
    non-empty line that does not start with "#" is stored for that id, so
    when several such lines follow one id the last one wins.

    Args:
        txt_path: Path to the UTF-8 encoded results file.

    Returns:
        dict mapping utterance id -> ``{"raw": str, "norm": str}`` where
        ``norm`` is the output of ``normalize_text``.
    """
    preds = {}
    active_id = None
    with open(txt_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if stripped.startswith("# id:"):
                candidate = stripped.split(":", 1)[1].strip()
                candidate = candidate.replace(".wav", "")
                if not candidate.startswith("test_"):
                    suffix = candidate.split("_")[-1]
                    candidate = "test_" + suffix
                active_id = candidate
                continue
            # Skip blank lines and any other comment lines.
            if not stripped or stripped.startswith("#"):
                continue
            if active_id:
                preds[active_id] = {
                    "raw": stripped,
                    "norm": normalize_text(stripped),
                }
    return preds

# Compute metrics and save the mismatches
def evaluate_file(conll_path, results_path, mismatches_dir="mismatches"):
    """Score one results file against the references and dump mismatches.

    Only ids that occur in both the predictions and the references are
    scored; a reference with no prediction is not counted at all. Scoring
    uses the normalised texts and the module-level ``wer_metric`` /
    ``cer_metric`` objects.

    Every pair whose normalised texts differ is written to
    ``<mismatches_dir>/<results file stem>_mismatches.csv``. The CSV always
    carries its header row, even when there are no mismatches.

    Args:
        conll_path: Path to the CoNLL reference file.
        results_path: Path to the recogniser's results text file.
        mismatches_dir: Directory for the mismatch CSV; created if missing.

    Returns:
        dict with keys ``"file"`` (results file name), ``"WER"``, ``"CER"``
        and ``"mismatches_file"`` (path of the written CSV).
    """
    mismatch_columns = [
        "id",
        "reference_raw",
        "prediction_raw",
        "reference_norm",
        "prediction_norm",
    ]

    references = load_references(conll_path)
    predictions = load_predictions(results_path)

    y_true, y_pred = [], []
    mismatches = []

    for uid, pred in predictions.items():
        if uid not in references:
            continue
        ref = references[uid]
        y_true.append(ref["norm"])
        y_pred.append(pred["norm"])

        if ref["norm"] != pred["norm"]:
            mismatches.append({
                "id": uid,
                "reference_raw": ref["raw"],
                "prediction_raw": pred["raw"],
                "reference_norm": ref["norm"],
                "prediction_norm": pred["norm"]
            })

    wer = wer_metric.compute(references=y_true, predictions=y_pred)
    cer = cer_metric.compute(references=y_true, predictions=y_pred)

    # Save the mismatches. splitext strips only the final extension, so a
    # ".txt" elsewhere in the name is left alone and a name with a different
    # extension still gets the "_mismatches.csv" suffix.
    os.makedirs(mismatches_dir, exist_ok=True)
    results_name = os.path.basename(results_path)
    stem, _ext = os.path.splitext(results_name)
    mism_path = os.path.join(mismatches_dir, stem + "_mismatches.csv")
    # Explicit columns keep the header row even when `mismatches` is empty.
    pd.DataFrame(mismatches, columns=mismatch_columns).to_csv(
        mism_path, index=False, encoding="utf-8-sig"
    )

    return {
        "file": results_name,
        "WER": wer,
        "CER": cer,
        "mismatches_file": mism_path
    }

# Driver for this cell: score every soyle_results_*.txt file in the results
# folder against the same reference file and write a summary CSV.
if __name__ == "__main__":
    conll_path = "/content/drive/MyDrive/Курсовая_3_курс/Голосовые/Text/Test/tt.test_final.conll"
    results_dir = "/content/drive/MyDrive/Курсовая_3_курс/Голосовые/Text/Söyle"   # change to the folder containing soyle_results_*.txt

    results = []
    # os.listdir returns entries in arbitrary order; sorting keeps the printed
    # output and the summary CSV rows in the same order on every run.
    for fname in sorted(os.listdir(results_dir)):
        if fname.startswith("soyle_results_") and fname.endswith(".txt"):
            res = evaluate_file(conll_path, os.path.join(results_dir, fname))
            results.append(res)
            print(res)

    # Summary table across all files
    df = pd.DataFrame(results)
    df.to_csv("wer_cer_results.csv", index=False, encoding="utf-8-sig")
    print("Сводные WER/CER сохранены в wer_cer_results.csv")


{'file': 'soyle_results_full.txt', 'WER': 0.17679970160387914, 'CER': 0.055241548469659806, 'mismatches_file': 'mismatches/soyle_results_full_mismatches.csv'}
{'file': 'soyle_results_m_41.txt', 'WER': 0.11952191235059761, 'CER': 0.03028603477285474, 'mismatches_file': 'mismatches/soyle_results_m_41_mismatches.csv'}
{'file': 'soyle_results_m_27.txt', 'WER': 0.2300556586270872, 'CER': 0.0783601453035807, 'mismatches_file': 'mismatches/soyle_results_m_27_mismatches.csv'}
{'file': 'soyle_results_w_64.txt', 'WER': 0.13671875, 'CER': 0.04359823399558499, 'mismatches_file': 'mismatches/soyle_results_w_64_mismatches.csv'}
{'file': 'soyle_results_w_58.txt', 'WER': 0.17898832684824903, 'CER': 0.050980392156862744, 'mismatches_file': 'mismatches/soyle_results_w_58_mismatches.csv'}
{'file': 'soyle_results_w_54.txt', 'WER': 0.15658362989323843, 'CER': 0.04689942678478374, 'mismatches_file': 'mismatches/soyle_results_w_54_mismatches.csv'}
{'file': 'soyle_results_w_39.txt', 'WER': 0.15666666666666668