In [1]:
# Similar as noun_pl_correction, define a function that correct based on the case of the noun.

def noun_correction(estimated_file, correct_file):
    def is_front_vowel(v):
        return v in "eiöü"

    def is_back_vowel(v):
        return v in "aıou"
    
    def is_round_vowel(v):
        return v in "ouöü"
    
    def is_unround_vowel(v):
        return v in "eiaı"
    
    def is_str_case(msd):
        return any(case in msd for case in ["DAT", "LOC", "ABL"])

    def is_obl_case(msd):
        return any(case in msd for case in ["ACC", "GEN"])

    def noun_case_suffix(lemma, msd):
        # Determine the correct case suffix based on the last vowel and msd
        for char in reversed(lemma):
            if is_back_vowel(char):
                if is_obl_case(msd):
                    return "a"
                elif is_str_case(msd):
                    if is_unround_vowel(char):
                        return "ı"
                    elif is_round_vowel(char):
                        return "u"
            elif is_front_vowel(char):
                if is_obl_case(msd):
                    return "e"
                elif is_str_case(msd):
                    if is_unround_vowel(char):
                        return "i"
                    elif is_round_vowel(char):
                        return "ü"

    def noun_case_correction(output, lemma, msd):
        suffix_start = len(lemma)
        suffix = output[suffix_start:]

        expected_suffix = noun_case_suffix(lemma, msd)

        if expected_suffix and suffix != expected_suffix:
            return output[:suffix_start] + expected_suffix
        return output

    with open(estimated_file, 'r', encoding='utf8') as output, open(correct_file, 'r', encoding='utf8') as correct:
        estimated_lines = [line.strip() for line in output if line.strip()]
        correct_lines = [line.strip() for line in correct if line.strip()]

    total = 0
    correct_before = 0
    correct_after = 0

    for estimated_line, correct_line in zip(estimated_lines, correct_lines):
        lemma, msd, predicted = estimated_line.split('\t')
        lemma_correct, msd_correct, correct = correct_line.split('\t')

        assert lemma == lemma_correct and msd == msd_correct

        if predicted == correct:
            correct_before += 1

        corrected = predicted

        if 'N' in msd:
            corrected = noun_case_correction(corrected, lemma, msd)

        if corrected == correct:
            correct_after += 1

        total += 1

    accuracy_before = correct_before / total
    accuracy_after = correct_after / total

    return accuracy_before, accuracy_after

accuracy_before, accuracy_after = noun_correction('data/tur.out', 'data/tur.dev')
print(f"Accuracy Before Correction: {accuracy_before:.2%}")
print(f"Accuracy After Correction: {accuracy_after:.2%}")

Accuracy Before Correction: 68.90%
Accuracy After Correction: 42.70%
