In [5]:
def read_nouns(filename):
    with open(filename, "r") as f:
        for line in f:
            word = line.strip().lower()
            if word:  
                yield word

nouns = set(read_nouns("brown_nouns.txt"))
print(f"Loaded {len(nouns)} nouns.")

def transition(state, word):
    if state == 'START':
        if word.endswith('es'):
            return 'ES_CHECK'
        elif word.endswith('ies') and len(word) > 3:
            return 'IES_CHECK'
        elif word.endswith('s') and len(word) > 1:
            return 'S_CHECK'
        elif word in nouns and not word.endswith('s'):
            return 'SINGULAR'
        else:
            return 'FAIL'

    elif state == 'ES_CHECK':
        possible_root = word[:-2]
        if possible_root.endswith(("s", "x", "z")) or possible_root.endswith(("ch", "sh")):
            if possible_root in nouns:
                return ('OUTPUT', possible_root, 'PL')
        return 'FAIL'

    elif state == 'IES_CHECK':
        possible_root = word[:-3] + 'y'
        if len(possible_root) >= 2 and possible_root[-2] not in 'aeiou':
            if possible_root in nouns:
                return ('OUTPUT', possible_root, 'PL')
        return 'FAIL'

    elif state == 'S_CHECK':
        possible_root = word[:-1]
        if possible_root in nouns:
            if not (possible_root.endswith(("s", "x", "z", "ch", "sh")) or
                    (possible_root.endswith("y") and possible_root[-2] not in "aeiou")):
                return ('OUTPUT', possible_root, 'PL')
        return 'FAIL'

    elif state == 'SINGULAR':
        return ('OUTPUT', word, 'SG')

    else:
        return 'FAIL'


def get_root_fsm(word):
    state = 'START'
    while True:
        result = transition(state, word)
        if isinstance(result, tuple) and result[0] == 'OUTPUT':
            _, root, tag = result
            return root, tag
        elif result == 'FAIL':
            return None, None
        else:
            state = result


# Example usage - testing some words
test_words = ["female", "females", "egg", "eggs", "boy", "boys", "mother", "mothers"]

for w in test_words:
    root, tag = get_root_fsm(w.lower())
    if root and tag:
        print(f"{w} = {root}+N+{tag}")
    else:
        print(f"{w} = Unknown")

Loaded 17892 nouns.
female = female+N+SG
females = Unknown
egg = egg+N+SG
eggs = egg+N+PL
boy = boy+N+SG
boys = boy+N+PL
mother = mother+N+SG
mothers = mother+N+PL
