### 1. Input Data
- Read all nouns from ```brown_nouns.txt``` and store them in a set called ```nouns```

In [24]:
import string

# STEP 1: Build the noun lexicon from the Brown corpus word list.
# Each line is stripped of surrounding whitespace and lower-cased;
# lines that are empty after stripping are skipped.
with open("brown_nouns.txt", "r") as f:
    stripped_lines = (line.strip() for line in f)
    nouns = {entry.lower() for entry in stripped_lines if entry}

### 2. States
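- START : FST begins here
- NORMAL : copying input letters to output without special changes
- FOUND_Y : special ending, replace y with ie and add s
- FOUND_S : special ending, add es and not just s
- FOUND_C : just read c, waiting to see whether h follows (ch ending)
- FOUND_S2 : just read s, waiting to see whether h follows (sh ending)
- END : final state, reached through the epsilon transition that adds the suffix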

In [25]:
# State inventory of the pluralisation FST, grouped by role.
states = {
    # Entry and exit points of the machine.
    "START",
    "END",
    # Plain letter-by-letter copying.
    "NORMAL",
    # One-letter look-ahead: a 'c' or an 's' was just read and the next
    # letter decides whether it forms 'ch' / 'sh'.
    "FOUND_C",
    "FOUND_S2",
    # Endings that select a special plural suffix:
    # consonant+y, and s/z/x/ch/sh (which needs 'es').
    "FOUND_Y",
    "FOUND_S",
}

### 3. Input and Output Alphabets
- Input alphabet : all lowercase letters and ε (the empty string)
- Output alphabet : all lowercase letters, ε, and the special markers N, SG, PL and +

In [26]:
import string
# Input symbols: the 26 lowercase ASCII letters plus the epsilon marker.
input_alphabet = {*string.ascii_lowercase, "ε"}
# Output symbols: the same letters and epsilon, plus the feature markers
# used in analyses such as "fox+N+PL".
output_alphabet = {*string.ascii_lowercase, "ε", "N", "SG", "PL", "+"}

### 4. Transition Table
- At the beginning, read a letter, go to NORMAL and output it
- In the NORMAL state, reading another letter stays in NORMAL and outputs that letter

In [27]:
# Base table: from START or NORMAL, every lowercase letter is copied to the
# output unchanged and the machine moves to (or stays in) NORMAL.
# Keys are (state, input symbol); values are (next state, output string).
transitions = {
    (source, letter): ("NORMAL", letter)
    for letter in string.ascii_lowercase
    for source in ("START", "NORMAL")
}

### Special transitions for Morphology

#### Y replacement
- If in NORMAL and the next letter is y, go to FOUND_Y, where the y is replaced by ie
- From FOUND_Y, the epsilon transition adds the final s

#### E-insertion endings (s, z, x, ch, sh)

In [28]:
# --- Morphology-aware transitions (plural spelling rules) ---
#
# Every letter-consuming state gets an outgoing arc for every lowercase
# letter, so a special letter in the middle of a word (the 'x' in "taxi",
# the 's' in "test", the 'y' in "system") neither strands the machine nor
# leaves it in the wrong suffix state. The current state always describes
# how the word ends *so far*:
#
#   NORMAL       last letter is an ordinary consonant            -> + s
#   AFTER_VOWEL  last letter is a vowel (a/e/i/o/u)              -> + s
#   FOUND_C      last letter is 'c' (an 'h' next would form ch)  -> + s
#   FOUND_S2     last letter is 's' (an 'h' next would form sh)  -> + es
#   FOUND_S      word so far ends in x / z / ch / sh             -> + es
#   FOUND_Y      word so far ends in consonant + y               -> y becomes ies
#
# AFTER_VOWEL is an additional state (it is not listed in the `states` set);
# it is what lets "boy" become "boys" while "try" becomes "tries".
#
# Not covered by these rules: irregular plurals, consonant doubling
# ("quiz"), and nouns in -o / -f that take a special plural.


def _state_after(state, ch):
    """Return the state reached after reading letter `ch` while in `state`."""
    if ch == "h" and state in ("FOUND_C", "FOUND_S2"):
        return "FOUND_S"        # completed the digraph 'ch' / 'sh'
    if ch in ("x", "z"):
        return "FOUND_S"
    if ch == "s":
        return "FOUND_S2"
    if ch == "c":
        return "FOUND_C"
    if ch == "y":
        # y -> ie only applies after a consonant; word-initial 'y' and
        # vowel + 'y' ("boy", "key") are copied like any other letter.
        return "NORMAL" if state in ("START", "AFTER_VOWEL") else "FOUND_Y"
    if ch in ("a", "e", "i", "o", "u"):
        return "AFTER_VOWEL"
    return "NORMAL"


def _add_plural_transitions(table):
    """Install the plural-spelling arcs into `table` (modified in place).

    `table` maps (state, input symbol) -> (next state, output string).
    Existing (state, letter) arcs for the states handled here, including the
    plain-copy START / NORMAL arcs, are overwritten.
    """
    letter_states = (
        "START", "NORMAL", "AFTER_VOWEL",
        "FOUND_Y", "FOUND_S", "FOUND_C", "FOUND_S2",
    )
    for state in letter_states:
        # FOUND_Y has read a 'y' it has not written yet: whether it surfaces
        # as "y" or as "ie" is only known once the next symbol is seen.
        held_back = "y" if state == "FOUND_Y" else ""
        for ch in string.ascii_lowercase:
            target = _state_after(state, ch)
            if target == "FOUND_Y":
                # The new 'y' is held back in turn; only flush what was pending.
                emitted = held_back
            else:
                emitted = held_back + ch
            table[(state, ch)] = (target, emitted)

    # --- Epsilon transitions for the final suffix ---
    # START deliberately has no epsilon arc: the empty string is not a word.
    suffix_by_state = {
        "NORMAL": "s",        # bag   -> bags
        "AFTER_VOWEL": "s",   # idea  -> ideas
        "FOUND_C": "s",       # music -> musics
        "FOUND_S": "es",      # fox   -> foxes, bush -> bushes
        "FOUND_S2": "es",     # bus   -> buses
        "FOUND_Y": "ies",     # try   -> tries (the held-back y becomes ie)
    }
    for state, suffix in suffix_by_state.items():
        table[(state, "ε")] = ("END", suffix)


_add_plural_transitions(transitions)

In [29]:
def run_fst(word):
    """Run the pluralisation FST on `word`.

    The word must be present in the module-level `nouns` set. Starting in
    "START", each character is looked up in the module-level `transitions`
    table as (state, character); the outputs of the arcs taken are
    concatenated. After the last character, the epsilon arc of the current
    state (if any) is taken to append the plural suffix.

    Returns:
        The string "Invalid Word" if the word is not in `nouns`, if some
        character has no transition from the current state, or if the
        machine does not finish in "END". Otherwise a dict with the keys
        "singular_form", "singular_features", "plural_form" and
        "plural_features".
    """
    if word not in nouns:
        return "Invalid Word"

    current = "START"
    pieces = []

    for symbol in word:
        arc = transitions.get((current, symbol))
        if arc is None:
            # No arc for this symbol from the current state.
            return "Invalid Word"
        current, emitted = arc
        pieces.append(emitted)

    # Final epsilon transition to add the plural suffix, when one exists.
    closing_arc = transitions.get((current, "ε"))
    if closing_arc is not None:
        current, emitted = closing_arc
        pieces.append(emitted)

    if current != "END":
        return "Invalid Word"

    return {
        "singular_form": word,
        "singular_features": f"{word}+N+SG",
        "plural_form": "".join(pieces),
        "plural_features": f"{word}+N+PL",
    }

In [30]:
# Demo: run the FST over a few sample words and print the plural analysis,
# the singular analysis and a separator for each accepted word.
test_words = ["fox", "watch", "bush", "try", "bag", "foxs"]

for w in test_words:
    result = run_fst(w)
    if result == "Invalid Word":
        print(f"{w} = Invalid Word")
        continue
    print(
        f"{result['plural_form']} = {result['plural_features']}",
        f"{result['singular_form']} = {result['singular_features']}",
        "---",
        sep="\n",
    )

foxes = fox+N+PL
fox = fox+N+SG
---
watches = watch+N+PL
watch = watch+N+SG
---
bushes = bush+N+PL
bush = bush+N+SG
---
tries = try+N+PL
try = try+N+SG
---
bags = bag+N+PL
bag = bag+N+SG
---
foxs = Invalid Word
