In [277]:
import pywrapfst as fst

In [278]:
def create_plural_fst():
    """Build an FST that accepts words ending in the plural suffix -lar / -ler.

    Every arc copies its input label to its output, so the machine works as an
    acceptor. Labels are Unicode code points (``ord`` of the character).

    Structure:
      * ``start_state`` loops on every non-vowel until the first vowel is read.
      * ``back_vowel_state`` / ``front_vowel_state`` record whether the most
        recently read vowel was a back vowel (a, ı, o, u) or a front vowel
        (e, i, ö, ü); non-vowels loop on the current state.
      * From the back-vowel state the path ``l -> a -> r`` leads to the single
        final state; from the front-vowel state the path ``l -> e -> r`` does.
      * The final state has no outgoing arcs, so the suffix must end the word.

    The machine is non-deterministic: ``l`` both loops on a vowel state and
    opens the suffix path.

    Returns:
        The constructed ``fst.Fst``.
    """
    f = fst.Fst()

    # Weight placed on every arc. Note that 1.0 is a per-arc cost; the
    # identity ("one") of the tropical semiring is 0.0.
    arc_weight = fst.Weight('tropical', 1.0)

    # Initial state
    start_state = f.add_state()
    f.set_start(start_state)

    # States tracking the class of the last vowel read
    back_vowel_state = f.add_state()
    front_vowel_state = f.add_state()

    # States along the two suffix paths
    l_back_state = f.add_state()
    l_front_state = f.add_state()
    r_state = f.add_state()
    e_state = f.add_state()
    a_state = f.add_state()

    f.set_final(r_state)

    back_vowels = 'aıou'
    front_vowels = 'eiöü'

    # Code points 1..351 cover ASCII plus the Turkish letters up to
    # 'ş' (U+015F). Label 0 is skipped on purpose: OpenFst reserves it for
    # epsilon, and including it would add epsilon self-loops to every
    # tracking state.
    for label in range(1, 352):
        char = chr(label)
        if char in back_vowels:
            target_from_start = target_from_back = target_from_front = back_vowel_state
        elif char in front_vowels:
            target_from_start = target_from_back = target_from_front = front_vowel_state
        else:
            # Non-vowels never change the tracked vowel class.
            target_from_start = start_state
            target_from_back = back_vowel_state
            target_from_front = front_vowel_state
        f.add_arc(start_state, fst.Arc(label, label, arc_weight, target_from_start))
        f.add_arc(back_vowel_state, fst.Arc(label, label, arc_weight, target_from_back))
        f.add_arc(front_vowel_state, fst.Arc(label, label, arc_weight, target_from_front))

    # Suffix after a back vowel: l-a-r
    f.add_arc(back_vowel_state, fst.Arc(ord('l'), ord('l'), arc_weight, l_back_state))
    f.add_arc(l_back_state, fst.Arc(ord('a'), ord('a'), arc_weight, a_state))
    f.add_arc(a_state, fst.Arc(ord('r'), ord('r'), arc_weight, r_state))
    # Suffix after a front vowel: l-e-r
    f.add_arc(front_vowel_state, fst.Arc(ord('l'), ord('l'), arc_weight, l_front_state))
    f.add_arc(l_front_state, fst.Arc(ord('e'), ord('e'), arc_weight, e_state))
    f.add_arc(e_state, fst.Arc(ord('r'), ord('r'), arc_weight, r_state))
    return f


In [279]:
def create_possessive_fst():
    """Build an FST that accepts words ending in the first-person possessive -m.

    Every arc copies its input label (the character's code point) to its
    output. The alphabet is the 29 lowercase Turkish letters.

    Vowels are grouped into four classes (a/ı, e/i, o/u, ö/ü). For each class
    there is a "vowel" state (the last letter read was a vowel of that class)
    and a "consonant" state (one or more consonants were read since a vowel of
    that class). A word is accepted when it ends in:
      * a vowel followed directly by ``m``, or
      * a consonant, then the linking vowel of the tracked class
        (ı, i, u or ü respectively), then ``m``.

    Returns:
        The constructed ``fst.Fst``.
    """
    machine = fst.Fst()
    weight = fst.Weight('tropical', 1.0)

    start = machine.add_state()
    machine.set_start(start)

    # State ids are handed out in creation order; keep this order stable.
    accept = machine.add_state()
    vowel_e = machine.add_state()
    vowel_a = machine.add_state()
    vowel_o = machine.add_state()
    vowel_oe = machine.add_state()
    cons_a = machine.add_state()
    cons_e = machine.add_state()
    cons_o = machine.add_state()
    cons_oe = machine.add_state()
    linked = machine.add_state()  # reached after consonant + linking vowel

    machine.set_final(accept)

    def connect(src, symbol, dst):
        # Add an identity arc labelled with the code point of `symbol`.
        code = ord(symbol)
        machine.add_arc(src, fst.Arc(code, code, weight, dst))

    # (member vowels, vowel state, consonant state, linking vowel)
    harmony = (
        ('aı', vowel_a, cons_a, 'ı'),
        ('ei', vowel_e, cons_e, 'i'),
        ('ou', vowel_o, cons_o, 'u'),
        ('öü', vowel_oe, cons_oe, 'ü'),
    )
    class_of = {vowel: row for row in harmony for vowel in row[0]}

    for letter in 'abcçdefgğhıijklmnoöprsştuüvyz':
        row = class_of.get(letter)
        if row is None:
            # Consonant: stay before the first vowel, or move to / stay in
            # the consonant state of the class currently being tracked.
            connect(start, letter, start)
            for _, v_state, c_state, _ in harmony:
                connect(c_state, letter, c_state)
                connect(v_state, letter, c_state)
            continue

        # Vowel: every state moves to this class's vowel state. The class's
        # own states are wired first, then the other classes in table order.
        _, target, own_cons, _ = row
        connect(start, letter, target)
        connect(target, letter, target)
        connect(own_cons, letter, target)
        for other in harmony:
            if other is not row:
                connect(other[1], letter, target)
                connect(other[2], letter, target)

    # Consonant-final stem: linking vowel, then 'm'.
    for _, _, c_state, link_vowel in harmony:
        connect(c_state, link_vowel, linked)
    connect(linked, 'm', accept)

    # Vowel-final stem: 'm' attaches directly.
    for v_state in (vowel_a, vowel_e, vowel_o, vowel_oe):
        connect(v_state, 'm', accept)
    return machine

In [280]:
def string_to_fst(s):
    """Build a linear-chain FST that spells out the string ``s``.

    One state is created per character boundary. The arc for each character
    uses the character's code point as both input and output label and carries
    tropical weight 1.0. The state reached after the last character is final;
    for an empty string the start state itself is final.

    Args:
        s: The string to encode.

    Returns:
        The constructed ``fst.Fst``.
    """
    acceptor = fst.Fst()
    weight = fst.Weight('tropical', 1.0)

    tail = acceptor.add_state()
    acceptor.set_start(tail)

    for symbol in s:
        head = acceptor.add_state()
        acceptor.add_arc(tail, fst.Arc(ord(symbol), ord(symbol), weight, head))
        tail = head

    # `tail` is now the last state of the chain.
    acceptor.set_final(tail)
    return acceptor

def apply_fst(morph_fst, word):
    """Run ``word`` through ``morph_fst`` and return the resulting string.

    The word is turned into a linear FST, composed with ``morph_fst``,
    projected onto its output labels and stripped of epsilons. The output
    string is then read off the lowest-weight path of the result by walking
    arcs from the start state. Reading along one path, instead of
    concatenating the labels of every arc of every state, keeps the result
    well-formed even when the composition contains more than one path or its
    states are not numbered in path order.

    Args:
        morph_fst: The FST to apply.
        word: The input string.

    Returns:
        The output string of the best path, or ``""`` when ``morph_fst`` does
        not accept ``word`` (the composition is empty).
    """
    word_fst = string_to_fst(word)
    result_fst = fst.compose(word_fst, morph_fst)
    result_fst.project(project_output=True)  # Keep only the output side
    result_fst.rmepsilon()  # Remove any epsilons

    # An empty composition has no start state (reported as a negative id).
    if result_fst.start() < 0:
        return ""

    best_path = fst.shortestpath(result_fst)

    chars = []
    visited = set()
    state = best_path.start()
    # Follow the single path from the start state; `visited` is only a
    # safety net against an unexpected cycle.
    while state >= 0 and state not in visited:
        visited.add(state)
        outgoing = list(best_path.arcs(state))
        if not outgoing:
            break
        arc = outgoing[0]
        if arc.olabel != 0:  # Skip epsilons
            chars.append(chr(arc.olabel))
        state = arc.nextstate
    return "".join(chars)

In [281]:
# Classify each sample word: a word gets a category when applying the
# corresponding FST returns the word itself.
test_words = [
    'şamdanlar', 'kedi', 'arabam', 'evler', 'köpek', 'kitaplar',
    'çocuk', 'kale', 'dolaplar', 'bilgisayarlar', 'masam', 'defterler',
]

plural_fst = create_plural_fst()
possessive_fst = create_possessive_fst()

for word in test_words:
    plural_form = apply_fst(plural_fst, word)
    possessive_form = apply_fst(possessive_fst, word)

    # Keep the labels whose FST reproduced the word, plural first.
    category = [
        label
        for label, form in (("plural", plural_form), ("possessive", possessive_form))
        if form == word
    ]

    # An empty join is falsy, so the fallback text is used when nothing matched.
    category_str = " and ".join(category) or "neither plural nor possessive"
    print(f"{word.ljust(15)} -> Category: {category_str}")


şamdanlar       -> Category: plural
kedi            -> Category: neither plural nor possessive
arabam          -> Category: possessive
evler           -> Category: plural
köpek           -> Category: neither plural nor possessive
kitaplar        -> Category: plural
çocuk           -> Category: neither plural nor possessive
kale            -> Category: neither plural nor possessive
dolaplar        -> Category: plural
bilgisayarlar   -> Category: plural
masam           -> Category: possessive
defterler       -> Category: plural


In [282]:
import subprocess
import pywrapfst as fst

def remove_epsilons(f):
    """Remove epsilon transitions from ``f`` and return the result.

    Delegates to ``f.rmepsilon()`` and hands back whatever that call returns.
    NOTE(review): elsewhere in this notebook ``rmepsilon()`` is called for its
    side effect only, so ``f`` itself appears to be modified in place and the
    returned object is presumably ``f`` -- confirm against the installed
    pywrapfst version.

    Args:
        f: The FST to clean up.

    Returns:
        The value returned by ``f.rmepsilon()``.
    """
    cleaned = f.rmepsilon()
    return cleaned

def create_symbol_table(num_labels=352):
    """Create a symbol table mapping code points to their characters.

    Key 0 is registered as ``'<eps>'``. Keys ``1 .. num_labels - 1`` map to
    ``chr(key)``. Key 0 is deliberately not registered a second time as
    ``chr(0)``: it is reserved for epsilon, and adding another symbol under
    the same key would leave the table with two names for label 0.

    NOTE(review): control and whitespace characters (e.g. tab, newline,
    space) are still registered as symbols; confirm that downstream
    text-format tools handle them as expected.

    Args:
        num_labels: Exclusive upper bound of the code points to register.
            The default of 352 covers ASCII plus the Turkish letters up to
            'ş' (U+015F).

    Returns:
        The populated ``fst.SymbolTable``.
    """
    sym_table = fst.SymbolTable()
    sym_table.add_symbol('<eps>', 0)  # Epsilon must be included
    for code_point in range(1, num_labels):
        sym_table.add_symbol(chr(code_point), code_point)
    return sym_table

# NOTE: create_plural_fst, create_possessive_fst, string_to_fst and apply_fst are defined in the earlier cells; run those cells first.

def draw_fst_to_dot(f, filename, sym_table):
    """Attach ``sym_table`` to ``f`` and draw ``f`` to ``filename``.

    The symbol table is set as both the input and the output symbols of ``f``
    (so ``f`` is modified), then ``f.draw`` is called with ``portrait=True``.

    Args:
        f: The FST to draw.
        filename: Destination path passed to ``f.draw``.
        sym_table: Symbol table used for both input and output labels.
    """
    for attach_symbols in (f.set_input_symbols, f.set_output_symbols):
        attach_symbols(sym_table)
    f.draw(filename, portrait=True)

def dot_to_png(dot_file, png_file):
    """Render a DOT file to a PNG image by invoking the ``dot`` executable.

    The command is passed as an argument list (no shell). Because
    ``check=True`` is used, ``subprocess.run`` raises
    ``subprocess.CalledProcessError`` when ``dot`` exits with a non-zero
    status.

    Args:
        dot_file: Path of the DOT file to read.
        png_file: Path of the PNG file to write.
    """
    command = ["dot", "-Tpng", dot_file, "-o", png_file]
    subprocess.run(command, check=True)

# Symbol table shared by every FST in this cell
sym_table = create_symbol_table()

# Sample words to compose with the plural and possessive FSTs and visualize
test_words = [
    'şamdanlar', 'kedi', 'arabam', 'evler', 'köpek', 'kitaplar',
    'çocuk', 'kale', 'dolaplar', 'bilgisayarlar', 'masam', 'defterler',
]

plural_fst = create_plural_fst()
possessive_fst = create_possessive_fst()

# Attach the shared symbol table to both morphology FSTs
for morph_fst in (plural_fst, possessive_fst):
    morph_fst.set_input_symbols(sym_table)
    morph_fst.set_output_symbols(sym_table)

for word in test_words:
    word_acceptor = string_to_fst(word)
    word_acceptor.set_input_symbols(sym_table)
    word_acceptor.set_output_symbols(sym_table)

    # Compose the word with each FST, then strip epsilon transitions
    plural_composed_fst = remove_epsilons(fst.compose(word_acceptor, plural_fst))
    possessive_composed_fst = remove_epsilons(fst.compose(word_acceptor, possessive_fst))

    # Output file names for this word
    plural_dot_file, plural_png_file = f"{word}_plural.dot", f"{word}_plural.png"
    possessive_dot_file, possessive_png_file = f"{word}_possessive.dot", f"{word}_possessive.png"

    # Write both DOT files first, then convert both to PNG
    draw_fst_to_dot(plural_composed_fst, plural_dot_file, sym_table)
    draw_fst_to_dot(possessive_composed_fst, possessive_dot_file, sym_table)
    dot_to_png(plural_dot_file, plural_png_file)
    dot_to_png(possessive_dot_file, possessive_png_file)

    print(f"Visualizations for '{word}' created: {plural_png_file} and {possessive_png_file}")


Visualizations for 'şamdanlar' created: şamdanlar_plural.png and şamdanlar_possessive.png
Visualizations for 'kedi' created: kedi_plural.png and kedi_possessive.png
Visualizations for 'arabam' created: arabam_plural.png and arabam_possessive.png
Visualizations for 'evler' created: evler_plural.png and evler_possessive.png
Visualizations for 'köpek' created: köpek_plural.png and köpek_possessive.png
Visualizations for 'kitaplar' created: kitaplar_plural.png and kitaplar_possessive.png
Visualizations for 'çocuk' created: çocuk_plural.png and çocuk_possessive.png
Visualizations for 'kale' created: kale_plural.png and kale_possessive.png
Visualizations for 'dolaplar' created: dolaplar_plural.png and dolaplar_possessive.png
Visualizations for 'bilgisayarlar' created: bilgisayarlar_plural.png and bilgisayarlar_possessive.png
Visualizations for 'masam' created: masam_plural.png and masam_possessive.png
Visualizations for 'defterler' created: defterler_plural.png and defterler_possessive.png
