In [6]:
import os

# helper funtion that printing obtained fst
def write_dot_file(f, base_filename=None):
    if base_filename == None:
        index = 1
        while os.path.exists(f"fst_visualization_{index}.dot"):
            index += 1
        dot_filename = f"{base_filename}_{index}.dot"
    else:
        dot_filename = f"{base_filename}.dot"
    
    f.draw(dot_filename)

    return dot_filename

In [7]:
import openfst_python as fst

# initializing letters into 3 groups
f_vowel_arr = ['e', 'i', 'ö', 'ü']
b_vowel_arr = ['a', 'ı', 'o', 'u']
constonant_arr = ['b', 'c', 'ç', 'd', 'f', 'g', 'ğ', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'r', 's', 'ş', 't', 'v', 'y', 'z']

input_sym = fst.SymbolTable()
output_sym = fst.SymbolTable()

def fill_inputs(arr):
    for i in range (len(arr)):
        input_sym.add_symbol(arr[i])

def fill_outputs(arr):
    for i in range (len(arr)):
        output_sym.add_symbol(arr[i])

In [8]:
# adding fst symbols 
input_sym.add_symbol('front_vowel') 
input_sym.add_symbol('back_vowel')
input_sym.add_symbol('consonant')         

input_sym.add_symbol('blank')
input_sym.add_symbol('+Pl(ler)')  
input_sym.add_symbol('+Pl(lar)')
output_sym.add_symbol('<>')
output_sym.add_symbol('ler') 
output_sym.add_symbol('lar') 

fill_inputs(f_vowel_arr)  
fill_inputs(b_vowel_arr) 
fill_inputs(constonant_arr)

fill_outputs(f_vowel_arr)  
fill_outputs(b_vowel_arr) 
fill_outputs(constonant_arr)

front = input_sym.find('front_vowel') 
back = input_sym.find('back_vowel') 
cons = input_sym.find('consonant') 
suffix_ler = input_sym.find('+Pl(ler)')
suffix_lar = input_sym.find('+Pl(lar)') 
blank = output_sym.find('<>')
blank_input = input_sym.find('blank')
output_ler = output_sym.find('ler') 
output_lar = output_sym.find('lar') 

In [9]:
# creating a fsm to obtain plural words
f = fst.Fst()

s0 = f.add_state()
s1 = f.add_state()
s2 = f.add_state()
s3 = f.add_state()

f.add_arc(s0, fst.Arc(cons, blank, None, s0))
f.add_arc(s0, fst.Arc(front, blank, None, s1))
f.add_arc(s0, fst.Arc(back, blank, None, s2))
f.add_arc(s1, fst.Arc(cons, blank, None, s1))
f.add_arc(s1, fst.Arc(back, blank, None, s2))
f.add_arc(s1, fst.Arc(front, blank, None, s1))
f.add_arc(s2, fst.Arc(cons, blank, None, s2))
f.add_arc(s2, fst.Arc(front, blank, None, s1))
f.add_arc(s2, fst.Arc(back, blank, None, s2))
f.add_arc(s1, fst.Arc(blank_input, output_ler, None, s3))
f.add_arc(s2, fst.Arc(blank_input, output_lar, None, s3))

# set s0 state as initial, set s3 state as last state
f.set_final(s3)
f.set_start(s0)

f.set_input_symbols(input_sym)
f.set_output_symbols(output_sym)

f.arcsort()

# printing fsm 
write_dot_file(f)

'None_1.dot'

In [17]:
# labeling the letters into 3 groups (front vowels, back vowels and consonants)
def label_of_c(c):
    if c in f_vowel_arr:
        return front
    elif c in b_vowel_arr:
        return back
    elif c in constonant_arr:
        return cons

def transduce_sequence(f, seq):    
    seq_len = len(seq)
    curr_state = f.start()
    output = []
    
    for i in range(seq_len):
        found = False
        label = label_of_c(seq[i])
        if i == seq_len - 1:
            label = blank_input
        for arc in f.arcs(curr_state):
            if arc.ilabel == label:
                output += seq[i]
                if i == seq_len - 1:
                    output += f.output_symbols().find(arc.olabel)
                curr_state = arc.nextstate
                found = True
                # no need to keep going through other arcs, as it's determinized
                break  
        if not found:
            print("Can't transduce the sequence with provided FST")

    return output        

word = ['kalem', 'kitap', 'okul', 'aile', 'tren', 'kravat', 'kiler', 'duvar', 'hayvan', 'kolajen']

for i in word:
    print(transduce_sequence(f, i))

['k', 'a', 'l', 'e', 'm', 'l', 'e', 'r']
['k', 'i', 't', 'a', 'p', 'l', 'a', 'r']
['o', 'k', 'u', 'l', 'l', 'a', 'r']
['a', 'i', 'l', 'e', 'l', 'e', 'r']
['t', 'r', 'e', 'n', 'l', 'e', 'r']
['k', 'r', 'a', 'v', 'a', 't', 'l', 'a', 'r']
['k', 'i', 'l', 'e', 'r', 'l', 'e', 'r']
['d', 'u', 'v', 'a', 'r', 'l', 'a', 'r']
['h', 'a', 'y', 'v', 'a', 'n', 'l', 'a', 'r']
['k', 'o', 'l', 'a', 'j', 'e', 'n', 'l', 'e', 'r']
