In [30]:
import math

import graphviz
import pywrapfst as fst

In [31]:
def create_plural_fst():
    """Build a transducer that rewrites a trailing ``+PL`` tag as ``ler`` or ``lar``.

    The machine has four states: a start state, a "last vowel was front"
    state, a "last vowel was back" state, and a single final state.  Letters
    are copied through unchanged; vowels move between the front/back states
    and consonants leave the current state untouched.  The ``+PL`` tag is only
    accepted from the front/back states, where it is replaced by ``ler`` or
    ``lar`` respectively.

    Returns:
        The constructed ``fst.Fst`` with the same symbol table installed for
        input and output.
    """
    front_vowels = ['e', 'i', 'ö', 'ü']
    back_vowels = ['a', 'ı', 'o', 'u']
    consonants = "bcçdfgğhjklmnprsştvyz"  # Add more if needed

    # Label 0 is registered explicitly for '<eps>'; everything else is
    # auto-numbered in insertion order.
    symbols = fst.SymbolTable()
    symbols.add_symbol('<eps>', 0)
    alphabet = front_vowels + back_vowels + list(consonants) + ['ler', 'lar', '+PL']
    for entry in alphabet:
        symbols.add_symbol(entry)

    machine = fst.Fst()
    machine.set_input_symbols(symbols)
    machine.set_output_symbols(symbols)

    # Every arc carries this tropical weight of 1.0.
    arc_weight = fst.Weight('tropical', 1.0)

    # States are allocated in this order: start, front, back, end.
    q_start, q_front, q_back, q_end = (machine.add_state() for _ in range(4))

    machine.set_start(q_start)
    machine.set_final(q_end)

    def connect(src, in_sym, out_sym, dst):
        """Add one arc src -> dst that reads ``in_sym`` and writes ``out_sym``."""
        machine.add_arc(
            src,
            fst.Arc(symbols.find(in_sym), symbols.find(out_sym), arc_weight, dst),
        )

    # A vowel always moves the machine to the state matching that vowel's class.
    for vowel in front_vowels:
        for src in (q_start, q_back, q_front):
            connect(src, vowel, vowel, q_front)
    for vowel in back_vowels:
        for src in (q_start, q_front, q_back):
            connect(src, vowel, vowel, q_back)

    # The plural tag is realised according to the last vowel seen.
    connect(q_front, '+PL', 'ler', q_end)
    connect(q_back, '+PL', 'lar', q_end)

    # Consonants are copied through without changing the harmony state.
    for consonant in consonants:
        for src in (q_start, q_front, q_back):
            connect(src, consonant, consonant, src)

    return machine

# Build the plural FST, serialize it to 'plural.fst', and render it as a PNG.
# The last line is an IPython shell escape; it presumably needs the OpenFst
# `fstdraw` binary and Graphviz's `dot` on PATH — confirm in your environment.
plural_fst = create_plural_fst()
plural_fst.write('plural.fst')
!fstdraw plural.fst | dot -Tpng > plural.png

In [32]:
def transduce_sequence_det(f, seq):
    """Feed ``seq`` through the transducer ``f`` and return the emitted symbols.

    The walk is deterministic: in each state the first arc whose input label
    matches the current symbol is taken.  Arcs whose output label is the
    output table's ``'<eps>'`` entry emit nothing.

    Args:
        f: A transducer exposing ``input_symbols()``, ``output_symbols()``,
            ``start()``, ``arcs(state)`` and ``final(state)``.
        seq: Iterable of input symbols (strings) as named in the input
            symbol table.

    Returns:
        The list of output symbols as returned by the output symbol table's
        ``find``; an empty list if a symbol is unknown, no matching arc
        exists, or the walk ends in a non-final state (a message is printed
        in each of these cases).
    """
    # Fetch the tables once instead of on every iteration.
    in_syms = f.input_symbols()
    out_syms = f.output_symbols()

    # Output label of epsilon; -1 when the table has no '<eps>' entry, in
    # which case it can never equal a real arc label.
    eps = out_syms.find('<eps>')

    curr_state = f.start()
    output = []

    for char in seq:
        label = in_syms.find(char)
        if label == -1:
            print(f"Character '{char}' not found in FST's input symbols.")
            return []

        for arc in f.arcs(curr_state):
            if arc.ilabel == label:
                if arc.olabel != eps:
                    output.append(arc.olabel)
                curr_state = arc.nextstate
                break
        else:
            # The loop finished without a break: no arc accepts this symbol.
            print(f"No transition for '{char}' in current state.")
            return []

    # A state counts as non-final when its final weight is infinite.
    if float(f.final(curr_state)) == math.inf:
        print("Reached a non-final state at the end of the sequence.")
        return []

    return [out_syms.find(w) for w in output]

In [33]:
import math
def pluralize_word(word, plural_fst):
    """Return the plural form of ``word`` produced by ``plural_fst``.

    The word is split into characters, the ``+PL`` tag is appended, and the
    sequence is run through ``transduce_sequence_det``.  Output symbols that
    come back as ``bytes`` are decoded as UTF-8 before being concatenated.
    If the transduction fails, the result is an empty string.
    """
    tagged_input = [*word, '+PL']
    emitted = transduce_sequence_det(plural_fst, tagged_input)

    pieces = []
    for piece in emitted:
        # Symbols may come back as bytes; normalise them to str.
        if isinstance(piece, bytes):
            piece = piece.decode('utf-8')
        pieces.append(piece)

    return ''.join(pieces)

In [34]:
def possessive_fst():
    """Build a transducer that rewrites a trailing possessive tag as a suffix.

    Input is a stem spelled letter by letter, followed by one of the tags
    ``+1ps``, ``+2ps``, ``+3ps``, ``+1pm``, ``+2pm``, ``+3pm``.  The stem is
    copied through unchanged and the tag is replaced by a suffix chosen from:

    * the harmony class of the last vowel seen (e/i, ö/ü, a/ı, o/u), and
    * whether the stem ends in that vowel or in a consonant.

    For every harmony class there are two states: one entered on a vowel of
    that class ("stem currently ends in a vowel") and one entered on a
    consonant following it ("stem currently ends in a consonant").

    Differences from the earlier hand-written version:

    * ``<eps>`` is registered as label 0, so no real symbol takes the label
      OpenFst reserves for epsilon.
    * Stems ending in o/u take ``muz``/``nuz`` for ``+1pm``/``+2pm``
      (previously ``mız``/``nız``).
    * Any vowel is accepted directly after any other vowel; previously a
      front vowel could not be followed immediately by a back vowel, or the
      reverse.

    Limitations: the stem itself is never altered, so consonant softening
    (e.g. final k -> ğ) is not modelled, and a stem without any vowel has no
    path to the final state.

    Returns:
        The constructed ``fst.Fst`` with the same symbol table installed for
        input and output.
    """
    consonants = "bcçdfgğhjklmnprsştvyz"  # Add more if needed

    # Vowel harmony classes, keyed by a short class name.
    vowel_classes = {
        'fw_ei': ['e', 'i'],
        'fw_ou': ['ö', 'ü'],
        'bw_ai': ['a', 'ı'],
        'bw_ou': ['o', 'u'],
    }

    tags = ['+1ps', '+2ps', '+3ps', '+1pm', '+2pm', '+3pm']

    # Per class: (suffixes after a vowel-final stem,
    #             suffixes after a consonant-final stem), aligned with `tags`.
    suffixes = {
        'fw_ei': (['m', 'n', 'si', 'miz', 'niz', 'leri'],
                  ['im', 'in', 'i', 'imiz', 'iniz', 'leri']),
        'fw_ou': (['m', 'n', 'sü', 'müz', 'nüz', 'leri'],
                  ['üm', 'ün', 'ü', 'ümüz', 'ünüz', 'leri']),
        'bw_ai': (['m', 'n', 'sı', 'mız', 'nız', 'ları'],
                  ['ım', 'ın', 'ı', 'ımız', 'ınız', 'ları']),
        'bw_ou': (['m', 'n', 'su', 'muz', 'nuz', 'ları'],
                  ['um', 'un', 'u', 'umuz', 'unuz', 'ları']),
    }

    # Symbol table: epsilon first so it owns label 0, then tags, suffixes and
    # letters.  Re-adding a symbol that already exists (e.g. 'm' as both a
    # suffix and a consonant) is relied on to be harmless, as it was before.
    _table = fst.SymbolTable()
    _table.add_symbol('<eps>', 0)
    for tag in tags:
        _table.add_symbol(tag)
    for after_vowel, after_consonant in suffixes.values():
        for suffix in after_vowel + after_consonant:
            _table.add_symbol(suffix)
    for letters in vowel_classes.values():
        for vowel in letters:
            _table.add_symbol(vowel)
    for consonant in consonants:
        _table.add_symbol(consonant)

    _fst = fst.Fst()
    _fst.set_input_symbols(_table)
    _fst.set_output_symbols(_table)

    # Kept as None, exactly as before; fst.Arc accepts it (presumably as the
    # semiring's One weight — confirm against the pywrapfst documentation).
    one = None

    start_state = _fst.add_state()
    vowel_state = {name: _fst.add_state() for name in vowel_classes}
    consonant_state = {name: _fst.add_state() for name in vowel_classes}
    end_state = _fst.add_state()

    _fst.set_start(start_state)
    _fst.set_final(end_state)

    def add_arc(src, in_sym, out_sym, dst):
        """Add one arc src -> dst that reads ``in_sym`` and writes ``out_sym``."""
        _fst.add_arc(src, fst.Arc(_table.find(in_sym), _table.find(out_sym), one, dst))

    stem_states = (
        [start_state]
        + list(vowel_state.values())
        + list(consonant_state.values())
    )

    # A vowel, read from anywhere in the stem, moves to its class's vowel state.
    for name, letters in vowel_classes.items():
        for vowel in letters:
            for src in stem_states:
                add_arc(src, vowel, vowel, vowel_state[name])

    # Consonants keep the harmony class but mark the stem as consonant-final.
    for consonant in consonants:
        add_arc(start_state, consonant, consonant, start_state)
        for name in vowel_classes:
            add_arc(vowel_state[name], consonant, consonant, consonant_state[name])
            add_arc(consonant_state[name], consonant, consonant, consonant_state[name])

    # The possessive tag is realised according to class and stem ending.
    for name, (after_vowel, after_consonant) in suffixes.items():
        for tag, v_suffix, c_suffix in zip(tags, after_vowel, after_consonant):
            add_arc(vowel_state[name], tag, v_suffix, end_state)
            add_arc(consonant_state[name], tag, c_suffix, end_state)

    return _fst

# Build the possessive FST, serialize it to 'possessive.fst', and render it as a PNG.
# The last line is an IPython shell escape; it presumably needs the OpenFst
# `fstdraw` binary and Graphviz's `dot` on PATH — confirm in your environment.
p_fst = possessive_fst()
p_fst.write('possessive.fst')
!fstdraw possessive.fst | dot -Tpng > possessive.png

In [35]:
import math
def possessive_word(word, plural_fst, specifier):
    """Return ``word`` with the possessive suffix selected by ``specifier``.

    Args:
        word: The stem to inflect.
        plural_fst: The transducer to run.  Despite the parameter name, the
            notebook's test passes the FST built by ``possessive_fst`` here.
        specifier: The possessive tag appended after the stem's characters
            (the notebook's test uses tags such as ``'+1ps'``).

    Returns:
        The concatenated output symbols, with any ``bytes`` symbols decoded
        as UTF-8; an empty string if the transduction fails.
    """
    tagged_input = [*word, specifier]
    emitted = transduce_sequence_det(plural_fst, tagged_input)

    pieces = []
    for piece in emitted:
        # Symbols may come back as bytes; normalise them to str.
        if isinstance(piece, bytes):
            piece = piece.decode('utf-8')
        pieces.append(piece)

    return ''.join(pieces)

In [40]:
def test_possessive_suffix():
    """Print expected vs. actual possessive forms for a fixed set of words.

    Builds a fresh possessive FST and, for each (word, tag, expected) triple,
    prints one numbered line comparing the expected form with the FST's
    output.  Nothing is asserted; mismatches are only visible in the output.
    """
    suffix_fst = possessive_fst()

    # (stem, possessive tag, expected surface form)
    test_cases = [
        ('kulaklık', '+3ps', 'kulaklığı'),
        ('kömür', '+2pm', 'kömürünüz'),
        ('ev', '+2ps', 'evin'),
        ('araba', '+1pm', 'arabamız'),
        ('kalem', '+2pm', 'kaleminiz'),
        ('çocuk', '+3pm', 'çocukları'),
        ('okul', '+1ps', 'okulum'),
        ('pencere', '+2ps', 'penceren'),
        ('kol', '+1pm', 'kolumuz'),
        ('masa', '+2pm', 'masanız'),
        ('kapı', '+3ps', 'kapısı')
    ]

    # Number the cases from 1 in the printed report.
    for i, (word, specifier, expected) in enumerate(test_cases, start=1):
        result = possessive_word(word, suffix_fst, specifier)
        print(f'{i}: {word}, Specifier: {specifier} -->  Expected: {expected}, Result: {result}')

# Run the possessive checks; the comparison lines are printed as cell output.
test_possessive_suffix()

1: kulaklık, Specifier: +3ps -->  Expected: kulaklığı, Result: kulaklıkı
2: kömür, Specifier: +2pm -->  Expected: kömürünüz, Result: kömürünüz
3: ev, Specifier: +2ps -->  Expected: evin, Result: evin
4: araba, Specifier: +1pm -->  Expected: arabamız, Result: arabamız
5: kalem, Specifier: +2pm -->  Expected: kaleminiz, Result: kaleminiz
6: çocuk, Specifier: +3pm -->  Expected: çocukları, Result: çocukları
7: okul, Specifier: +1ps -->  Expected: okulum, Result: okulum
8: pencere, Specifier: +2ps -->  Expected: penceren, Result: penceren
9: kol, Specifier: +1pm -->  Expected: kolumuz, Result: kolumuz
10: masa, Specifier: +2pm -->  Expected: masanız, Result: masanız
11: kapı, Specifier: +3ps -->  Expected: kapısı, Result: kapısı


In [41]:
def test_plural_suffix():
    """Print expected vs. actual plural forms for a fixed set of words.

    Builds a fresh plural FST and, for each (word, expected) pair, prints one
    numbered line comparing the expected plural with the FST's output.
    Nothing is asserted; mismatches are only visible in the output.
    """
    plural_machine = create_plural_fst()

    # (stem, expected plural form)
    test_cases = [
        ('kulaklık', 'kulaklıklar'),
        ('kitap', 'kitaplar'),
        ('ev', 'evler'),
        ('araba', 'arabalar'),
        ('kalem','kalemler'),
        ('çocuk', 'çocuklar'),
        ('okul', 'okullar'),
        ('pencere', 'pencereler'),
        ('sandalye', 'sandalyeler'),
        ('masa', 'masalar'),
        ('kapı', 'kapılar')
    ]

    # Number the cases from 1 in the printed report.
    for i, (word, expected) in enumerate(test_cases, start=1):
        result = pluralize_word(word, plural_machine)
        print(f'{i}: {word} -->  Expected: {expected}, Result: {result}')

# Run the plural checks; the comparison lines are printed as cell output.
test_plural_suffix()

1: kulaklık -->  Expected: kulaklıklar, Result: kulaklıklar
2: kitap -->  Expected: kitaplar, Result: kitaplar
3: ev -->  Expected: evler, Result: evler
4: araba -->  Expected: arabalar, Result: arabalar
5: kalem -->  Expected: kalemler, Result: kalemler
6: çocuk -->  Expected: çocuklar, Result: çocuklar
7: okul -->  Expected: okullar, Result: okullar
8: pencere -->  Expected: pencereler, Result: pencereler
9: sandalye -->  Expected: sandalyeler, Result: sandalyeler
10: masa -->  Expected: masalar, Result: masalar
11: kapı -->  Expected: kapılar, Result: kapılar
