# **Install and import necessary libraries**




In [None]:
!pip install hfst
import hfst
from hfst import compile_lexc_file

In [None]:
# Compile the lexc lexicon into the base transducer used by every later cell.
lexicon_path = 'russian.lexc'
generator = hfst.compile_lexc_file(lexicon_path)

# **Synthesis**
Here we iterate through all of the stems and synthesize all of their forms. Note that some words will not come out correctly yet, since we haven't implemented any rules for handling the exceptions (words like *человек*, *озеро*, *мать*).

In [None]:
from hfst import HfstTransducer, regex

# Independent copy of the compiled lexicon transducer.
analyzer = HfstTransducer(generator)

cases = (
    'Nom',
    'Gen',
    'Dat',
    'Acc',
    'Ins',
    'Pre',
)
numbers = ('Sg', 'Pl')

# Stems to decline; each trailing comment names the stem's lexc continuation
# class and, where relevant, why the plain paradigm is not enough.
stems = (
    # First declension
    "стен",  # N1dekl_a
    "зарплат",  # N1dekl_a
    "бур",  # N1dekl_ya

    # Second declension
    "стол",  # N2dekl_cons_ply -- "ply": Plural Nominative ends in -ы
    "рот",  # N2dekl_cons_ply
    "крюк",  # N2dekl_cons_pli -- "pli": Plural Nominative ends in -и
    "адвокат",  # N2dekl_cons_anim_ply -- animate noun (different Accusative ending)
    "человек",  # N2dekl_cons_anim_pli -- irregular plural built on another stem (люди, людей, людям...)
    "озер",  # N2dekl_o -- е -> ё change in the plural (озёра, озёр...)
    "пол",  # N2dekl_e

    # Third declension
    "кроват",  # N3dekl
    "тетрад",  # N3dekl
    "мат",  # N3dekl_anim -- irregular: suffix -ер is inserted (матери, матерью, матерей...)

    # Indeclinable
    "шоссе",  # N0dekl
)

# All number/case tag suffixes, numbers varying slowest, so the forms are
# printed singular first and in the order given by `cases`.
tag_suffixes = [f"+N+{number}+{_case}" for number in numbers for _case in cases]

for stem in stems:
    for tag_suffix in tag_suffixes:
        print(analyzer.lookup(stem + tag_suffix))
    # Blank line between the paradigms of consecutive stems.
    print()

# Handling letter shifts
The word "озеро" has a letter shift (е → ё) in its stem in the plural. We need to write a rule that deals with it.
Here, the regex replaces the stem when it is followed by "^S", where S is the shift marker.

In [None]:
# Stem replacement: rewrite "озер" as "озёр" only when the shift marker "^S"
# follows the stem.
Replacement = hfst.regex('о з е р -> о з ё р || _ "^" S')

# Try the rule on a handful of marked intermediate forms.
for marked_form in ('озер^Sа', 'озер^S', 'озер^Sам', 'озер^Sами', 'озер^Sах'):
    print(Replacement.lookup(marked_form))

# Handling suffix insertion
The word "мать" takes an additional suffix -ер in all forms except Singular Nominative and Singular Accusative. We need to write a rule that deals with it. Here, the regex inserts the suffix when the stem is followed by the insertion marker I.

In [None]:
# Suffix insertion: extend the stem "мат" to "матер" only when the insertion
# marker "^I" follows it.
Insertion = hfst.regex('м а т -> м а т е р || _ "^" I')

# Try the rule on two marked intermediate forms.
for marked_form in ("мат^Iи", "мат^Iью"):
    print(Insertion.lookup(marked_form))

# Handling letter deletions
The word "рот" loses the letter "о" in all forms except Singular Nominative and Singular Accusative. We need to write a rule that deals with it. Here, the regex deletes the letter when the stem is followed by the deletion marker D.

In [None]:
# Letter deletion: shorten the stem "рот" to "рт" only when the deletion
# marker "^D" follows it.
Deletion = hfst.regex('р о т -> р т || _ "^" D')

# Try the rule on two marked intermediate forms.
for marked_form in ("рот^Dа", "рот^Dы"):
    print(Deletion.lookup(marked_form))

# Handling irregular declension forms
The word "человек" uses a completely different stem in Plural - "люд" (люди, людей, людям...). We need to write a rule that deals with it. Here, the regex is used to replace one stem with another. Since there is no overlap between the Singular number and Plural number endings, we don't have to use any marker.

In [None]:
# Irregular plural of "человек": each rule rewrites one regularly generated
# (but wrong) plural form into the correct form built on the stem "люд".
# The rules are composed in the listed order, so the "человеками" rule has to
# come before the "человекам" rule: the latter is a prefix of the former and
# would otherwise rewrite it first.
chelovek_rules = (
    'ч е л о в е к и -> л ю д и',
    'ч е л о в е к о в -> л ю д е й',
    'ч е л о в е к а м и -> л ю д ь м и',
    'ч е л о в е к а м -> л ю д я м',
    'ч е л о в е к а х -> л ю д я х',
)
chelovek_exceptions = [hfst.regex(rule) for rule in chelovek_rules]

# Single transducer that applies all of the rules above in sequence.
chelovek_exception = hfst.compose(chelovek_exceptions)

# Try the combined rule on every regular plural form it is meant to replace.
for regular_form in ("человеки", "человеков", "человекам", "человеками", "человеках"):
    print(chelovek_exception.lookup(regular_form))

# Deleting special symbols

In [None]:
# Marker cleanup: delete the rule-trigger letters S, I, E, D and the "^"
# separator so that only the surface form is left.
marker_removal_rule = '[S | I | E | D | "^"] -> 0'
Cleanup = hfst.regex(marker_removal_rule)

# Try the rule on a form that still carries the insertion marker.
marked_sample = "матер^Iи"
print(Cleanup.lookup(marked_sample))

# FST composition

In [None]:
from hfst import compose

# Synthesis | Generation
# Chain the lexicon with the rewrite rules. The stem-changing rules run first
# because they are triggered by the markers, Cleanup then removes the markers,
# and the "человек" exceptions run last, on marker-free forms.
pipeline_stages = (
    generator,
    Replacement,
    Insertion,
    Deletion,
    Cleanup,
    chelovek_exception,
)
cascade = compose(pipeline_stages)

# Spot-check one form for each kind of irregularity handled above.
for analysis in ("человек+N+Pl+Pre", "озер+N+Pl+Dat", "мат+N+Sg+Gen"):
    print(cascade.lookup(analysis))

In [None]:
# With the HFST library the epsilon symbol has to be removed: drop the epsilon
# transitions from the cascade, then strip the literal placeholder text from
# the looked-up string before printing it.
cascade.remove_epsilons()
first_result = cascade.lookup("человек+N+Pl+Pre")[0]
print(first_result[0].replace("@_EPSILON_SYMBOL_@", ""))

# The final list of all forms

In [None]:
# Print every analysis string next to the surface form the cascade generates
# for it, with a blank line between the paradigms of consecutive stems.
for stem in stems:
    for number in numbers:
        for _case in cases:
            # Build the analysis string once; it is both looked up and printed.
            analysis = f"{stem}+N+{number}+{_case}"
            results = cascade.lookup(analysis)
            if results:
                # Only the first result is shown; strip the epsilon
                # placeholder text that HFST leaves in the output string.
                surface = results[0][0].replace("@_EPSILON_SYMBOL_@", "")
            else:
                # An empty lookup result would make [0][0] raise IndexError
                # and abort the whole listing; report the gap instead.
                surface = "<no form>"
            print(f"{analysis} - {surface}")
    print()