In [12]:
import sys
import itertools
from anytree import Node, RenderTree,Resolver,ChildResolverError,NodeMixin,PreOrderIter
from anytree.dotexport import RenderTreeGraph

class CustomNode(NodeMixin):  # Add state number feature
    def __init__(self, name, state_number,symbol,end_state, parent=None):
        super(CustomNode, self).__init__()
        self.name=name
        self.state_number=state_number
        self.parent=parent
        self.end_state=end_state
        self.symbol=symbol

state_number=0
top=CustomNode("root",state_number,'',False)
r = Resolver('name')




In [15]:
def add_inflectional_suffixes(suffix,state_number,root_node):
    suffixes=suffix.split(":")
    state_number+=1
    
    node=CustomNode(suffixes[0],state_number,suffixes[1],True,root_node)
    if suffixes[1]=='+N':
        state_number+=1
        plural_node=CustomNode('^s',state_number,'+PL',True,node)
        state_number+=1
        CustomNode("^s",state_number,'+SGPS',True,node)
        state_number+=1
        CustomNode("'",state_number,'+PLPS',True,plural_node)
    elif suffixes[1]=='+V':
        state_number+=1
        CustomNode("^s",state_number,'+3D',True,node)
        state_number+=1
        CustomNode("ing",state_number,'+ING',True,node)
    return (node,state_number)
def append_suffixes(root_node,morphs,index,state_number):
    suffix=''
    while len(morphs)>index:
        index+=1
        if morphs[index]=='|':
            if suffix!='': #check if suffix to be handle before pipe
                
                node,state_number=add_inflectional_suffixes(suffix,state_number,root_node)
                suffix=''
        elif morphs[index]=='(':
            node,state_number=add_inflectional_suffixes(suffix,state_number,root_node)
            index,state_number=append_suffixes(node,morphs,index,state_number)
            suffix=''
        elif morphs[index]==')':
            if suffix!='': #check if suffix to be handle before paranthesis
                val=add_inflectional_suffixes(suffix,state_number,root_node)
                state_number=val[1]
            return (index,state_number)
        else:
            suffix+=morphs[index]
    return (index,state_number)

def insert_irregular_verb(stem,state_number,root):
    verbs=stems=stem.split('/')
    root_stem=verbs[0]
    for i in range(len(root_stem)):
        state_number+=1
        root=CustomNode(stem[i],state_number,'<eps>',False,root)
        
    state_number+=1
    n=CustomNode(verbs[0],state_number,verbs[0]+'+V',True,root)
    
    if '&' in verbs[1]: #'were&was'
        past_verbs=verbs[1].split('&')
        state_number+=1
        n=CustomNode(past_verbs[0],state_number,past_verbs[0]+'+V+PT',True,root)
        state_number+=1
        n=CustomNode(past_verbs[1],state_number,past_verbs[1]+'+V+3D+PT',True,root)
    else:
        state_number+=1
        n=CustomNode(verbs[1],state_number,verbs[1]+'+V+PT',True,root)
    state_number+=1
    n=CustomNode(verbs[2],state_number,verbs[2]+'+V+PP',True,root)
            
    return (root,state_number)

In [16]:
#words = ['compute (er:+N(ize:+V(ation:+N|able:+ADJ)|less:+ADJ(ness:+N))|ation:+N(able:+ADJ))','work (er:+N(ism:+N))']
words = ["be/were&was/been (ing:+N)","begin/began/begun (ing:+N|er:+N)"]
words.sort()
for line in words:
    arr=line.split(' ')
    stem=arr[0]
    morphs=arr[1]
    root=top

    if '/' in stem: #means irregular verb
        root,state_number=insert_irregular_verb(stem,state_number,top)
    else:
        for i in range(len(stem)):
            try:
                n=r.get(root,stem[i])
            except ChildResolverError:
                state_number+=1
                n=CustomNode(stem[i],state_number,stem[i],False,root)
            root=n
        root.name+='+V' #For verb fst generation add hardcodedly +V tag to input 
        root.end_state=True
    return_val=append_suffixes(root,morphs,0,state_number)
    state_number=return_val[1]
RenderTreeGraph(top).to_picture("udo.png")

In [17]:
p=PreOrderIter(top)

p=itertools.islice(p,1,None) #pass root


for n in p:
    print(n.name + " " +str(n.state_number) + "parent: " + n.parent.name)

b 1parent: root
e 2parent: b
be 3parent: e
were 4parent: e
was 5parent: e
been 6parent: e
ing 7parent: e
^s 8parent: ing
' 10parent: ^s
^s 9parent: ing
b 11parent: root
e 12parent: b
g 13parent: e
i 14parent: g
n 15parent: i
begin 16parent: n
began 17parent: n
begun 18parent: n
ing 19parent: n
^s 20parent: ing
' 22parent: ^s
^s 21parent: ing
er 23parent: ing
^s 24parent: er
' 26parent: ^s
^s 25parent: er
b 19parent: root
e 20parent: b
be 21parent: e
were 22parent: e
was 23parent: e
been 24parent: e
ing 25parent: e
^s 26parent: ing
' 28parent: ^s
^s 27parent: ing
b 29parent: root
e 30parent: b
g 31parent: e
i 32parent: g
n 33parent: i
begin 34parent: n
began 35parent: n
begun 36parent: n
ing 37parent: n
^s 38parent: ing
' 40parent: ^s
^s 39parent: ing
er 41parent: n
^s 42parent: er
' 44parent: ^s
^s 43parent: er


In [18]:

#for pre, _, node in RenderTree(top):
#    treestr = u"%s%s" % (pre, node.name)
#    print(treestr.ljust(8), node.state_number)
def print_as_tabs(prev_state,next_state,input_label,output_label):
    print(str(prev_state) + "\t" + str(next_state) + "\t"+ str(input_label) + "\t" + str(output_label))


# In[222]:


p=PreOrderIter(top)
p=itertools.islice(p,1,None) #pass root

print_as_tabs(0,1,"<s>","<s>")
print_as_tabs(1,0,"<eps>","<eps>")
print_as_tabs(0,2,"</s>","</s>")
print_as_tabs(2,0,"<eps>","<eps>")
print_as_tabs(0,3,"<unk>","<unk>")
print_as_tabs(3,0,"<eps>","<eps>")
for n in p:
    parent_state_number=n.parent.state_number
    if n.parent.state_number!=0:
        parent_state_number+=3
    print_as_tabs(parent_state_number,n.state_number+3,n.symbol,n.name)
    if n.end_state:
        print_as_tabs(n.state_number+3,0,"<eps>","#")  
    #if n.is_leaf:
     #   print_as_tabs(n.state_number+3,0,"<eps>","#")
print("0")


0	1	<s>	<s>
1	0	<eps>	<eps>
0	2	</s>	</s>
2	0	<eps>	<eps>
0	3	<unk>	<unk>
3	0	<eps>	<eps>
0	4	<eps>	b
4	5	<eps>	e
5	6	be+V	be
6	0	<eps>	#
5	7	were+V+PT	were
7	0	<eps>	#
5	8	was+V+3D+PT	was
8	0	<eps>	#
5	9	been+V+PP	been
9	0	<eps>	#
5	10	+N	ing
10	0	<eps>	#
10	11	+PL	^s
11	0	<eps>	#
11	13	+PLPS	'
13	0	<eps>	#
10	12	+SGPS	^s
12	0	<eps>	#
0	14	<eps>	b
14	15	<eps>	e
15	16	<eps>	g
16	17	<eps>	i
17	18	<eps>	n
18	19	begin+V	begin
19	0	<eps>	#
18	20	began+V+PT	began
20	0	<eps>	#
18	21	begun+V+PP	begun
21	0	<eps>	#
18	22	+N	ing
22	0	<eps>	#
22	23	+PL	^s
23	0	<eps>	#
23	25	+PLPS	'
25	0	<eps>	#
22	24	+SGPS	^s
24	0	<eps>	#
22	26	+N	er
26	0	<eps>	#
26	27	+PL	^s
27	0	<eps>	#
27	29	+PLPS	'
29	0	<eps>	#
26	28	+SGPS	^s
28	0	<eps>	#
0	22	<eps>	b
22	23	<eps>	e
23	24	be+V	be
24	0	<eps>	#
23	25	were+V+PT	were
25	0	<eps>	#
23	26	was+V+3D+PT	was
26	0	<eps>	#
23	27	been+V+PP	been
27	0	<eps>	#
23	28	+N	ing
28	0	<eps>	#
28	29	+PL	^s
29	0	<eps>	#
29	31	+PLPS	'
31	0	<eps>	#
28	30	+SGPS	^s
30	0	<eps>	#
0	32	<eps>	

In [30]:
def append_suffixes(node_stack,morphs,index,paranthesis_stack,state_number):
    suffix=''
    while len(paranthesis_stack)!=0:
        index+=1
        
        if morphs[index]=='|':
            if suffix!='':
                node=node_stack[-1] 
                state_number+=1
                suffixes=suffix.split(':')
                CustomNode(suffixes[0],state_number,suffixes[1],node)
                suffix=''
        elif morphs[index]=='(':
            suffixes=suffix.split(':')
            state_number+=1
            
            node=CustomNode(suffixes[0],state_number,suffixes[1],node_stack[-1])
            node_stack.append(node)
            paranthesis_stack.append(index)
            param_node_stack=[node_stack[-1]]
            param_paranthesis_stack=[index]
            print(node_stack[-1].name)
            print(param_paranthesis_stack)
        
            var=append_suffixes(param_node_stack,morphs,index,param_paranthesis_stack,state_number)
            index=var[0]
            state_number=var[1]
            node_stack.pop()
            paranthesis_stack.pop()
            suffix=''
            print(node_stack[-1].name)
        elif morphs[index]==')':
            if suffix!='':
                node=node_stack[-1] 
                state_number+=1
                suffixes=suffix.split(':')
                CustomNode(suffixes[0],state_number,suffixes[1],node)
            paranthesis_stack.pop()
            node_stack.pop()
            return (index,state_number,node_stack,paranthesis_stack)
        else:
            suffix+=morphs[index]
    return (index,state_number,node_stack,paranthesis_stack)