In [21]:
import sys
import itertools
from anytree import Node, RenderTree,Resolver,ChildResolverError,NodeMixin,PreOrderIter
from anytree.dotexport import RenderTreeGraph

class CustomNode(NodeMixin):
    """Tree node of the lexicon trie that also carries transducer arc data.

    Attributes set by the constructor:
        name:         label of the node; the export cell prints it as the
                      output label of the arc leading into this node.
        state_number: integer id of the state this node stands for.
        symbol:       printed by the export cell as the input label of the
                      arc leading into this node.
        end_state:    truthy when a word may end at this node.
        parent:       parent node, or None for the root.
    """

    def __init__(self, name, state_number,symbol,end_state, parent=None):
        super(CustomNode, self).__init__()
        # Plain payload attributes first ...
        self.name = name
        self.symbol = symbol
        self.state_number = state_number
        self.end_state = end_state
        # ... then hook the fully populated node into the tree.
        self.parent = parent

# Highest state id handed out so far. Later cells increment it before every
# node they create, so the root keeps id 0.
state_number=0
# Root of the lexicon tree: empty symbol, not an end state, no parent.
top=CustomNode("root",state_number,'',False)
# Resolver that matches path components against each node's `name`
# attribute; the lexicon-loading cell uses it to reuse existing stem nodes.
r = Resolver('name')




In [22]:
def add_inflectional_suffixes(suffix,state_number,root_node):
    """Hang one derivational suffix, plus its inflections, under root_node.

    Args:
        suffix: string of the form ``surface:TAG`` (e.g. a surface part
            followed by ``+N``). The first character of the surface part is
            skipped -- presumably a boundary marker; confirm against the
            lexicon format.
        state_number: last state id already in use; every node created here
            gets the next consecutive id.
        root_node: node the suffix branch is attached to.

    Returns:
        ``(node, state_number)`` where ``node`` is the node of the last
        surface character (the ``^`` node if there are none) and
        ``state_number`` is the last id handed out.
    """
    parts = suffix.split(":")

    # Every suffix branch opens with a '-' arc followed by a '^' node.
    boundary = CustomNode('<eps>', state_number + 1, '-', False, root_node)
    tail = CustomNode('^', state_number + 2, '<eps>', False, boundary)
    state_number += 2

    # One node per surface character, chained below the '^' node.
    for letter in parts[0][1:]:
        state_number += 1
        tail = CustomNode(letter, state_number, letter, False, tail)

    # Final node carrying the category tag of the derived word.
    state_number += 1
    category = parts[1]
    CustomNode('<eps>', state_number, category, True, tail)

    if category == '+N':
        # Nouns: plural, singular possessive, and plural possessive (the
        # latter hangs below the plural node).
        plural = CustomNode('^s', state_number + 1, '+PL', True, tail)
        CustomNode("'s", state_number + 2, '+SGPS', True, tail)
        CustomNode("'", state_number + 3, '+PLPS', True, plural)
        state_number += 3
    elif category == '+V':
        # Verbs: the '^s' (+3D) and '^ing' (+ING) forms.
        CustomNode("^s", state_number + 1, '+3D', True, tail)
        CustomNode("^ing", state_number + 2, '+ING', True, tail)
        state_number += 2

    return (tail, state_number)
def append_suffixes(root_node,morphs,index,state_number):
    """Parse one parenthesised suffix group and attach it below root_node.

    ``morphs`` is scanned starting at the character AFTER ``index`` (the
    caller passes the position of the opening ``(``). Inside a group:

    * ``|`` separates sibling suffixes,
    * ``(`` opens a nested group that hangs below the suffix just read,
    * ``)`` closes the current group.

    Each completed suffix is handed to ``add_inflectional_suffixes``.

    Args:
        root_node: node the suffixes of this group are attached to.
        morphs: the suffix specification string.
        index: position of the group's opening parenthesis.
        state_number: last state id already in use.

    Returns:
        ``(index, state_number)``: the position of the last character
        consumed (the closing ``)`` for a well-formed group) and the last
        state id handed out.

    If the input ends before the group is closed, the group is treated as
    implicitly closed: a pending suffix is still attached and the position
    of the last character is returned, instead of reading past the end of
    ``morphs``.
    """
    suffix=''
    # Look one character ahead: index is advanced before it is used, so the
    # bound has to be checked against index + 1.
    while index+1<len(morphs):
        index+=1
        ch=morphs[index]
        if ch=='|':
            if suffix!='': # a suffix is pending before the pipe
                node,state_number=add_inflectional_suffixes(suffix,state_number,root_node)
                suffix=''
        elif ch=='(':
            # The nested group hangs below the suffix read so far.
            node,state_number=add_inflectional_suffixes(suffix,state_number,root_node)
            index,state_number=append_suffixes(node,morphs,index,state_number)
            suffix=''
        elif ch==')':
            if suffix!='': # a suffix is pending before the parenthesis
                _,state_number=add_inflectional_suffixes(suffix,state_number,root_node)
            return (index,state_number)
        else:
            suffix+=ch
    # End of input without a closing parenthesis: do not drop a pending suffix.
    if suffix!='':
        _,state_number=add_inflectional_suffixes(suffix,state_number,root_node)
    return (index,state_number)

def insert_irregular_verb(stem,state_number,root):
    """Insert an irregular verb given as ``base/past/participle``.

    Three separate branches are created below ``root``:

    * the base form, one node per character, closed by a final ``+V`` node;
    * the past form, closed by ``+V`` and a final ``+PT`` node;
    * the participle form, closed by ``+V`` and a final ``+PP`` node.

    The two inflected branches first consume the characters of the base form
    as symbols (node name ``<eps>``), then emit the characters of the
    inflected form as node names (symbol ``<eps>``).

    Args:
        stem: the three forms joined by ``/``.
        state_number: last state id already in use.
        root: node all three branches start from.

    Returns:
        ``(node, state_number)`` where ``node`` is the node of the last
        base-form character and ``state_number`` the last id handed out.
    """
    anchor = root
    parts = stem.split('/')
    lemma = parts[0]

    # Base form: each character is both the symbol and the name of its node.
    tip = anchor
    for letter in lemma:
        state_number += 1
        tip = CustomNode(letter, state_number, letter, False, tip)
    state_number += 1
    CustomNode('<eps>', state_number, '+V', True, tip)

    def _inflected_branch(which, tag, counter):
        # Build one inflected branch from `anchor`; returns the last id used.
        cursor = anchor
        for letter in lemma:
            counter += 1
            cursor = CustomNode('<eps>', counter, letter, False, cursor)
        # parts[which] is looked up only now, after the lemma chain exists.
        for letter in parts[which]:
            counter += 1
            cursor = CustomNode(letter, counter, '<eps>', False, cursor)
        for symbol, final in (('+V', False), (tag, True)):
            counter += 1
            cursor = CustomNode('<eps>', counter, symbol, final, cursor)
        return counter

    state_number = _inflected_branch(1, '+PT', state_number)  # past tense
    state_number = _inflected_branch(2, '+PP', state_number)  # past perfect

    return (tip, state_number)

In [23]:
#words = ['compute (er:+N(ize:+V(ation:+N|able:+ADJ)|less:+ADJ(ness:+N))|ation:+N(able:+ADJ))','work (er:+N(ism:+N))']
#words = ["be/were&was/been (ing:+N)","begin/began/begun (ing:+N|er:+N)"]
# Load the verb lexicon and grow the tree rooted at `top`.
# Each non-empty line is "<stem>" or "<stem> <suffix-spec>"; a stem containing
# '/' is an irregular verb ("base/past/participle").
# NOTE: this cell mutates the module-level `top` and `state_number`, so running
# it twice adds every entry a second time; re-run the setup cell first.
with open('Lexicon/Verbs.txt') as f:
    words = sorted(f)

for word in words:
    line=word.rstrip()
    if not line:
        # A blank line would otherwise attach +V/+3D/+ING straight to the
        # root, i.e. register the empty string as a verb.
        continue
    arr=line.split(' ')
    stem=arr[0]
    # Suffix spec is only taken when the line has exactly two fields.
    morphs=arr[1] if len(arr)==2 else None
    root=top

    if '/' in stem: #means irregular verb
        root,state_number=insert_irregular_verb(stem,state_number,top)
    else:
        # Walk the stem, reusing nodes that already exist and creating the
        # missing ones.
        for ch in stem:
            try:
                n=r.get(root,ch)
            except ChildResolverError:
                state_number+=1
                n=CustomNode(ch,state_number,ch,False,root)
            root=n
        # Regular verbs get the bare form plus the ^s and ^ing forms.
        state_number+=1
        CustomNode("<eps>",state_number,'+V',True,root)
        state_number+=1
        CustomNode("^s",state_number,'+3D',True,root)
        state_number+=1
        CustomNode("^ing",state_number,'+ING',True,root)
    if morphs is not None:
        return_val=append_suffixes(root,morphs,0,state_number)
        state_number=return_val[1]
RenderTreeGraph(top).to_picture("udo.png")

In [24]:
# Debug listing: one line per node in pre-order with its state id and the
# name of its parent. The root is skipped because it has no parent.
p = itertools.islice(PreOrderIter(top), 1, None)

for n in p:
    print("%s %sparent: %s" % (n.name, str(n.state_number), n.parent.name))

a 1parent: root
c 2parent: a
c 3parent: c
e 4parent: c
p 5parent: e
t 6parent: p
<eps> 7parent: t
^s 8parent: t
^ing 9parent: t
<eps> 10parent: t
^ 11parent: <eps>
e 12parent: ^
e 13parent: e
<eps> 14parent: e
^s 15parent: e
' 17parent: ^s
's 16parent: e
<eps> 18parent: t
^ 19parent: <eps>
i 20parent: ^
n 21parent: i
g 22parent: n
<eps> 23parent: g
<eps> 24parent: g
^ 25parent: <eps>
n 26parent: ^
e 27parent: n
s 28parent: e
s 29parent: s
<eps> 30parent: s
^s 31parent: s
' 33parent: ^s
's 32parent: s
<eps> 34parent: g
^ 35parent: <eps>
l 36parent: ^
y 37parent: l
<eps> 38parent: y
<eps> 39parent: t
^ 40parent: <eps>
a 41parent: ^
t 42parent: a
i 43parent: t
o 44parent: i
n 45parent: o
<eps> 46parent: n
^s 47parent: n
' 49parent: ^s
's 48parent: n
<eps> 50parent: t
^ 51parent: <eps>
o 52parent: ^
r 53parent: o
<eps> 54parent: r
^s 55parent: r
' 57parent: ^s
's 56parent: r
<eps> 58parent: t
^ 59parent: <eps>
e 60parent: ^
n 61parent: e
c 62parent: n
e 63parent: c
<eps> 64parent: e
^s 65p

In [25]:

#for pre, _, node in RenderTree(top):
#    treestr = u"%s%s" % (pre, node.name)
#    print(treestr.ljust(8), node.state_number)
def print_as_tabs(prev_state,next_state,input_label,output_label):
    """Print one arc as a tab-separated row.

    The four arguments are converted with ``str`` and written in the order
    source state, destination state, input label, output label.
    """
    fields = (prev_state, next_state, input_label, output_label)
    print("\t".join(str(field) for field in fields))


# In[222]:


# Print every arc of the tree as a tab-separated row (source, destination,
# input label, output label), preceded by the arcs for the special tokens.
p = itertools.islice(PreOrderIter(top), 1, None)  # skip the root itself

# States 1-3 are taken by the special tokens; each one loops back to state 0.
for special_state, token in enumerate(("<s>", "</s>", "<unk>"), 1):
    print_as_tabs(0, special_state, token, token)
    print_as_tabs(special_state, 0, "<eps>", "<eps>")

for n in p:
    # Tree state ids are shifted by 3 to make room for the special-token
    # states; the root keeps id 0.
    parent_state_number = n.parent.state_number
    if parent_state_number != 0:
        parent_state_number += 3
    print_as_tabs(parent_state_number, n.state_number + 3, n.symbol, n.name)
    # A node where a word may end gets an arc back to state 0 emitting '#'.
    # (An earlier variant did this for every leaf instead.)
    if n.end_state:
        print_as_tabs(n.state_number + 3, 0, "<eps>", "#")

# Trailing line with the single state id 0 -- presumably the final-state
# declaration of the text format consumed downstream; confirm.
print("0")


0	1	<s>	<s>
1	0	<eps>	<eps>
0	2	</s>	</s>
2	0	<eps>	<eps>
0	3	<unk>	<unk>
3	0	<eps>	<eps>
0	4	a	a
4	5	c	c
5	6	c	c
6	7	e	e
7	8	p	p
8	9	t	t
9	10	+V	<eps>
10	0	<eps>	#
9	11	+3D	^s
11	0	<eps>	#
9	12	+ING	^ing
12	0	<eps>	#
9	13	-	<eps>
13	14	<eps>	^
14	15	e	e
15	16	e	e
16	17	+N	<eps>
17	0	<eps>	#
16	18	+PL	^s
18	0	<eps>	#
18	20	+PLPS	'
20	0	<eps>	#
16	19	+SGPS	's
19	0	<eps>	#
9	21	-	<eps>
21	22	<eps>	^
22	23	i	i
23	24	n	n
24	25	g	g
25	26	+ADJ	<eps>
26	0	<eps>	#
25	27	-	<eps>
27	28	<eps>	^
28	29	n	n
29	30	e	e
30	31	s	s
31	32	s	s
32	33	+N	<eps>
33	0	<eps>	#
32	34	+PL	^s
34	0	<eps>	#
34	36	+PLPS	'
36	0	<eps>	#
32	35	+SGPS	's
35	0	<eps>	#
25	37	-	<eps>
37	38	<eps>	^
38	39	l	l
39	40	y	y
40	41	+ADV	<eps>
41	0	<eps>	#
9	42	-	<eps>
42	43	<eps>	^
43	44	a	a
44	45	t	t
45	46	i	i
46	47	o	o
47	48	n	n
48	49	+N	<eps>
49	0	<eps>	#
48	50	+PL	^s
50	0	<eps>	#
50	52	+PLPS	'
52	0	<eps>	#
48	51	+SGPS	's
51	0	<eps>	#
9	53	-	<eps>
53	54	<eps>	^
54	55	o	o
55	56	r	r
56	57	+N	<eps>
57	0	<eps>	#
56	58	+PL	^s
58	0	<eps>

In [30]:
def append_suffixes(node_stack,morphs,index,paranthesis_stack,state_number):
    """Stack-based variant: attach each ``surface:TAG`` suffix as ONE node.

    NOTE: this redefinition shadows the earlier 4-argument
    ``append_suffixes(root_node, morphs, index, state_number)``. The
    lexicon-loading cell calls the 4-argument form, so re-running that cell
    after this one raises TypeError.

    ``morphs`` is scanned starting at the character AFTER ``index``. ``|``
    separates siblings, ``(`` opens a nested group below the suffix just
    read, ``)`` closes the current group. Every suffix becomes a single
    ``CustomNode`` whose name is the surface part and whose symbol is the
    tag, attached to ``node_stack[-1]``.

    Args:
        node_stack: list whose last element is the current parent node;
            mutated in place (popped when the group closes).
        morphs: the suffix specification string.
        index: position of the group's opening parenthesis.
        paranthesis_stack: list of open-parenthesis positions; scanning
            continues while it is non-empty. Mutated in place.
        state_number: last state id already in use.

    Returns:
        ``(index, state_number, node_stack, paranthesis_stack)``.

    If the input ends before the group is closed, a pending suffix is still
    attached and the function returns instead of reading past the end of
    ``morphs``.
    """
    def _attach(spec, number):
        # CustomNode's 4th positional parameter is end_state, not parent:
        # the parent must be passed as the 5th argument or the new node is
        # never linked into the tree. end_state=True keeps the truthy value
        # the previous call effectively stored there.
        pieces = spec.split(':')
        return CustomNode(pieces[0], number, pieces[1], True, node_stack[-1])

    suffix=''
    # index is advanced before use, so bound-check index + 1.
    while len(paranthesis_stack)!=0 and index+1<len(morphs):
        index+=1
        ch=morphs[index]

        if ch=='|':
            if suffix!='':
                state_number+=1
                _attach(suffix,state_number)
                suffix=''
        elif ch=='(':
            state_number+=1
            node=_attach(suffix,state_number)
            node_stack.append(node)
            paranthesis_stack.append(index)
            # The nested group works on its own single-element stacks.
            var=append_suffixes([node],morphs,index,[index],state_number)
            index=var[0]
            state_number=var[1]
            node_stack.pop()
            paranthesis_stack.pop()
            suffix=''
        elif ch==')':
            if suffix!='':
                state_number+=1
                _attach(suffix,state_number)
            paranthesis_stack.pop()
            node_stack.pop()
            return (index,state_number,node_stack,paranthesis_stack)
        else:
            suffix+=ch
    # End of input without a closing parenthesis: keep the pending suffix.
    if suffix!='' and node_stack:
        state_number+=1
        _attach(suffix,state_number)
    return (index,state_number,node_stack,paranthesis_stack)