# Syntax

## LAS score

The LAS (labeled attachment score) is a number between 0 and 1 that gives the proportion of arcs starting in the window that are equal in the two compared layers.

In [1]:
from estnltk.syntax.scoring import las_score
from estnltk.converters.conll_importer import conll_to_text, add_layer_from_conll


# Both syntax layers are read from the same CoNLL file.
conll_file = 'a.conll'

text = conll_to_text(file=conll_file, syntax_layer='conll_a')
add_layer_from_conll(file=conll_file, text=text, syntax_layer='conll_b')

# Display the text together with a summary of its layers.
text
Milliseks kujuneb Riigikassa ja Ühispanga vahekord ? Minu arvates on Eesti pangandus tehnoloogiliselt maailma tasemel .

layer name,attributes,parent,enveloping,ambiguous,span count
words,,,,False,16
conll_a,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children, parent_deprel",,,False,16
conll_b,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children, parent_deprel",,,False,16


Change one dependency label in the second layer so that the two layers differ.

In [2]:
# Overwrite the dependency label of one span in the second layer only.
span_index = 3
text.conll_b[span_index].deprel = '@X'

# Check that the two layers now disagree on that span.
text.conll_a[span_index].deprel != text.conll_b[span_index].deprel

True

Calculate the LAS score.

In [3]:
# No bounds are given (start and end are both None).
las_score(layer_a=text.conll_a, layer_b=text.conll_b, start=None, end=None)

0.9375

Compare the first 4 spans.

In [4]:
las_score(layer_a=text.conll_a, layer_b=text.conll_b, start=0, end=4)

0.75

Compare the spans starting from the fifth one.

In [5]:
las_score(layer_a=text.conll_a, layer_b=text.conll_b, start=4)

1.0

## Tagging sliding-window LAS scores

In [6]:
from estnltk.taggers.syntax.syntax_las_tagger import SyntaxLasTagger

# Set up a tagger over the 'conll_a' and 'conll_b' layers with a window of 3.
tagger = SyntaxLasTagger('conll_a', 'conll_b', window=3)
# Display the tagger's configuration.
tagger

name,output layer,output attributes,input layers
SyntaxLasTagger,las,"('deprel_sequence', 'score')","('conll_a', 'conll_b')"

0,1
window,3


In [7]:
# Run the tagger on the text, then display the layer it produced.
tagger.tag(text)
text.las

layer name,attributes,parent,enveloping,ambiguous,span count
las,"deprel_sequence, score",,conll_a,False,20

text,deprel_sequence,score
['Milliseks'],"('@ADVL',)",1.0
"['Milliseks', 'kujuneb']","('@ADVL', 'ROOT')",1.0
"['Milliseks', 'kujuneb', 'Riigikassa']","('@ADVL', 'ROOT', '@SUBJ')",1.0
"['kujuneb', 'Riigikassa', 'ja']","('ROOT', '@SUBJ', '@J')",0.6666666666666666
"['Riigikassa', 'ja', 'Ühispanga']","('@SUBJ', '@J', '@NN>')",0.6666666666666666
"['ja', 'Ühispanga', 'vahekord']","('@J', '@NN>', '@SUBJ')",0.6666666666666666
"['Ühispanga', 'vahekord', '?']","('@NN>', '@SUBJ', '@Punc')",1.0
"['vahekord', '?']","('@SUBJ', '@Punc')",1.0
['?'],"('@Punc',)",1.0
['Minu'],"('@ADVL',)",1.0


In [8]:
# The layer's metadata holds the scores grouped by dependency-relation sequence.
text.las.meta['aggregate_deprel_sequences']

{('@ADVL',): [1.0, 1.0],
 ('@ADVL', '@ADVL'): [1.0],
 ('@ADVL', '@ADVL', 'ROOT'): [1.0],
 ('@ADVL', '@NN>', '@ADVL'): [1.0],
 ('@ADVL', '@Punc'): [1.0],
 ('@ADVL', 'ROOT'): [1.0],
 ('@ADVL', 'ROOT', '@NN>'): [1.0],
 ('@ADVL', 'ROOT', '@SUBJ'): [1.0],
 ('@J', '@NN>', '@SUBJ'): [0.6666666666666666],
 ('@NN>', '@ADVL', '@Punc'): [1.0],
 ('@NN>', '@SUBJ', '@ADVL'): [1.0],
 ('@NN>', '@SUBJ', '@Punc'): [1.0],
 ('@Punc',): [1.0, 1.0],
 ('@SUBJ', '@ADVL', '@NN>'): [1.0],
 ('@SUBJ', '@J', '@NN>'): [0.6666666666666666],
 ('@SUBJ', '@Punc'): [1.0],
 ('ROOT', '@NN>', '@SUBJ'): [1.0],
 ('ROOT', '@SUBJ', '@J'): [0.6666666666666666]}

## `VislTagger`

In [9]:
from estnltk.taggers import VislTagger

# presumably VislTagger needs the 'morph_extended' layer as input — TODO confirm
text.tag_layer(['morph_extended'])

visl_tagger = VislTagger()
visl_tagger.tag(text)

# Display the layer created by the tagger.
text.visl

layer name,attributes,parent,enveloping,ambiguous,span count
visl,"id, form, lemma, upostag, xpostag, feats, head, deprel, deps, misc",words,,True,16

text,id,form,lemma,upostag,xpostag,feats,head,deprel,deps,misc
Milliseks,1,Milliseks,milline,P,P,rel|sg|tr|cap,2,@ADVL,_,_
kujuneb,2,kujuneb,kujune,V,V,main|indic|pres|ps3|sg|ps|af,0,@FMV,_,_
Riigikassa,3,Riigikassa,riigi_kassa,S,S,com|sg|gen|cap,5,@NN>,_,_
ja,4,ja,ja,J,J,crd,5,@J,_,_
Ühispanga,5,Ühispanga,Ühis_pank,S,S,prop|sg|gen|cap,6,@NN>,_,_
vahekord,6,vahekord,vahe_kord,S,S,com|sg|nom,2,@SUBJ,_,_
?,7,?,?,Z,Z,Int|CLB,6,xxx,_,_
Minu,1,Minu,mina,P,P,pers|ps1|sg|gen|cap,2,@OBJ,_,_
arvates,2,arvates,arva,V,V,main|ger,3,@ADVL,_,_
on,3,on,ole,V,V,main|indic|pres|ps3|sg|ps|af,0,@FMV,_,_


## `ConllMorphTagger`

In [10]:
from estnltk.taggers.syntax.conll_morph_tagger import ConllMorphTagger

# Both arguments are spelled out for illustration; each equals its default value.
tagger = ConllMorphTagger(
    output_layer='conll_morph',
    morph_extended_layer='morph_extended',
)
tagger

name,output layer,output attributes,input layers
ConllMorphTagger,conll_morph,"('id', 'form', 'lemma', 'upostag', 'xpostag', 'feats', 'head', 'deprel', 'deps', 'misc')","('morph_extended',)"


In [11]:
# Run the tagger configured above on the text.
tagger.tag(text)

# Display the resulting layer.
text.conll_morph

layer name,attributes,parent,enveloping,ambiguous,span count
conll_morph,"id, form, lemma, upostag, xpostag, feats, head, deprel, deps, misc",words,,True,16

text,id,form,lemma,upostag,xpostag,feats,head,deprel,deps,misc
Milliseks,1,Milliseks,milline,P,P,rel|sg|tr,_,_,_,_
kujuneb,2,kujuneb,kujune,V,V,indic|pres|ps3|sg,_,_,_,_
Riigikassa,3,Riigikassa,riigi_kassa,S,S,sg|gen,_,_,_,_
ja,4,ja,ja,J,Jc,_,_,_,_,_
Ühispanga,5,Ühispanga,Ühis_pank,S,H,sg|gen,_,_,_,_
vahekord,6,vahekord,vahe_kord,S,S,sg|nom,_,_,_,_
?,7,?,?,Z,Z,Int,_,_,_,_
Minu,1,Minu,mina,P,Ppers,ps1|sg|gen,_,_,_,_
arvates,2,arvates,arva,V,V,ger,_,_,_,_
on,3,on,ole,V,V,indic|pres|ps3|sg,_,_,_,_


Convert the `conll_morph` layer to a CoNLL-format string.

In [12]:
# Import the one name that is used explicitly instead of `import *`, so the
# origin of `conll_to_str` is visible and no other names leak into the
# notebook namespace.
from estnltk.taggers.syntax.conll_morph_to_str import conll_to_str

# print() is used on purpose: the string contains tabs and newlines that
# should be rendered, not shown escaped.
print(conll_to_str(text))

1	Milliseks	milline	P	P	rel|sg|tr	_	_	_	_	
2	kujuneb	kujune	V	V	indic|pres|ps3|sg	_	_	_	_	
3	Riigikassa	riigi_kassa	S	S	sg|gen	_	_	_	_	
4	ja	ja	J	Jc	_	_	_	_	_	
5	Ühispanga	Ühis_pank	S	H	sg|gen	_	_	_	_	
6	vahekord	vahe_kord	S	S	sg|nom	_	_	_	_	
7	?	?	Z	Z	Int	_	_	_	_	

1	Minu	mina	P	Ppers	ps1|sg|gen	_	_	_	_	
2	arvates	arva	V	V	ger	_	_	_	_	
3	on	ole	V	V	indic|pres|ps3|sg	_	_	_	_	
4	Eesti	Eesti	S	H	sg|gen	_	_	_	_	
5	pangandus	pangandus	S	S	sg|nom	_	_	_	_	
6	tehnoloogiliselt	tehnoloogilise=lt	D	D	_	_	_	_	_	
7	maailma	maa_ilm	S	S	sg|gen	_	_	_	_	
8	tasemel	tase	S	S	sg|ad	_	_	_	_	
9	.	.	Z	Z	Fst	_	_	_	_	




# MaltParser

Parse the text with MaltParser and print the result in CoNLL format.

In [13]:
from estnltk.taggers.syntax.maltparser import MaltParser

parser = MaltParser()

# Ask for the parse as CoNLL-format lines rather than the default return type.
initial_output = parser.parse_text(text, return_type='conll')

# Join the lines so that they print one per row.
print('\n'.join(initial_output))

1	Milliseks	milline	P	P	rel|sg|tr	2	@ADVL	_	_
2	kujuneb	kujune	V	V	indic|pres|ps3|sg	0	ROOT	_	_
3	Riigikassa	riigi_kassa	S	S	sg|gen	0	ROOT	_	_
4	ja	ja	J	Jc	_	6	@J	_	_
5	Ühispanga	Ühis_pank	S	H	sg|gen	6	@NN>	_	_
6	vahekord	vahe_kord	S	S	sg|nom	3	@??	_	_
7	?	?	Z	Z	Int	6	@Punc	_	_

1	Minu	mina	P	Ppers	ps1|sg|gen	2	@ADVL	_	_
2	arvates	arva	V	V	ger	3	@ADVL	_	_
3	on	ole	V	V	indic|pres|ps3|sg	0	ROOT	_	_
4	Eesti	Eesti	S	H	sg|gen	5	@NN>	_	_
5	pangandus	pangandus	S	S	sg|nom	3	@SUBJ	_	_
6	tehnoloogiliselt	tehnoloogilise=lt	D	D	_	3	@ADVL	_	_
7	maailma	maa_ilm	S	S	sg|gen	8	@NN>	_	_
8	tasemel	tase	S	S	sg|ad	3	@ADVL	_	_
9	.	.	Z	Z	Fst	8	@Punc	_	_

