# Syntax

## LAS score

The LAS (labelled attachment score) is a number between 0 and 1 that shows the proportion of arcs starting in the window that are equal in the two compared layers.

In [1]:
from estnltk import Text
from estnltk.syntax.scoring import las_score
from estnltk.converters.conll_importer import conll_to_text, add_layer_from_conll


# Import the same CoNLL file twice, as two separately named syntax layers
# ('conll_a' and 'conll_b'), so that the layers can be compared below.
text = conll_to_text(
    file='a.conll',
    syntax_layer='conll_a',
)
add_layer_from_conll(
    file='a.conll',
    text=text,
    syntax_layer='conll_b',
)

text
Milliseks kujuneb Riigikassa ja Ühispanga vahekord ? Minu arvates on Eesti pangandus tehnoloogiliselt maailma tasemel .

layer name,attributes,parent,enveloping,ambiguous,span count
words,,,,False,16
conll_a,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children, parent_deprel",,,False,16
conll_b,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children, parent_deprel",,,False,16


Change one dependency label in the second layer so that the two layers differ.

In [2]:
# Overwrite the dependency label of the span at index 3 in 'conll_b' only.
text.conll_b[3].deprel = '@X'

# Check that the two layers now disagree on the label of that span.
text.conll_a[3].deprel != text.conll_b[3].deprel

True

Calculate the LAS score over the whole layers.

In [3]:
# No window bounds are given (start and end are both None), so the score
# is computed over all spans of the two layers.
las_score(
    layer_a=text.conll_a,
    layer_b=text.conll_b,
    start=None,
    end=None,
)

0.9375

Compare the first 4 spans.

In [4]:
# The window covers spans 0..3, which includes the modified span at index 3.
las_score(text.conll_a, text.conll_b, start=0, end=4)

0.75

Compare spans starting from the fifth.

In [5]:
# The window starts at index 4, i.e. after the modified span at index 3.
las_score(text.conll_a, text.conll_b, start=4)

1.0

## Tag sliding-window LAS scores

In [6]:
from estnltk.taggers.syntax.syntax_las_tagger import SyntaxLasTagger

# Tagger that compares the two syntax layers using a window of 3.
tagger = SyntaxLasTagger(
    'conll_a',
    'conll_b',
    window=3,
)
tagger

name,output layer,output attributes,input layers
SyntaxLasTagger,las,"('deprel_sequence', 'score')","('conll_a', 'conll_b')"

0,1
window,3


In [7]:
# Tagging adds the tagger's output layer, 'las', to the text; display it.
tagger.tag(text)
text.las

layer name,attributes,parent,enveloping,ambiguous,span count
las,"deprel_sequence, score",,conll_a,False,20

text,deprel_sequence,score
['Milliseks'],"('@ADVL',)",1.0
"['Milliseks', 'kujuneb']","('@ADVL', 'ROOT')",1.0
"['Milliseks', 'kujuneb', 'Riigikassa']","('@ADVL', 'ROOT', '@SUBJ')",1.0
"['kujuneb', 'Riigikassa', 'ja']","('ROOT', '@SUBJ', '@J')",0.6666666666666666
"['Riigikassa', 'ja', 'Ühispanga']","('@SUBJ', '@J', '@NN>')",0.6666666666666666
"['ja', 'Ühispanga', 'vahekord']","('@J', '@NN>', '@SUBJ')",0.6666666666666666
"['Ühispanga', 'vahekord', '?']","('@NN>', '@SUBJ', '@Punc')",1.0
"['vahekord', '?']","('@SUBJ', '@Punc')",1.0
['?'],"('@Punc',)",1.0
['Minu'],"('@ADVL',)",1.0


In [8]:
# The layer metadata holds the scores grouped by deprel sequence.
text.las.meta['aggregate_deprel_sequences']

{('@ADVL',): [1.0, 1.0],
 ('@ADVL', '@ADVL'): [1.0],
 ('@ADVL', '@ADVL', 'ROOT'): [1.0],
 ('@ADVL', '@NN>', '@ADVL'): [1.0],
 ('@ADVL', '@Punc'): [1.0],
 ('@ADVL', 'ROOT'): [1.0],
 ('@ADVL', 'ROOT', '@NN>'): [1.0],
 ('@ADVL', 'ROOT', '@SUBJ'): [1.0],
 ('@J', '@NN>', '@SUBJ'): [0.6666666666666666],
 ('@NN>', '@ADVL', '@Punc'): [1.0],
 ('@NN>', '@SUBJ', '@ADVL'): [1.0],
 ('@NN>', '@SUBJ', '@Punc'): [1.0],
 ('@Punc',): [1.0, 1.0],
 ('@SUBJ', '@ADVL', '@NN>'): [1.0],
 ('@SUBJ', '@J', '@NN>'): [0.6666666666666666],
 ('@SUBJ', '@Punc'): [1.0],
 ('ROOT', '@NN>', '@SUBJ'): [1.0],
 ('ROOT', '@SUBJ', '@J'): [0.6666666666666666]}

## `VislTagger`

In [9]:
from estnltk.taggers import VislTagger

# NOTE: this rebinds `text`; the CoNLL-imported text from the LAS examples
# is no longer reachable under this name after this cell runs.
text = Text('Ise alles tee esimesel poolel , vaevu kolmekümnekolmene .')
# Add the 'morph_extended' layer up front — presumably VislTagger needs it
# as input; TODO confirm against the tagger's input layers.
text.tag_layer(['morph_extended'])

visl_tagger = VislTagger()
visl_tagger.tag(text)

# Display the resulting 'visl' layer.
text.visl

layer name,attributes,parent,enveloping,ambiguous,span count
visl,"id, lemma, ending, partofspeech, subtype, mood, tense, voice, person, inf_form, number, case, polarity, number_format, capitalized, finiteness, subcat, clause_boundary, deprel, head",words,,True,9

text,id,lemma,ending,partofspeech,subtype,mood,tense,voice,person,inf_form,number,case,polarity,number_format,capitalized,finiteness,subcat,clause_boundary,deprel,head
Ise,1,ise,0,P,"['pos', 'det', 'refl']",_,_,_,_,_,pl,nom,_,_,cap,_,_,_,@ADVL,3
,1,ise,0,P,"['pos', 'det', 'refl']",_,_,_,_,_,sg,nom,_,_,cap,_,_,_,@ADVL,3
alles,2,alles,0,D,_,_,_,_,_,_,_,_,_,_,_,_,_,_,@ADVL,3
tee,3,tee,0,S,com,_,_,_,_,_,sg,gen,_,_,_,_,_,_,@NN>,0
,3,tee,0,S,com,_,_,_,_,_,sg,gen,_,_,_,_,_,_,@OBJ,0
esimesel,4,esimene,l,N,ord,_,_,_,_,_,sg,ad,_,l,_,_,_,_,@AN>,5
poolel,5,pool,l,S,com,_,_,_,_,_,sg,ad,_,_,_,_,_,_,"['@<NN', '@ADVL']",3
",",6,",",_,Z,Com,_,_,_,_,_,_,_,_,_,_,_,_,_,_,6
vaevu,7,vaevu,0,D,_,_,_,_,_,_,_,_,_,_,_,_,_,_,@ADVL,3
kolmekümnekolmene,8,kolme_kümne_kolmene,0,A,pos,_,_,_,_,_,sg,nom,_,_,_,_,_,_,@ADVL,5


## `ConllMorphTagger`

In [10]:
from estnltk.taggers import ConllMorphTagger

# Both arguments are written out for illustration; each equals its default.
# NOTE: this rebinds `tagger`, which previously held the SyntaxLasTagger.
tagger = ConllMorphTagger(
    output_layer='conll_morph',  # default: 'conll_morph'
    morph_extended_layer='morph_extended',  # default: 'morph_extended'
)
tagger

name,output layer,output attributes,input layers
ConllMorphTagger,conll_morph,"('id', 'form', 'lemma', 'upostag', 'xpostag', 'feats', 'head', 'deprel', 'deps', 'misc')","('morph_extended',)"


In [11]:
# Tagging adds the tagger's output layer, 'conll_morph', to the text.
tagger.tag(text)

# Display the new layer.
text.conll_morph

layer name,attributes,parent,enveloping,ambiguous,span count
conll_morph,"id, form, lemma, upostag, xpostag, feats, head, deprel, deps, misc",words,,True,9

text,id,form,lemma,upostag,xpostag,feats,head,deprel,deps,misc
Ise,1,Ise,ise,P,P,pos|det|refl|pl|nom,_,_,_,_
alles,2,alles,alles,D,D,_,_,_,_,_
tee,3,tee,tee,S,S,sg|gen,_,_,_,_
esimesel,4,esimesel,esimene,N,A,ord|sg|ad|l,_,_,_,_
poolel,5,poolel,pool,S,S,sg|ad,_,_,_,_
",",6,",",",",Z,Z,Com,_,_,_,_
vaevu,7,vaevu,vaevu,D,D,_,_,_,_,_
kolmekümnekolmene,8,kolmekümnekolmene,kolme_kümne_kolmene,A,A,sg|nom,_,_,_,_
.,9,.,.,Z,Z,Fst,_,_,_,_


Convert the `conll_morph` layer to a CoNLL-format string.

In [12]:
# Import only the converter this cell uses: a wildcard import hides where
# `conll_to_str` comes from and may silently shadow names from earlier cells.
from estnltk.taggers.syntax.conll_morph_to_str import conll_to_str

# print() is used on purpose so that the tabs and newlines of the CoNLL
# string are rendered instead of being shown as escape sequences.
print(conll_to_str(text))

1	Ise	ise	P	P	pos|det|refl|pl|nom	_	_	_	_	
2	alles	alles	D	D	_	_	_	_	_	
3	tee	tee	S	S	sg|gen	_	_	_	_	
4	esimesel	esimene	N	A	ord|sg|ad|l	_	_	_	_	
5	poolel	pool	S	S	sg|ad	_	_	_	_	
6	,	,	Z	Z	Com	_	_	_	_	
7	vaevu	vaevu	D	D	_	_	_	_	_	
8	kolmekümnekolmene	kolme_kümne_kolmene	A	A	sg|nom	_	_	_	_	
9	.	.	Z	Z	Fst	_	_	_	_	




# MaltParser

Parse the text with MaltParser and print the result in CoNLL format.

In [13]:
from estnltk.taggers.syntax.maltparser import MaltParser

parser = MaltParser()
# With return_type='conll' the parse result is a sequence of CoNLL-format
# lines, which are joined with newlines for printing.
initial_output = parser.parse_text(text, return_type='conll')
print('\n'.join(initial_output))

1	Ise	ise	P	P	pos|det|refl|pl|nom	0	ROOT	_	_
2	alles	alles	D	D	_	3	@DN>	_	_
3	tee	tee	S	S	sg|gen	5	@NN>	_	_
4	esimesel	esimene	N	A	ord|sg|ad|l	5	@AN>	_	_
5	poolel	pool	S	S	sg|ad	1	@ADVL	_	_
6	,	,	Z	Z	Com	5	@Punc	_	_
7	vaevu	vaevu	D	D	_	8	@ADVL	_	_
8	kolmekümnekolmene	kolme_kümne_kolmene	A	A	sg|nom	1	@PRD	_	_
9	.	.	Z	Z	Fst	8	@Punc	_	_

