# Syntax

## LAS score

The LAS score (labeled attachment score) is a number between 0 and 1: the proportion of arcs starting in the compared window that are equal in both syntax layers.

In [1]:
from estnltk.syntax.scoring import las_score
from estnltk.converters.conll_importer import conll_to_text, add_layer_from_conll


# Build the text from the CoNLL file; its syntax annotation goes into layer 'conll_a'.
text = conll_to_text(
    file='a.conll',
    syntax_layer='conll_a',
)
# Read the same file a second time into layer 'conll_b' of the same text.
add_layer_from_conll(
    file='a.conll',
    text=text,
    syntax_layer='conll_b',
)

# Show the text and its layers.
text
Milliseks kujuneb Riigikassa ja Ühispanga vahekord ? Minu arvates on Eesti pangandus tehnoloogiliselt maailma tasemel .

layer name,attributes,parent,enveloping,ambiguous,span count
words,,,,False,16
conll_a,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",,,False,16
conll_b,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",,,False,16


Change one dependency label in the second layer so that the two layers differ.

In [2]:
# Overwrite the dependency label of the span at index 3 (the word 'ja')
# in layer 'conll_b' only.
text.conll_b[3].deprel = '@X'

# Check that the two layers now disagree at that span.
text.conll_a[3].deprel != text.conll_b[3].deprel

True

Calculate the LAS score over the whole text.

In [3]:
# start and end are passed explicitly as None; the result below (15/16) covers all 16 spans.
las_score(layer_a=text.conll_a, layer_b=text.conll_b, start=None, end=None)

0.9375

Compare the first 4 spans.

In [4]:
# Spans 0..3 only; the modified span (index 3) falls inside this range.
las_score(layer_a=text.conll_a, layer_b=text.conll_b, start=0, end=4)

0.75

Compare spans starting from the fifth.

In [5]:
# From index 4 (the fifth span) onwards; the modified span is excluded.
las_score(layer_a=text.conll_a, layer_b=text.conll_b, start=4)

1.0

## Tag sliding-window LAS scores

In [6]:
from estnltk.taggers.syntax.syntax_las_tagger import SyntaxLasTagger

# Tagger that takes the two syntax layers as input and scores them
# with a window of 3.
tagger = SyntaxLasTagger(
    'conll_a',
    'conll_b',
    window=3,
)
# Show the tagger's configuration.
tagger

name,output layer,output attributes,input layers
SyntaxLasTagger,las,"('deprel_sequence', 'score')","('conll_a', 'conll_b')"

0,1
window,3


In [7]:
# `tagger` is the SyntaxLasTagger created above; tagging adds the 'las' layer to `text`.
tagger.tag(text)
# Show the new layer: one span per window, with its deprel sequence and score.
text.las

layer name,attributes,parent,enveloping,ambiguous,span count
las,"deprel_sequence, score",,conll_a,False,20

text,deprel_sequence,score
['Milliseks'],"('@ADVL',)",1.0
"['Milliseks', 'kujuneb']","('@ADVL', 'ROOT')",1.0
"['Milliseks', 'kujuneb', 'Riigikassa']","('@ADVL', 'ROOT', '@SUBJ')",1.0
"['kujuneb', 'Riigikassa', 'ja']","('ROOT', '@SUBJ', '@J')",0.6666666666666666
"['Riigikassa', 'ja', 'Ühispanga']","('@SUBJ', '@J', '@NN>')",0.6666666666666666
"['ja', 'Ühispanga', 'vahekord']","('@J', '@NN>', '@SUBJ')",0.6666666666666666
"['Ühispanga', 'vahekord', '?']","('@NN>', '@SUBJ', '@Punc')",1.0
"['vahekord', '?']","('@SUBJ', '@Punc')",1.0
['?'],"('@Punc',)",1.0
['Minu'],"('@ADVL',)",1.0


In [8]:
# Layer metadata: maps each deprel sequence to the list of scores of the
# windows that have that sequence (see the output below).
text.las.meta['aggregate_deprel_sequences']

{('@ADVL',): [1.0, 1.0],
 ('@ADVL', '@ADVL'): [1.0],
 ('@ADVL', '@ADVL', 'ROOT'): [1.0],
 ('@ADVL', '@NN>', '@ADVL'): [1.0],
 ('@ADVL', '@Punc'): [1.0],
 ('@ADVL', 'ROOT'): [1.0],
 ('@ADVL', 'ROOT', '@NN>'): [1.0],
 ('@ADVL', 'ROOT', '@SUBJ'): [1.0],
 ('@J', '@NN>', '@SUBJ'): [0.6666666666666666],
 ('@NN>', '@ADVL', '@Punc'): [1.0],
 ('@NN>', '@SUBJ', '@ADVL'): [1.0],
 ('@NN>', '@SUBJ', '@Punc'): [1.0],
 ('@Punc',): [1.0, 1.0],
 ('@SUBJ', '@ADVL', '@NN>'): [1.0],
 ('@SUBJ', '@J', '@NN>'): [0.6666666666666666],
 ('@SUBJ', '@Punc'): [1.0],
 ('ROOT', '@NN>', '@SUBJ'): [1.0],
 ('ROOT', '@SUBJ', '@J'): [0.6666666666666666]}

## `ConllMorphTagger`

In [9]:
from estnltk.taggers.syntax.conll_morph_tagger import ConllMorphTagger

# Both arguments are spelled out for illustration only: 'conll_morph' and
# 'morph_extended' are already the default values.
tagger = ConllMorphTagger(output_layer='conll_morph', morph_extended_layer='morph_extended')
tagger

name,output layer,output attributes,input layers
ConllMorphTagger,conll_morph,"('lemma', 'upostag', 'xpostag', 'feats')","('morph_extended',)"


In [10]:
# ConllMorphTagger lists 'morph_extended' as its input layer, so tag that layer first.
text.tag_layer(['morph_extended'])

# `tagger` is now the ConllMorphTagger; it adds the 'conll_morph' layer to `text`.
tagger.tag(text)

# Display the resulting layer.
text.conll_morph

layer name,attributes,parent,enveloping,ambiguous,span count
conll_morph,"lemma, upostag, xpostag, feats",words,,True,16

text,lemma,upostag,xpostag,feats
Milliseks,milline,,,
kujuneb,kujunema,,,
Riigikassa,riigikassa,,,
ja,ja,,,
Ühispanga,Ühispank,,,
vahekord,vahekord,,,
?,?,,,
Minu,mina,,,
arvates,arvama,,,
on,olema,,,
