In [1]:
from estnltk import Text
from estnltk.converters.TCF_exporter import export_TCF
from estnltk.converters.TCF_importer import import_TCF
from estnltk.text import Layer

# TCF exporter and importer

Export a `Text` object to TCF and import a `Text` object from TCF.
## Short example

In [2]:
# Create a Text object and add the segmentation layers
# (the summary table shown in the output lists paragraphs, sentences and words).
text = Text('Tere maailm!')
text.analyse('segmentation')

# Display the text object together with its layer summary.
text
Tere maailm!

layer name,attributes,parent,enveloping,ambiguous,span count
paragraphs,,,sentences,False,1
sentences,,,words,False,1
words,normalized_form,,,False,3


In [3]:
# Write the text object to an XML file. export_TCF also returns the XML,
# which is printed here so that it shows up in the cell output.
# Note: the paragraphs layer is not exported.
short_tcf_xml = export_TCF(text, file='tcf/tcf_short.xml')
print(short_tcf_xml)

<D-Spin version="0.4" xmlns="http://www.dspin.de/data">
  <MetaData xmlns="http://www.dspin.de/data/metadata"/>
  <TextCorpus lang="et" xmlns="http://www.dspin.de/data/textcorpus">
    <text>Tere maailm!</text>
    <tc:tokens xmlns:tc="http://www.dspin.de/data/textcorpus">
      <tc:token ID="t0" start="0" end="4">Tere</tc:token>
      <tc:token ID="t1" start="5" end="11">maailm</tc:token>
      <tc:token ID="t2" start="11" end="12">!</tc:token>
    </tc:tokens>
    <tc:sentences xmlns:tc="http://www.dspin.de/data/textcorpus">
      <tc:sentence ID="s0" tokenIDs="t0 t1 t2"/>
    </tc:sentences>
  </TextCorpus>
</D-Spin>



In [4]:
# The TCF importer reads an XML file and returns a Text object.
# NOTE(review): the recorded output of this cell is an AssertionError, i.e. importing
# 'tcf/tcf_short.xml' failed when the notebook was last run — investigate the cause
# before relying on this example.
import_TCF(file='tcf/tcf_short.xml')

AssertionError: 

## Longer example

In [None]:
# Build the text used by the remaining cells: run segmentation and morphology,
# then attach three hand-made layers (clauses, verb_chains, time_phrases),
# each of which envelops spans of the words layer.
text = Text('Karin, kes lendab New Yorki, tahab seal veeta puhkuse. Ta tuleb teisel augustil tagasi.')
text.analyse('segmentation')
text.analyse('morphology')

# clauses layer
layer = Layer(name='clauses', enveloping='words')
layer.add_span(text.words[2:6])
# A discontinuous clause: start from word 0 and append words 7-10 to the same span list.
# NOTE(review): word indices depend on the tokenisation produced above — confirm
# they still select the intended words if the tokeniser changes.
spl = text.words[0:1]
spl.spans.extend(text.words.spans[7:11])
layer.add_span(spl)
layer.add_span(text.words[12:17])
text['clauses'] = layer

# verb_chains layer
# The stepped slices pick non-adjacent words (7 and 9; 13 and 16),
# assuming the words layer follows standard Python slice semantics.
layer = Layer(name='verb_chains', enveloping='words')
layer.add_span(text.words[3:4])
layer.add_span(text.words[7:10:2])
layer.add_span(text.words[13:17:3])
text['verb_chains'] = layer

# time_phrases layer
layer = Layer(name='time_phrases', enveloping='words')
layer.add_span(text.words[14:16])
text['time_phrases'] = layer

# Display the text object with all of its layers.
text

In [None]:
# Display the 'sentences' layer of the text.
text['sentences']

In [None]:
# Display the hand-made 'clauses' layer.
text['clauses']

In [None]:
# Display the hand-made 'verb_chains' layer.
text['verb_chains']

In [None]:
# Display the hand-made 'time_phrases' layer.
text['time_phrases']

The default version of the output of the TCF exporter is 0.4. This produces an XML document that can be used as input for the [WebLicht service](https://weblicht.sfs.uni-tuebingen.de/weblicht/).

In [None]:
# Export with the default version and write the result to tcf/tcf.xml.
# The trailing semicolon keeps the returned XML out of the cell output.
export_TCF(text, 'tcf/tcf.xml');

The output is in the file [tcf/tcf.xml](tcf/tcf.xml).

Version '0.5' produces XML for a possible future TCF version. It includes layers for clauses, verb_chains and time_phrases.

In [None]:
# Export using version '0.5' and print the resulting XML.
tcf_05_xml = export_TCF(text, version='0.5')
print(tcf_05_xml)

## Tests for exporter and importer

In [None]:
# Show the text object that the round-trip checks below operate on.
text

In [None]:
# Version 0.4 round trip: export -> import -> export should give the same XML
# as exporting the text directly.
round_trip_04 = export_TCF(import_TCF(export_TCF(text)))
direct_04 = export_TCF(text)
round_trip_04 == direct_04

In [None]:
# Version 0.5 round trip: export -> import -> export should give the same XML
# as exporting the text directly.
round_trip_05 = export_TCF(import_TCF(export_TCF(text, version='0.5')), version='0.5')
direct_05 = export_TCF(text, version='0.5')
round_trip_05 == direct_05

In [None]:
# Version 0.5: compare the Text object itself with its export -> import round trip.
# Two layers are removed from the original before comparing
# (presumably because the importer does not restore them — TODO confirm).
# Note: this mutates `text` in place, so the cell cannot be re-run as is.
del text.normalized_words, text.paragraphs

reimported_05 = import_TCF(export_TCF(text, version='0.5'))
text == reimported_05

In [None]:
# Version 0.4: compare the Text object itself with its export -> import round trip.
# The clauses, verb_chains and time_phrases layers are removed first
# (presumably because only the 0.5 output carries them — TODO confirm).
# Note: this mutates `text` in place, so the cell cannot be re-run as is.
del text.clauses, text.verb_chains, text.time_phrases

reimported_04 = import_TCF(export_TCF(text))
text == reimported_04