# Export to json and import from json

In [1]:
from estnltk import Span, Layer, Text
from estnltk.tests import new_text

## Export and import `Text` objects
### Export to json string

In [2]:
from estnltk.converters import text_to_json

In [3]:
#PYTEST_VALIDATE_IGNORE_OUTPUT
# NOTE(review): the marker above presumably tells the notebook validator not
# to compare this cell's output -- confirm against the test setup.
# Build a small example text: run the morphology analysis and attach one
# metadata entry, which also shows up in the exported JSON.
text = Text('Ööbik laulab.').analyse('morphology')
text.meta['year'] = 2017
# Called without a `file` argument, text_to_json returns the JSON as a string.
json_text = text_to_json(text)
json_text

'{"text": "Ööbik laulab.", "meta": {"year": 2017}, "layers": [{"name": "words", "attributes": ["normalized_form"], "parent": null, "enveloping": null, "ambiguous": true, "serialisation_module": null, "meta": {}, "spans": [{"base_span": [0, 5], "annotations": [{"normalized_form": null}]}, {"base_span": [6, 12], "annotations": [{"normalized_form": null}]}, {"base_span": [12, 13], "annotations": [{"normalized_form": null}]}]}, {"name": "morph_analysis", "attributes": ["lemma", "root", "root_tokens", "ending", "clitic", "form", "partofspeech"], "parent": "words", "enveloping": null, "ambiguous": true, "serialisation_module": null, "meta": {}, "spans": [{"base_span": [0, 5], "annotations": [{"lemma": "ööbik", "root": "ööbik", "root_tokens": ["ööbik"], "ending": "0", "clitic": "", "form": "sg n", "partofspeech": "S"}]}, {"base_span": [6, 12], "annotations": [{"lemma": "laulma", "root": "laul", "root_tokens": ["laul"], "ending": "b", "clitic": "", "form": "b", "partofspeech": "V"}]}, {"base_s

### Export to json file

In [4]:
# With `file` given, the JSON is written to that path; the same `filename`
# is read back and finally removed by later cells of this notebook.
filename = 'json_exporter_importer_demo.json'
text_to_json(text, file=filename)

### Import from string

In [5]:
from estnltk.converters import json_to_text
# Rebuild a Text object from the JSON string held in `json_text`.
text_import = json_to_text(json_text=json_text)
text_import

text
Ööbik laulab.

0,1
year,2017

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
words,normalized_form,,,True,3
morph_analysis,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3


### Import from json file

In [6]:
# Same importer, this time reading the JSON from the file at `filename`.
# Note that this rebinds `text_import`, replacing the string-import result.
text_import = json_to_text(file=filename)
text_import

text
Ööbik laulab.

0,1
year,2017

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
words,normalized_form,,,True,3
morph_analysis,"lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3


### Quick test

In [7]:
# `text_import` holds only the most recent import (from the file), so the
# string round trip is checked explicitly as well.
assert json_to_text(json_text=json_text) == text, 'JSON string round trip changed the text'
assert text_import == text, 'JSON file round trip changed the text'

In [8]:
import os

# Remove the demo file written during the export step. The existence check
# keeps this cell re-runnable: a second run would otherwise raise
# FileNotFoundError because the file is already gone.
if os.path.exists(filename):
    os.remove(filename)

## Export and import `Annotation` objects

In [9]:
from estnltk.converters import annotation_to_json
from estnltk.converters import json_to_annotation

The examples below are based on the following span with three annotations.

In [10]:
# Display the span at index 2 of `layer_1` in the test text from new_text(5).
new_text(5).layer_1[2]

text,attr,attr_1
kakskümmend,L1-2,KAKS
,L1-2,KÜMME
,L1-2,KAKSKÜMMEND


### Export one annotation

In [11]:
# Pick the first annotation of the example span ...
example_span = new_text(5).layer_1[2]
annotation = example_span.annotations[0]

# ... and serialise that single annotation to a JSON string.
annotation_to_json(annotation)

'{"attr": "L1-2", "attr_1": "KAKS"}'

### Export a list of annotations

In [12]:
from estnltk.converters import to_json, annotation_to_dict

annotations = new_text(5).layer_1[2].annotations

# Convert every annotation to a plain dict first, then dump the whole
# list of dicts as one JSON string.
annotation_dicts = list(map(annotation_to_dict, annotations))
to_json(annotation_dicts)

'[{"attr": "L1-2", "attr_1": "KAKS"}, {"attr": "L1-2", "attr_1": "KÜMME"}, {"attr": "L1-2", "attr_1": "KAKSKÜMMEND"}]'

### Import an annotation
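An annotation normally belongs to a span, and the layer of that span defines the annotation's attributes. In the example below no span is attached (`span=None`), so the imported annotation simply carries the attributes found in the JSON string.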

In [13]:
# `span=None`: the annotation is created without being attached to a span
# (the repr of the result shows None in the span position).
json_to_annotation(span=None, json_str='{"attr_1": "KAKS", "attr": "L1-2"}')

Annotation(None, {'attr': 'L1-2', 'attr_1': 'KAKS'})

### Import a list of annotations

In [14]:
from estnltk.converters import from_json, dict_to_annotation

# A JSON array holding three annotation records.
json_str = ('[{"attr_1": "KAKS", "attr": "L1-2"},'
            ' {"attr_1": "KÜMME", "attr": "L1-2"},'
            ' {"attr_1": "KAKSKÜMMEND", "attr": "L1-2"}]')

# Parse the array into plain dicts and turn each dict into an Annotation
# that is not attached to any span (first argument is None).
imported_annotations = []
for annotation_dict in from_json(json_str):
    imported_annotations.append(dict_to_annotation(None, annotation_dict))
imported_annotations

[Annotation(None, {'attr': 'L1-2', 'attr_1': 'KAKS'}),
 Annotation(None, {'attr': 'L1-2', 'attr_1': 'KÜMME'}),
 Annotation(None, {'attr': 'L1-2', 'attr_1': 'KAKSKÜMMEND'})]