# Export to JSON and import from JSON

EstNLTK has functions for converting data structures (`Text`, `Layer`, `Annotation` objects) into [JSON](https://www.json.org/) and back.

## `Text` objects
### Export to json string

In [1]:
from estnltk import Text
from estnltk.converters import text_to_json

In [2]:
#PYTEST_VALIDATE_IGNORE_OUTPUT
# Build a small example Text and tag it up to the `morph_analysis` layer.
text = Text('Ööbik laulab.').tag_layer('morph_analysis')
# Attach a metadata entry so that the export also demonstrates the "meta" field.
text.meta['year'] = 2017
# Serialise the whole Text object (raw text, meta and layers) into a JSON string.
json_text = text_to_json(text)
# Last expression: display the resulting JSON string.
json_text

'{"text": "Ööbik laulab.", "meta": {"year": 2017}, "layers": [{"name": "tokens", "attributes": [], "secondary_attributes": [], "parent": null, "enveloping": null, "ambiguous": false, "serialisation_module": null, "meta": {}, "spans": [{"base_span": [0, 5], "annotations": [{}]}, {"base_span": [6, 12], "annotations": [{}]}, {"base_span": [12, 13], "annotations": [{}]}]}, {"name": "compound_tokens", "attributes": ["type", "normalized"], "secondary_attributes": [], "parent": null, "enveloping": "tokens", "ambiguous": false, "serialisation_module": null, "meta": {}, "spans": []}, {"name": "words", "attributes": ["normalized_form"], "secondary_attributes": [], "parent": null, "enveloping": null, "ambiguous": true, "serialisation_module": null, "meta": {}, "spans": [{"base_span": [0, 5], "annotations": [{"normalized_form": null}]}, {"base_span": [6, 12], "annotations": [{"normalized_form": null}]}, {"base_span": [12, 13], "annotations": [{"normalized_form": null}]}]}, {"name": "morph_analysis

### Export/save to json file

In [3]:
# Path of the demo file; it is read back and deleted again further below.
filename = 'json_exporter_importer_demo.json'
# With `file=` given, the JSON is written to that path.
text_to_json(text, file=filename)

### Import from string

In [4]:
from estnltk.converters import json_to_text
# Rebuild a Text object from the JSON string produced by text_to_json above.
text_import = json_to_text(json_text=json_text)
# Last expression: display the imported Text.
text_import

text
Ööbik laulab.

0,1
year,2017

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,3
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,3
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3


### Import/load from json file

In [5]:
# Load the Text object back from the JSON file written above.
# Note: this rebinds `text_import`, replacing the result of the string-based import.
text_import = json_to_text(file=filename)
# Last expression: display the imported Text.
text_import

text
Ööbik laulab.

0,1
year,2017

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,1
tokens,,,,False,3
compound_tokens,"type, normalized",,tokens,False,0
words,normalized_form,,,True,3
morph_analysis,"normalized_text, lemma, root, root_tokens, ending, clitic, form, partofspeech",words,,True,3


### Quick test

In [6]:
# Round-trip check: both import paths must reproduce the original Text.
# At this point `text_import` holds the Text loaded from the JSON file.
assert text_import == text
# `text_import` no longer holds the string-based result (it was rebound by the
# file-based import), so the string round-trip is re-run here to check it too.
assert json_to_text(json_text=json_text) == text

In [7]:
# Clean up: delete the demo JSON file that text_to_json wrote above.
import os
os.remove(filename)

---

## `Layer` objects
### Export to json string

In [8]:
from estnltk.converters import layer_to_json

In [9]:
# Serialise only the `words` layer of the example Text into a JSON string.
json_layer = layer_to_json(text['words'])
# Last expression: display the resulting JSON string.
json_layer

'{"name": "words", "attributes": ["normalized_form"], "secondary_attributes": [], "parent": null, "enveloping": null, "ambiguous": true, "serialisation_module": null, "meta": {}, "spans": [{"base_span": [0, 5], "annotations": [{"normalized_form": null}]}, {"base_span": [6, 12], "annotations": [{"normalized_form": null}]}, {"base_span": [12, 13], "annotations": [{"normalized_form": null}]}]}'

### Export/save to json file

In [10]:
# With `file=` given, the layer's JSON is written to that path (deleted again further below).
layer_to_json(text['words'], file='test_words_layer_export.json')

### Import from string

To import a layer from JSON, the corresponding `Text` object must also be provided:

In [11]:
from estnltk.converters import json_to_layer
# The first argument is the Text object the layer belongs to; `json_str` is the exported layer.
layer_import = json_to_layer(text, json_str=json_layer)
# Last expression: display the imported layer.
layer_import

layer name,attributes,parent,enveloping,ambiguous,span count
words,normalized_form,,,True,3

text,normalized_form
Ööbik,
laulab,
.,


### Import/load from json file

In [12]:
# Same import as above, but reading the JSON from the file written by layer_to_json.
layer_import2 = json_to_layer(text, file='test_words_layer_export.json')
# Last expression: display the imported layer.
layer_import2

layer name,attributes,parent,enveloping,ambiguous,span count
words,normalized_form,,,True,3

text,normalized_form
Ööbik,
laulab,
.,


### Quick test

In [13]:
# The layer imported from the JSON string must equal the original `words` layer.
assert text['words'] == layer_import

In [14]:
# The string-based and the file-based import must give equal layers.
assert layer_import == layer_import2

In [15]:
# Clean up: delete the layer JSON file that layer_to_json wrote above.
import os
os.remove('test_words_layer_export.json')

---

---

## Export and import `Annotation` objects

In [16]:
from estnltk.converters import annotation_to_json
from estnltk.converters import json_to_annotation

The examples below are based on the following span with three annotations.

In [17]:
from estnltk.tests import new_text
# Index 2 picks the third span of `layer_1` in the example text returned by new_text(5);
# as the last expression, the span is displayed together with its annotations.
new_text(5).layer_1[2]

text,attr,attr_1
kakskümmend,L1-2,KAKS
,L1-2,KÜMME
,L1-2,KAKSKÜMMEND


### Export one annotation

In [18]:
# Take the first annotation of the example span.
annotation = new_text(5).layer_1[2].annotations[0]

# Serialise that single annotation to a JSON string (displayed as the cell output).
annotation_to_json(annotation)

'{"attr": "L1-2", "attr_1": "KAKS"}'

### Export a list of annotations

In [19]:
from estnltk.converters import to_json, annotation_to_dict

# All annotations of the example span.
annotations = new_text(5).layer_1[2].annotations

# Convert every annotation to a plain dict, then serialise the list of dicts in one call.
to_json(list(map(annotation_to_dict, annotations)))

'[{"attr": "L1-2", "attr_1": "KAKS"}, {"attr": "L1-2", "attr_1": "KÜMME"}, {"attr": "L1-2", "attr_1": "KAKSKÜMMEND"}]'

### Import an annotation
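An annotation is normally attached to a span, and the span belongs to a layer that defines the annotation's attributes. The examples below pass `None` as the span, so the imported annotations are not attached to any span or layer.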

In [20]:
# span=None: the annotation is built from the JSON object alone, without a span
# (the displayed result shows None in the span position).
json_to_annotation(span=None, json_str='{"attr_1": "KAKS", "attr": "L1-2"}')

Annotation(None, {'attr': 'L1-2', 'attr_1': 'KAKS'})

### Import a list of annotations

In [21]:
from estnltk.converters import from_json, dict_to_annotation

# A JSON array holding three annotation objects, assembled from one piece per annotation.
json_str = (
    '[{"attr_1": "KAKS", "attr": "L1-2"},'
    + ' {"attr_1": "KÜMME", "attr": "L1-2"},'
    + ' {"attr_1": "KAKSKÜMMEND", "attr": "L1-2"}]'
)

# Decode the array into dicts and turn each dict into an annotation with no span.
[dict_to_annotation(None, record) for record in from_json(json_str)]

[Annotation(None, {'attr': 'L1-2', 'attr_1': 'KAKS'}),
 Annotation(None, {'attr': 'L1-2', 'attr_1': 'KÜMME'}),
 Annotation(None, {'attr': 'L1-2', 'attr_1': 'KAKSKÜMMEND'})]