# Storing `Text` objects with a syntax layer in a PostgreSQL database

This tutorial demonstrates how to store and query EstNLTK `Text` objects with a syntax layer in a PostgreSQL database.

In [1]:
from estnltk import Text, logger
from estnltk.taggers import VabamorfTagger, WordTagger
from estnltk.storage.postgres import PostgresStorage, delete_schema
from estnltk.storage.postgres import LayerQuery, WhereClause

## Access to the database

In [2]:
# Open a connection to the 'test_db' database and work in the schema 'my_schema'.
# NOTE(review): host, port, user and password are left as None; presumably they
# are looked up from the pgpass file — confirm.
storage = PostgresStorage(
    host=None,
    port=None,
    dbname='test_db',
    user=None,
    password=None,
    pgpass_file='~/.pgpass',
    schema='my_schema',
    role=None,
    temporary=False,
    create_schema_if_missing=True,  # the schema is created if it does not exist yet
)

INFO:storage.py:55: connecting to host: 'localhost', port: '5432', dbname: 'test_db', user: 'postgres'
INFO:storage.py:76: new schema 'my_schema' created
INFO:storage.py:106: schema: 'my_schema', temporary: False, role: 'postgres'


In [3]:
# Create a new, empty collection named 'my_collection' in the storage and display it.
collection = storage.add_collection('my_collection')
collection

INFO:storage.py:211: new empty collection 'my_collection' created


## Add texts

Read a text from a CoNLL file.

In [4]:
from estnltk.converters.conll.conll_importer import conll_to_text

# Build a Text object from the CoNLL file; the syntax annotations are placed
# in a layer named 'malt_1'.
text = conll_to_text(file='example.conll', syntax_layer='malt_1')
text

text
"Milliseks kujuneb Riigikassa ja Ühispanga vahekord ? Minu arvates on Eesti pangandus tehnoloogiliselt maailma tasemel . Eesti riik on veel suhteliselt laisaks kliendiks ( panganduses on kasutusel termin "" laisk raha "" , see tähendab ebaratsionaalne hoiustajale , kuid väga hea pangale ) . Valitsused üldiselt ongi nn laisaks kliendiks . Ilmselt need vahekorrad normaliseeruvad , selles mõttes , et optimeerimise käigus riik kui panga hoiustaja hakkab ratsionaalsemalt käituma . Kas kindlustus- , väärtpaberi- ja pangainspektsioon ühendatakse ? Teoreetiliselt on see ehk vale , aga praktiliselt ratsionaalne ."

layer name,attributes,parent,enveloping,ambiguous,span count
sentences,,,words,False,7
words,,,,True,88
malt_1,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",,,False,88


Split the text into sentences, so that each sentence becomes a separate `Text` object.

In [5]:
from estnltk_core.layer_operations import split_by_sentences

# Split the text into one Text object per sentence, carrying over only the
# 'words' and 'malt_1' layers.
texts = split_by_sentences(text, layers_to_keep=['words', 'malt_1'])

# Display the first sentence-level Text.
texts[0]

text
Milliseks kujuneb Riigikassa ja Ühispanga vahekord ?

layer name,attributes,parent,enveloping,ambiguous,span count
words,,,,True,7
malt_1,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",,,False,7


Note the value of the `serialisation_module` attribute of the syntax layer.

In [6]:
# Show which serialisation module is recorded on the syntax layer of the first sentence.
texts[0].malt_1.serialisation_module

'syntax_v0'

In [7]:
# Insert every sentence-level Text into the collection through the inserter
# returned by `collection.insert()`.
with collection.insert() as collection_insert:
    # A dedicated loop variable keeps the notebook-level `text` (the full
    # document read from the CoNLL file) from being overwritten.
    for sentence_text in texts:
        collection_insert(sentence_text)

INFO:collection_text_object_inserter.py:102: inserted 7 texts into the collection 'my_collection'


In [8]:
# Display the collection; its summary lists the layers stored with the texts.
collection

Unnamed: 0,layer_type,attributes,ambiguous,sparse,parent,enveloping,meta
words,attached,(),True,False,,,[]
malt_1,attached,"(id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children)",False,False,,,[]


The layers inserted together with the `Text` objects are stored in the same database table as the `Text` objects and are called **attached** layers.

### Iterate collection

In [9]:
# Number of Text objects stored in the collection.
len(collection)

7

In [10]:
# Fetch the first stored Text object from the collection by index and display it.
text = collection[0]
text

text
Milliseks kujuneb Riigikassa ja Ühispanga vahekord ?

layer name,attributes,parent,enveloping,ambiguous,span count
words,,,,True,7
malt_1,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",,,False,7


In [11]:
# Inspect the syntax layer of the Text retrieved from the collection.
text.malt_1

layer name,attributes,parent,enveloping,ambiguous,span count
malt_1,"id, lemma, upostag, xpostag, feats, head, deprel, deps, misc, parent_span, children",,,False,7

text,id,lemma,upostag,xpostag,feats,head,deprel,deps,misc,parent_span,children
Milliseks,1,milline,P,P,"{'sg': '', 'tr': '', 'rel': ''}",2,@ADVL,,,"Span('kujuneb', [{'id': 2, 'lemma': 'kujune', 'upostag': 'V', 'xpostag': 'V', 'f ..., type: <class 'estnltk_core.layer.span.Span'>",()
kujuneb,2,kujune,V,V,"{'sg': '', 'ps3': '', 'pres': '', 'indic': ''}",0,ROOT,,,,"(""Span('Milliseks', [{'id': 1, 'lemma': 'milline', 'upostag': 'P', 'xpostag': 'P ..., type: <class 'tuple'>, length: 3"
Riigikassa,3,Riigi_kassa,S,H,"{'sg': '', 'nom': ''}",2,@SUBJ,,,"Span('kujuneb', [{'id': 2, 'lemma': 'kujune', 'upostag': 'V', 'xpostag': 'V', 'f ..., type: <class 'estnltk_core.layer.span.Span'>",()
ja,4,ja,J,Jc,,6,@J,,,"Span('vahekord', [{'id': 6, 'lemma': 'vahe_kord', 'upostag': 'S', 'xpostag': 'S' ..., type: <class 'estnltk_core.layer.span.Span'>",()
Ühispanga,5,ühis_pank,S,S,"{'sg': '', 'gen': ''}",6,@NN>,,,"Span('vahekord', [{'id': 6, 'lemma': 'vahe_kord', 'upostag': 'S', 'xpostag': 'S' ..., type: <class 'estnltk_core.layer.span.Span'>",()
vahekord,6,vahe_kord,S,S,"{'sg': '', 'nom': ''}",2,@SUBJ,,,"Span('kujuneb', [{'id': 2, 'lemma': 'kujune', 'upostag': 'V', 'xpostag': 'V', 'f ..., type: <class 'estnltk_core.layer.span.Span'>","(""Span('ja', [{'id': 4, 'lemma': 'ja', 'upostag': 'J', 'xpostag': 'Jc', 'feats': ..., type: <class 'tuple'>, length: 3"
?,7,?,Z,Z,{'Int': ''},6,@Punc,,,"Span('vahekord', [{'id': 6, 'lemma': 'vahe_kord', 'upostag': 'S', 'xpostag': 'S' ..., type: <class 'estnltk_core.layer.span.Span'>",()


In [12]:
# Clean up: delete the collection created in this tutorial, addressed by its name.
storage.delete_collection( collection.name )

In [13]:
# Drop the tutorial schema and close the storage. The `finally` clause makes
# sure the storage is closed even if deleting the schema raises an error.
try:
    delete_schema(storage)
finally:
    storage.close()