# Various sanity checks 

## I. Validate source collection

In [14]:
# load configuration

import configparser
import os

# Load the INI configuration and verify that the [source_database] section
# contains every option needed to connect to the source collection.
config = configparser.ConfigParser()

# Build the relative path with os.path.join. The former literal
# 'config\example_configuration.ini' relied on the invalid escape sequence
# '\e' and only pointed inside the config directory on Windows.
config_file = os.path.join('config', 'example_configuration.ini')

# Expand environment variables and '~', then make the path absolute so that
# the messages and the cell output show exactly which file was used.
file_name = os.path.abspath(os.path.expanduser(os.path.expandvars(str(config_file))))

if not os.path.exists(file_name):
    raise ValueError("File {file} does not exist".format(file=str(config_file)))

# ConfigParser.read returns the list of files it managed to parse; keep it so
# the file does not have to be parsed a second time just to display the result.
loaded_files = config.read(file_name)
if len(loaded_files) != 1:
    raise ValueError("File {file} is not accessible or is not in valid INI format".format(file=config_file))

for option in ["host", "port", "database", "username", "password", "role", "schema", "collection"]:
    if not config.has_option('source_database', option):
        # file_name is an absolute path and therefore never empty, so the
        # "Error in file" prelude is always part of the message.
        raise ValueError(
            "Error in file {file_name}\nMissing option {option} in the section [{section}]".format(
                file_name=file_name, option=option, section='source_database'
            )
        )

# Last expression of the cell: display the configuration file that was loaded.
loaded_files

['C:\\Users\\rasmu\\OneDrive\\Dokumendid\\estnltk-model-data\\named_entity_recognition\\recall_estimation\\data_generation\\amundsen_01\\config\\example_configuration.ini']

In [15]:
# check that we can connect

from estnltk.storage.postgres import PostgresStorage

# Read the connection settings from the [source_database] section in one go.
# The option names on the right are listed in the same order as the variables
# on the left.
host, port, dbname, user, password, role, schema, collection = (
    config['source_database'][option_name]
    for option_name in (
        'host', 'port', 'database', 'username', 'password', 'role', 'schema', 'collection'
    )
)

# Connect to the source database. The port is stored as text in the
# configuration file and is converted to an integer for the call.
storage = PostgresStorage(
    host=host,
    port=int(port),
    dbname=dbname,
    user=user,
    password=password,
    schema=schema,
    role=role,
    temporary=False,
)

INFO:storage.py:58: connecting to host: 'postgres.keeleressursid.ee', port: 5432, dbname: 'estonian-text-corpora', user: 'rasmusm'
INFO:storage.py:108: schema: 'estonian_text_corpora', temporary: False, role: 'estonian_text_corpora_read'


In [16]:
# validate that the collection has expected structure

# Replace the collection name with the collection object looked up in storage.
collection = storage[collection]

# The collection must have exactly the expected size ...
assert len(collection) == 21415543

# ... and provide both layers that the later steps depend on.
assert all(
    collection.has_layer(layer_name)
    for layer_name in ('v171_named_entities', 'v172_geo_terms')
)

## II. Validate target database 

In [22]:
# load configuration

import configparser
import os

# Load the INI configuration and verify that the [target_database] section
# contains every option needed to connect to the target database.
config = configparser.ConfigParser()

# Build the relative path with os.path.join. The former literal
# 'config\example_configuration.ini' relied on the invalid escape sequence
# '\e' and only pointed inside the config directory on Windows.
config_file = os.path.join('config', 'example_configuration.ini')

# Expand environment variables and '~', then make the path absolute so that
# the messages and the cell output show exactly which file was used.
file_name = os.path.abspath(os.path.expanduser(os.path.expandvars(str(config_file))))

if not os.path.exists(file_name):
    raise ValueError("File {file} does not exist".format(file=str(config_file)))

# ConfigParser.read returns the list of files it managed to parse; keep it so
# the file does not have to be parsed a second time just to display the result.
loaded_files = config.read(file_name)
if len(loaded_files) != 1:
    raise ValueError("File {file} is not accessible or is not in valid INI format".format(file=config_file))

for option in ["host", "port", "database", "username", "password", "schema", "collection"]:
    if not config.has_option('target_database', option):
        # file_name is an absolute path and therefore never empty, so the
        # "Error in file" prelude is always part of the message.
        raise ValueError(
            "Error in file {file_name}\nMissing option {option} in the section [{section}]".format(
                file_name=file_name, option=option, section='target_database'
            )
        )

# Last expression of the cell: display the configuration file that was loaded.
loaded_files

['C:\\Users\\rasmu\\OneDrive\\Dokumendid\\estnltk-model-data\\named_entity_recognition\\recall_estimation\\data_generation\\amundsen_01\\config\\example_configuration.ini']

In [23]:
# check that we can connect

from estnltk.storage.postgres import PostgresStorage

# Read the connection settings from the [target_database] section in one go.
# The option names on the right are listed in the same order as the variables
# on the left.
host, port, dbname, user, password, schema, collection = (
    config['target_database'][option_name]
    for option_name in (
        'host', 'port', 'database', 'username', 'password', 'schema', 'collection'
    )
)

# Connect to the target database. No role is configured for the target, so
# None is passed; the port is converted from text to an integer for the call.
localstorage = PostgresStorage(
    host=host,
    port=int(port),
    dbname=dbname,
    user=user,
    password=password,
    schema=schema,
    role=None,
    temporary=False,
)

INFO:storage.py:58: connecting to host: 'localhost', port: 5432, dbname: 'ner_test', user: 'postgres'
INFO:storage.py:108: schema: 'my_schema', temporary: False, role: 'postgres'
