Skip to content

Commit

Permalink
Adds an option to do the ANALYZE in datastore_upsert and datastore_delete
Browse files Browse the repository at this point in the history
  • Loading branch information
David Read committed Oct 26, 2018
1 parent 8aa298d commit 6428c48
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 75 deletions.
1 change: 1 addition & 0 deletions ckanext/datastore/backend/postgres.py
Expand Up @@ -1163,6 +1163,7 @@ def validate(context, data_dict):
data_dict_copy.pop('include_total', None)
data_dict_copy.pop('total_estimation_threshold', None)
data_dict_copy.pop('records_format', None)
data_dict_copy.pop('calculate_record_count', None)

for key, values in data_dict_copy.iteritems():
if not values:
Expand Down
2 changes: 1 addition & 1 deletion ckanext/datastore/logic/action.py
Expand Up @@ -338,7 +338,7 @@ def datastore_delete(context, data_dict):
:rtype: dictionary
'''
schema = context.get('schema', dsschema.datastore_upsert_schema())
schema = context.get('schema', dsschema.datastore_delete_schema())
backend = DatastoreBackend.get_active_backend()

# Remove any applied filters before running validation.
Expand Down
4 changes: 4 additions & 0 deletions ckanext/datastore/logic/schema.py
Expand Up @@ -137,6 +137,8 @@ def datastore_upsert_schema():
'id': [ignore_missing],
'method': [ignore_missing, text_type, OneOf(
['upsert', 'insert', 'update'])],
'calculate_record_count': [ignore_missing, default(False),
boolean_validator],
'dry_run': [ignore_missing, boolean_validator],
'__junk': [empty],
'__before': [rename('id', 'resource_id')]
Expand All @@ -149,6 +151,8 @@ def datastore_delete_schema():
'resource_id': [not_missing, not_empty, text_type],
'force': [ignore_missing, boolean_validator],
'id': [ignore_missing],
'calculate_record_count': [ignore_missing, default(False),
boolean_validator],
'__junk': [empty],
'__before': [rename('id', 'resource_id')]
}
Expand Down
15 changes: 15 additions & 0 deletions ckanext/datastore/tests/helpers.py
Expand Up @@ -60,6 +60,21 @@ def set_url_type(resources, user):
p.toolkit.get_action('resource_update')(context, resource)


def execute_sql(sql, *args):
    '''Execute ``sql`` on a connection to the datastore write engine.

    Each call builds a fresh scoped session bound to the engine returned
    by ``db.get_write_engine()``. Any extra positional ``args`` are handed
    straight to ``execute`` as the statement's parameters.

    Returns whatever the connection's ``execute`` call returns.
    '''
    write_engine = db.get_write_engine()
    session_factory = orm.sessionmaker(bind=write_engine)
    scoped = orm.scoped_session(session_factory)
    connection = scoped.connection()
    return connection.execute(sql, *args)


def when_was_last_analyze(resource_id):
    '''Return the ``last_analyze`` value recorded in
    ``pg_stat_user_tables`` for the relation named ``resource_id``.

    The first column of the first matching row is returned, so the lookup
    fails when the query yields no rows.
    '''
    query = '''SELECT last_analyze
FROM pg_stat_user_tables
WHERE relname=%s;
'''
    rows = execute_sql(query, resource_id).fetchall()
    first_row = rows[0]
    return first_row[0]


class DatastoreFunctionalTestBase(FunctionalTestBase):
_load_plugins = (u'datastore', )

Expand Down
42 changes: 12 additions & 30 deletions ckanext/datastore/tests/test_create.py
@@ -1,7 +1,6 @@
# encoding: utf-8

import json
import nose
from nose.tools import assert_equal, assert_not_equal, raises

import sqlalchemy.orm as orm
Expand All @@ -11,13 +10,13 @@
import ckan.plugins as p
import ckan.lib.create_test_data as ctd
import ckan.model as model
import ckan.tests.legacy as tests
import ckan.tests.helpers as helpers
import ckan.tests.factories as factories

import ckanext.datastore.backend.postgres as db
from ckanext.datastore.tests.helpers import (
set_url_type, DatastoreFunctionalTestBase, DatastoreLegacyTestBase)
set_url_type, DatastoreFunctionalTestBase, DatastoreLegacyTestBase,
execute_sql, when_was_last_analyze)
from ckan.plugins.toolkit import ValidationError


Expand Down Expand Up @@ -163,7 +162,7 @@ def _has_index_on_field(self, resource_id, field):
pg_class.relname = %s
"""
index_name = db._generate_index_name(resource_id, field)
results = self._execute_sql(sql, index_name).fetchone()
results = execute_sql(sql, index_name).fetchone()
return bool(results)

def _get_index_names(self, resource_id):
Expand All @@ -180,14 +179,9 @@ def _get_index_names(self, resource_id):
AND t.relkind = 'r'
AND t.relname = %s
"""
results = self._execute_sql(sql, resource_id).fetchall()
results = execute_sql(sql, resource_id).fetchall()
return [result[0] for result in results]

def _execute_sql(self, sql, *args):
engine = db.get_write_engine()
session = orm.scoped_session(orm.sessionmaker(bind=engine))
return session.connection().execute(sql, *args)

def test_sets_datastore_active_on_resource_on_create(self):
resource = factories.Resource()

Expand Down Expand Up @@ -244,29 +238,25 @@ def test_create_exceeds_column_name_limit(self):
}
result = helpers.call_action('datastore_create', **data)

def test_analyze_not_run_by_default(self):
package = factories.Dataset(resources=[
{'url': 'https://example.com/file.csv', 'format': 'csv', 'name': 'Image 1'}])
resource_id = package['resources'][0]['id']
def test_calculate_record_count_is_false(self):
resource = factories.Resource()
data = {
'resource_id': resource_id,
'resource_id': resource['id'],
'fields': [{'id': 'name', 'type': 'text'},
{'id': 'age', 'type': 'text'}],
'records': [{"name": "Sunita", "age": "51"},
{"name": "Bowan", "age": "68"}],
'force': True,
}
helpers.call_action('datastore_create', **data)
last_analyze = self._when_was_last_analyze(resource_id)
last_analyze = when_was_last_analyze(resource['id'])
assert_equal(last_analyze, None)

def test_create_with_records(self):
def test_calculate_record_count(self):
# how datapusher loads data (send_resource_to_datastore)
package = factories.Dataset(resources=[
{'url': 'https://example.com/file.csv', 'format': 'csv', 'name': 'Image 1'}])
resource_id = package['resources'][0]['id']
resource = factories.Resource()
data = {
'resource_id': resource_id,
'resource_id': resource['id'],
'fields': [{'id': 'name', 'type': 'text'},
{'id': 'age', 'type': 'text'}],
'records': [{"name": "Sunita", "age": "51"},
Expand All @@ -275,17 +265,9 @@ def test_create_with_records(self):
'force': True,
}
helpers.call_action('datastore_create', **data)
last_analyze = self._when_was_last_analyze(resource_id)
last_analyze = when_was_last_analyze(resource['id'])
assert_not_equal(last_analyze, None)

def _when_was_last_analyze(self, resource_id):
results = self._execute_sql(
'''SELECT last_analyze
FROM pg_stat_user_tables
WHERE relname=%s;
''', resource_id).fetchall()
return results[0][0]


class TestDatastoreCreate(DatastoreLegacyTestBase):
sysadmin_user = None
Expand Down
115 changes: 79 additions & 36 deletions ckanext/datastore/tests/test_delete.py
@@ -1,37 +1,106 @@
# encoding: utf-8

import json
import nose
from nose.tools import assert_equal
from nose.tools import assert_equal, assert_not_equal, assert_raises

import sqlalchemy
import sqlalchemy.orm as orm

import ckan.plugins as p
import ckan.lib.create_test_data as ctd
import ckan.model as model
import ckan.tests.legacy as tests
from ckan.tests import helpers
from ckan.plugins.toolkit import ValidationError
import ckan.tests.factories as factories
from ckan.logic import NotFound
import ckanext.datastore.backend.postgres as db
from ckanext.datastore.tests.helpers import (
rebuild_all_dbs, set_url_type,
set_url_type, when_was_last_analyze, execute_sql,
DatastoreFunctionalTestBase, DatastoreLegacyTestBase)

assert_raises = nose.tools.assert_raises

class TestDatastoreDelete(DatastoreFunctionalTestBase):
def test_delete_basic(self):
    # Create a datastore table with an alias, delete the whole resource,
    # then check that neither the alias view nor the table is left.
    resource = factories.Resource()
    alias = u'b\xfck2'
    helpers.call_action(
        'datastore_create',
        resource_id=resource['id'],
        force=True,
        aliases=alias,
        fields=[{'id': 'book', 'type': 'text'},
                {'id': 'author', 'type': 'text'},
                {'id': 'rating with %', 'type': 'text'}],
        records=[{'book': 'annakarenina', 'author': 'tolstoy',
                  'rating with %': '90%'},
                 {'book': 'warandpeace', 'author': 'tolstoy',
                  'rating with %': '42%'}])
    helpers.call_action(
        'datastore_delete', resource_id=resource['id'], force=True)

    # the alias view must no longer exist
    view_rows = execute_sql(
        u'select 1 from pg_views where viewname = %s', alias)
    assert view_rows.rowcount == 0

    # check the table is gone
    table_query = u'''SELECT table_name
FROM information_schema.tables
WHERE table_name=%s;'''
    table_rows = execute_sql(table_query, resource['id'])
    assert table_rows.rowcount == 0

class TestDatastoreDelete(DatastoreLegacyTestBase):
def test_calculate_record_count_is_false(self):
    # A filtered delete issued without calculate_record_count is expected
    # to leave the table's last_analyze unset.
    resource = factories.Resource()
    resource_id = resource['id']
    helpers.call_action(
        'datastore_create',
        resource_id=resource_id,
        force=True,
        fields=[{'id': 'name', 'type': 'text'},
                {'id': 'age', 'type': 'text'}],
        records=[{"name": "Sunita", "age": "51"},
                 {"name": "Bowan", "age": "68"}])
    helpers.call_action(
        'datastore_delete',
        resource_id=resource_id,
        filters={'name': 'Bowan'},
        force=True)
    assert_equal(when_was_last_analyze(resource_id), None)

def test_calculate_record_count(self):
    # A filtered delete issued with calculate_record_count=True is
    # expected to leave a last_analyze value on the table.
    resource = factories.Resource()
    resource_id = resource['id']
    helpers.call_action(
        'datastore_create',
        resource_id=resource_id,
        force=True,
        fields=[{'id': 'name', 'type': 'text'},
                {'id': 'age', 'type': 'text'}],
        records=[{"name": "Sunita", "age": "51"},
                 {"name": "Bowan", "age": "68"}])
    helpers.call_action(
        'datastore_delete',
        resource_id=resource_id,
        filters={'name': 'Bowan'},
        calculate_record_count=True,
        force=True)
    assert_not_equal(when_was_last_analyze(resource_id), None)


class TestDatastoreDeleteLegacy(DatastoreLegacyTestBase):
sysadmin_user = None
normal_user = None
Session = None

@classmethod
def setup_class(cls):
cls.app = helpers._get_test_app()
super(TestDatastoreDelete, cls).setup_class()
super(TestDatastoreDeleteLegacy, cls).setup_class()
ctd.CreateTestData.create()
cls.sysadmin_user = model.User.get('testsysadmin')
cls.normal_user = model.User.get('annafan')
Expand Down Expand Up @@ -74,32 +143,6 @@ def _delete(self):
assert res_dict['result'] == data
return res_dict

def test_delete_basic(self):
self._create()
self._delete()
resource_id = self.data['resource_id']
c = self.Session.connection()

# It's dangerous to build queries as someone could inject sql.
# It's okay here as it is a test but don't use it anywhere else!
results = c.execute(
u"select 1 from pg_views where viewname = '{0}'".format(
self.data['aliases']
)
)
assert results.rowcount == 0

try:
# check that data was actually deleted: this should raise a
# ProgrammingError as the table should not exist any more
c.execute(u'select * from "{0}";'.format(resource_id))
raise Exception("Data not deleted")
except sqlalchemy.exc.ProgrammingError as e:
expected_msg = 'relation "{0}" does not exist'.format(resource_id)
assert expected_msg in str(e)

self.Session.remove()

def test_datastore_deleted_during_resource_deletion(self):
package = factories.Dataset()
data = {
Expand Down Expand Up @@ -320,7 +363,7 @@ def test_delete_nonexistant(self):
else:
assert 0, u'no validation error'

def test_delete_if_exitst(self):
def test_delete_if_exists(self):
helpers.call_action(
u'datastore_function_delete',
name=u'test_not_there_either',
Expand Down
52 changes: 44 additions & 8 deletions ckanext/datastore/tests/test_upsert.py
@@ -1,26 +1,21 @@
# encoding: utf-8

import json
import nose
import datetime
from nose.tools import assert_equal, assert_not_equal

import sqlalchemy.orm as orm

import ckan.plugins as p
import ckan.lib.create_test_data as ctd
import ckan.model as model
import ckan.tests.legacy as tests
import ckan.tests.helpers as helpers
import ckan.tests.factories as factories
from ckan.plugins.toolkit import ValidationError

from ckan.common import config

import ckanext.datastore.backend.postgres as db
from ckanext.datastore.tests.helpers import (
set_url_type, DatastoreFunctionalTestBase, DatastoreLegacyTestBase)

assert_equal = nose.tools.assert_equal
set_url_type, DatastoreFunctionalTestBase, DatastoreLegacyTestBase,
when_was_last_analyze)


class TestDatastoreUpsert(DatastoreFunctionalTestBase):
Expand Down Expand Up @@ -135,6 +130,47 @@ def test_dry_run_trigger_error(self):
else:
assert 0, 'error not raised'

def test_calculate_record_count_is_false(self):
    # An insert via datastore_upsert without calculate_record_count is
    # expected to leave the table's last_analyze unset.
    resource = factories.Resource()
    resource_id = resource['id']
    helpers.call_action(
        'datastore_create',
        resource_id=resource_id,
        force=True,
        fields=[{'id': 'name', 'type': 'text'},
                {'id': 'age', 'type': 'text'}])
    helpers.call_action(
        'datastore_upsert',
        resource_id=resource_id,
        force=True,
        method='insert',
        records=[{"name": "Sunita", "age": "51"},
                 {"name": "Bowan", "age": "68"}])
    assert_equal(when_was_last_analyze(resource_id), None)

def test_calculate_record_count(self):
    # An insert via datastore_upsert with calculate_record_count=True is
    # expected to leave a last_analyze value on the table.
    resource = factories.Resource()
    resource_id = resource['id']
    helpers.call_action(
        'datastore_create',
        resource_id=resource_id,
        force=True,
        fields=[{'id': 'name', 'type': 'text'},
                {'id': 'age', 'type': 'text'}])
    helpers.call_action(
        'datastore_upsert',
        resource_id=resource_id,
        force=True,
        method='insert',
        records=[{"name": "Sunita", "age": "51"},
                 {"name": "Bowan", "age": "68"}],
        calculate_record_count=True)
    assert_not_equal(when_was_last_analyze(resource_id), None)


class TestDatastoreUpsertLegacyTests(DatastoreLegacyTestBase):
sysadmin_user = None
Expand Down

0 comments on commit 6428c48

Please sign in to comment.