From 2fe4f679c08d6994804d3181849da3e096955551 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 2 Dec 2016 18:00:00 -0500 Subject: [PATCH 1/3] [#3344] datastore dump: internally paginate with datastore_search --- ckanext/datastore/controller.py | 57 +++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index b872c1d3a78..9cb3a9e2974 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -12,6 +12,9 @@ from ckan.common import request +PAGINATE_BY = 10000 + + class DatastoreController(base.BaseController): def dump(self, resource_id): context = { @@ -20,27 +23,33 @@ def dump(self, resource_id): 'user': p.toolkit.c.user } - data_dict = { - 'resource_id': resource_id, - 'limit': request.GET.get('limit', 100000), - 'offset': request.GET.get('offset', 0) - } - - action = p.toolkit.get_action('datastore_search') - try: - result = action(context, data_dict) - except p.toolkit.ObjectNotFound: - base.abort(404, p.toolkit._('DataStore resource not found')) - - pylons.response.headers['Content-Type'] = 'text/csv' - pylons.response.headers['Content-disposition'] = \ - 'attachment; filename="{name}.csv"'.format(name=resource_id) - f = StringIO.StringIO() - wr = csv.writer(f, encoding='utf-8') - - header = [x['id'] for x in result['fields']] - wr.writerow(header) - - for record in result['records']: - wr.writerow([record[column] for column in header]) - return f.getvalue() + offset = 0 + wr = None + while True: + data_dict = { + 'resource_id': resource_id, + 'limit': request.GET.get('limit', PAGINATE_BY), + 'offset': request.GET.get('offset', offset) + } + + action = p.toolkit.get_action('datastore_search') + try: + result = action(context, data_dict) + except p.toolkit.ObjectNotFound: + base.abort(404, p.toolkit._('DataStore resource not found')) + + if not wr: + pylons.response.headers['Content-Type'] = 'text/csv' + pylons.response.headers['Content-disposition'] = \ + 'attachment; filename="{name}.csv"'.format(name=resource_id) + wr = csv.writer(pylons.response, encoding='utf-8') + + header = [x['id'] for x in result['fields']] + wr.writerow(header) + + for record in result['records']: + wr.writerow([record[column] for column in header]) + + offset += PAGINATE_BY + if len(result['records']) < PAGINATE_BY: + break From e29e8cd76b99db449d713896ea22471cd3550372 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 2 Dec 2016 18:12:58 -0500 Subject: [PATCH 2/3] [#3344] pep8 --- ckanext/datastore/controller.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 9cb3a9e2974..7b5ce6e50df 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -40,8 +40,9 @@ def dump(self, resource_id): if not wr: pylons.response.headers['Content-Type'] = 'text/csv' - pylons.response.headers['Content-disposition'] = \ - 'attachment; filename="{name}.csv"'.format(name=resource_id) + pylons.response.headers['Content-disposition'] = ( + 'attachment; filename="{name}.csv"'.format( + name=resource_id)) wr = csv.writer(pylons.response, encoding='utf-8') header = [x['id'] for x in result['fields']] From cb020f80368f5b903cdda6ddcd72d44122c96e14 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 16 Dec 2016 12:41:44 -0500 Subject: [PATCH 3/3] [#3344] handle offset, limit; better toolkit imports --- ckanext/datastore/controller.py | 63 ++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 7b5ce6e50df..97a53d98677 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -5,45 +5,56 @@ import pylons -import ckan.plugins as p -import ckan.lib.base as base -import ckan.model as model - -from ckan.common import request +from ckan.plugins.toolkit import ( + Invalid, + ObjectNotFound, + get_action, + get_validator, + _, + request, + response, + BaseController, + abort, +) +int_validator = get_validator('int_validator') PAGINATE_BY = 10000 -class DatastoreController(base.BaseController): +class DatastoreController(BaseController): def dump(self, resource_id): - context = { - 'model': model, - 'session': model.Session, - 'user': p.toolkit.c.user - } + try: + offset = int_validator(request.GET.get('offset', 0), {}) + except Invalid as e: + abort(400, u'offset: ' + e.error) + try: + limit = int_validator(request.GET.get('limit'), {}) + except Invalid as e: + abort(400, u'limit: ' + e.error) - offset = 0 wr = None while True: - data_dict = { - 'resource_id': resource_id, - 'limit': request.GET.get('limit', PAGINATE_BY), - 'offset': request.GET.get('offset', offset) - } + if limit is not None and limit <= 0: + break - action = p.toolkit.get_action('datastore_search') try: - result = action(context, data_dict) - except p.toolkit.ObjectNotFound: - base.abort(404, p.toolkit._('DataStore resource not found')) + result = get_action('datastore_search')(None, { + 'resource_id': resource_id, + 'limit': + PAGINATE_BY if limit is None + else min(PAGINATE_BY, limit), + 'offset': offset, + }) + except ObjectNotFound: + abort(404, _('DataStore resource not found')) if not wr: - pylons.response.headers['Content-Type'] = 'text/csv' - pylons.response.headers['Content-disposition'] = ( + response.headers['Content-Type'] = 'text/csv; charset=utf-8' + response.headers['Content-disposition'] = ( 'attachment; filename="{name}.csv"'.format( name=resource_id)) - wr = csv.writer(pylons.response, encoding='utf-8') + wr = csv.writer(response, encoding='utf-8') header = [x['id'] for x in result['fields']] wr.writerow(header) @@ -51,6 +62,8 @@ def dump(self, resource_id): for record in result['records']: wr.writerow([record[column] for column in header]) - offset += PAGINATE_BY if len(result['records']) < PAGINATE_BY: break + offset += PAGINATE_BY + if limit is not None: + limit -= PAGINATE_BY