Merge pull request #3344 from ckan/3344-big-dumps
Allow larger datastore dumps
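The previous dump() buffered an entire resource in memory: it made a single datastore_search call (defaulting to a 100,000-row limit), wrote the CSV into a StringIO and returned it in one piece, so rows beyond that limit were silently dropped. The new version validates the limit and offset query parameters, then pages through the resource PAGINATE_BY (10,000) rows at a time, writing each page of rows directly to the response, so dumps of arbitrarily large resources stream out without being held in memory.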
amercader committed Dec 17, 2016
2 parents d250fcf + cb020f8 commit ccb8dd5
Showing 1 changed file with 57 additions and 34 deletions.
91 changes: 57 additions & 34 deletions ckanext/datastore/controller.py
@@ -5,42 +5,65 @@
 
 import pylons
 
-import ckan.plugins as p
-import ckan.lib.base as base
-import ckan.model as model
+from ckan.plugins.toolkit import (
+    Invalid,
+    ObjectNotFound,
+    get_action,
+    get_validator,
+    _,
+    request,
+    response,
+    BaseController,
+    abort,
+)
 
-from ckan.common import request
+int_validator = get_validator('int_validator')
 
+PAGINATE_BY = 10000
 
-class DatastoreController(base.BaseController):
+
+class DatastoreController(BaseController):
     def dump(self, resource_id):
-        context = {
-            'model': model,
-            'session': model.Session,
-            'user': p.toolkit.c.user
-        }
-
-        data_dict = {
-            'resource_id': resource_id,
-            'limit': request.GET.get('limit', 100000),
-            'offset': request.GET.get('offset', 0)
-        }
-
-        action = p.toolkit.get_action('datastore_search')
         try:
-            result = action(context, data_dict)
-        except p.toolkit.ObjectNotFound:
-            base.abort(404, p.toolkit._('DataStore resource not found'))
-
-        pylons.response.headers['Content-Type'] = 'text/csv'
-        pylons.response.headers['Content-disposition'] = \
-            'attachment; filename="{name}.csv"'.format(name=resource_id)
-        f = StringIO.StringIO()
-        wr = csv.writer(f, encoding='utf-8')
-
-        header = [x['id'] for x in result['fields']]
-        wr.writerow(header)
-
-        for record in result['records']:
-            wr.writerow([record[column] for column in header])
-        return f.getvalue()
+            offset = int_validator(request.GET.get('offset', 0), {})
+        except Invalid as e:
+            abort(400, u'offset: ' + e.error)
+        try:
+            limit = int_validator(request.GET.get('limit'), {})
+        except Invalid as e:
+            abort(400, u'limit: ' + e.error)
+
+        wr = None
+        while True:
+            if limit is not None and limit <= 0:
+                break
+
+            try:
+                result = get_action('datastore_search')(None, {
+                    'resource_id': resource_id,
+                    'limit':
+                        PAGINATE_BY if limit is None
+                        else min(PAGINATE_BY, limit),
+                    'offset': offset,
+                })
+            except ObjectNotFound:
+                abort(404, _('DataStore resource not found'))
+
+            if not wr:
+                response.headers['Content-Type'] = 'text/csv; charset=utf-8'
+                response.headers['Content-disposition'] = (
+                    'attachment; filename="{name}.csv"'.format(
+                        name=resource_id))
+                wr = csv.writer(response, encoding='utf-8')
+
+                header = [x['id'] for x in result['fields']]
+                wr.writerow(header)
+
+            for record in result['records']:
+                wr.writerow([record[column] for column in header])
+
+            if len(result['records']) < PAGINATE_BY:
+                break
+            offset += PAGINATE_BY
+            if limit is not None:
+                limit -= PAGINATE_BY
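For reference, here is a minimal standalone sketch of the paging pattern the commit introduces, lifted out of the Pylons controller. The dump_csv and fetch names are hypothetical stand-ins (fetch plays the role of the datastore_search action), and it uses the stdlib csv module rather than the unicodecsv writer the controller relies on:

import csv
import sys

PAGINATE_BY = 10000  # page size, as in the commit


def dump_csv(fetch, out, limit=None):
    # Stream rows to `out` in PAGINATE_BY-sized pages instead of
    # fetching everything in one call and buffering the whole file.
    offset = 0
    wr = None
    header = None
    while True:
        if limit is not None and limit <= 0:
            break
        page = PAGINATE_BY if limit is None else min(PAGINATE_BY, limit)
        result = fetch(page, offset)
        if wr is None:
            # Create the writer and emit the header row only once,
            # before the first page of records.
            wr = csv.writer(out)
            header = [f['id'] for f in result['fields']]
            wr.writerow(header)
        for record in result['records']:
            wr.writerow([record[col] for col in header])
        if len(result['records']) < PAGINATE_BY:
            break  # a short page means the data source is exhausted
        offset += PAGINATE_BY
        if limit is not None:
            limit -= PAGINATE_BY


if __name__ == '__main__':
    # Toy data source standing in for datastore_search.
    rows = [{'id': i, 'name': 'row-%d' % i} for i in range(25000)]

    def fetch(limit, offset):
        return {
            'fields': [{'id': 'id'}, {'id': 'name'}],
            'records': rows[offset:offset + limit],
        }

    dump_csv(fetch, sys.stdout)

The design point is the same as in the controller: the writer and header row are created on the first page only, and a page shorter than PAGINATE_BY signals the end of the data, so the full dump is never held in memory at once.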
