From 57d6a4e6b112b4e159a4d93fdc9fa4626491d472 Mon Sep 17 00:00:00 2001 From: kindly Date: Wed, 9 Oct 2013 15:35:37 +0100 Subject: [PATCH 01/14] [#1273] resource upload with basic ui and mulitipart api uploading --- ckan/config/routing.py | 2 + ckan/controllers/api.py | 6 +- ckan/controllers/package.py | 34 ++++++++-- ckan/lib/dictization/model_dictize.py | 15 +++- ckan/lib/dictization/model_save.py | 4 +- ckan/lib/uploader.py | 68 ++++++++++++++++++- ckan/logic/action/create.py | 15 +++- ckan/logic/action/update.py | 10 ++- .../package/snippets/resource_form.html | 11 ++- 9 files changed, 149 insertions(+), 16 deletions(-) diff --git a/ckan/config/routing.py b/ckan/config/routing.py index 0fbb02bb886..91c766c3c89 100644 --- a/ckan/config/routing.py +++ b/ckan/config/routing.py @@ -250,6 +250,8 @@ def make_map(): action='resource_edit') m.connect('/dataset/{id}/resource/{resource_id}/download', action='resource_download') + m.connect('/dataset/{id}/resource/{resource_id}/download/{filename}', + action='resource_download') m.connect('/dataset/{id}/resource/{resource_id}/embed', action='resource_embedded_dataviewer') m.connect('/dataset/{id}/resource/{resource_id}/viewer', diff --git a/ckan/controllers/api.py b/ckan/controllers/api.py index 8f63e2779ca..90c78679bf4 100644 --- a/ckan/controllers/api.py +++ b/ckan/controllers/api.py @@ -828,7 +828,9 @@ def make_unicode(entity): cls.log.debug('Retrieving request POST: %r' % request.POST) cls.log.debug('Retrieving request GET: %r' % request.GET) request_data = None - if request.POST: + if request.POST and request.content_type == 'multipart/form-data': + request_data = dict(request.POST) + elif request.POST: try: keys = request.POST.keys() # Parsing breaks if there is a = in the value, so for now @@ -862,7 +864,7 @@ def make_unicode(entity): raise ValueError(msg) else: request_data = {} - if request_data: + if request_data and request.content_type != 'multipart/form-data': try: request_data = h.json.loads(request_data, 
encoding='utf8') except ValueError, e: diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py index d1852cd95b7..4f88c2d568e 100644 --- a/ckan/controllers/package.py +++ b/ckan/controllers/package.py @@ -1,11 +1,15 @@ import logging from urllib import urlencode import datetime +import os +import mimetypes +import cgi from pylons import config from genshi.template import MarkupTemplate from genshi.template.text import NewTextTemplate from paste.deploy.converters import asbool +import paste.fileapp import ckan.logic as logic import ckan.lib.base as base @@ -18,6 +22,7 @@ import ckan.model as model import ckan.lib.datapreview as datapreview import ckan.lib.plugins +import ckan.lib.uploader as uploader import ckan.plugins as p from ckan.common import OrderedDict, _, json, request, c, g, response @@ -516,7 +521,7 @@ def resource_edit(self, id, resource_id, data=None, errors=None, del data['save'] context = {'model': model, 'session': model.Session, - 'api_version': 3, + 'api_version': 3, 'for_edit': True, 'user': c.user or c.author, 'auth_user_obj': c.userobj} data['package_id'] = id @@ -537,7 +542,7 @@ def resource_edit(self, id, resource_id, data=None, errors=None, id=id, resource_id=resource_id)) context = {'model': model, 'session': model.Session, - 'api_version': 3, + 'api_version': 3, 'for_edit': True, 'user': c.user or c.author, 'auth_user_obj': c.userobj} pkg_dict = get_action('package_show')(context, {'id': id}) if pkg_dict['state'].startswith('draft'): @@ -587,7 +592,8 @@ def new_resource(self, id, data=None, errors=None, error_summary=None): # see if we have any data that we are trying to save data_provided = False for key, value in data.iteritems(): - if value and key != 'resource_type': + if ((value or isinstance(value, cgi.FieldStorage)) + and key != 'resource_type'): data_provided = True break @@ -1167,10 +1173,10 @@ def _resource_preview(self, data_dict): or datapreview.get_preview_plugin( data_dict, return_first=True)) - def 
resource_download(self, id, resource_id): + def resource_download(self, id, resource_id, filename=None): """ - Provides a direct download by redirecting the user to the url stored - against this resource. + Provides a direct download by either redirecting the user to the url stored + or downloading an uploaded file directly. """ context = {'model': model, 'session': model.Session, 'user': c.user or c.author, 'auth_user_obj': c.userobj} @@ -1183,7 +1189,21 @@ def resource_download(self, id, resource_id): except NotAuthorized: abort(401, _('Unauthorized to read resource %s') % id) - if not 'url' in rsc: + if rsc.get('url_type') == 'upload': + upload = uploader.ResourceUpload(rsc) + filepath = upload.get_path(rsc['id']) + fileapp = paste.fileapp.FileApp(filepath) + try: + status, headers, app_iter = request.call_application(fileapp) + except OSError: + abort(404, _('Resource data not found')) + status, headers, app_iter = request.call_application(fileapp) + response.headers.update(dict(headers)) + content_type, content_enc = mimetypes.guess_type(rsc.get('url','')) + response.headers['Content-Type'] = content_type + response.status = status + return app_iter + elif not 'url' in rsc: abort(404, _('No download is available')) redirect(rsc['url']) diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 019e648140b..6bc5b08d2c1 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -139,14 +139,27 @@ def _unified_resource_format(format_): return format_new def resource_dictize(res, context): + model = context['model'] resource = d.table_dictize(res, context) + resource_group_id = resource['resource_group_id'] + resource_group = model.Session.query( + model.ResourceGroup).get(resource_group_id) extras = resource.pop("extras", None) if extras: resource.update(extras) resource['format'] = _unified_resource_format(res.format) # some urls do not have the protocol this adds http:// to these url = 
resource['url'] - if not urlparse.urlsplit(url).scheme: + if resource.get('url_type') == 'upload' and not context.get('for_edit'): + last_part = url.split('/')[-1] + cleaned_name = munge.munge_filename(last_part) + resource['url'] = h.url_for(controller='package', + action='resource_download', + id=resource_group.package_id, + resource_id=res.id, + filename=cleaned_name, + qualified=True) + elif not urlparse.urlsplit(url).scheme and not context.get('for_edit'): resource['url'] = u'http://' + url.lstrip('/') return resource diff --git a/ckan/lib/dictization/model_save.py b/ckan/lib/dictization/model_save.py index 4122172b837..7282bbb61f2 100644 --- a/ckan/lib/dictization/model_save.py +++ b/ckan/lib/dictization/model_save.py @@ -41,8 +41,8 @@ def resource_dict_save(res_dict, context): # this is an internal field so ignore # FIXME This helps get the tests to pass but is a hack and should # be fixed properly. basically don't update the format if not needed - if (key == 'format' and value == obj.format - or value == d.model_dictize._unified_resource_format(obj.format)): + if (key == 'format' and (value == obj.format + or value == d.model_dictize._unified_resource_format(obj.format))): continue setattr(obj, key, value) else: diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py index 9b7cea2922e..e4d4575e218 100644 --- a/ckan/lib/uploader.py +++ b/ckan/lib/uploader.py @@ -6,7 +6,9 @@ class Upload(object): def __init__(self, object_type, old_filename=None): - path = pylons.config.get('ckan.storage_path', '/tmp') + path = pylons.config.get('ckan.storage_path') + if not path: + return self.storage_path = os.path.join(path, 'storage', 'uploads', object_type) try: os.makedirs(self.storage_path) @@ -59,3 +61,67 @@ def upload(self): os.remove(self.old_filepath) except OSError, e: pass + + +class ResourceUpload(object): + def __init__(self, resource): + path = pylons.config.get('ckan.storage_path') + if not path: + return + self.storage_path = os.path.join(path, 'resources') 
+ try: + os.makedirs(self.storage_path) + except OSError, e: + pass + self.filename = None + + url = resource.get('url') + upload_field_storage = resource.pop('upload', None) + self.clear = resource.pop('clear_upload', None) + + if isinstance(upload_field_storage, cgi.FieldStorage): + self.filename = upload_field_storage.filename + self.filename = munge.munge_filename(self.filename) + resource['url'] = self.filename + resource['url_type'] = 'upload' + self.upload_file = upload_field_storage.file + elif self.clear: + resource['url_type'] = '' + + + def get_directory(self, id): + directory = os.path.join(self.storage_path, + id[0:3], id[3:6]) + return directory + + def get_path(self, id): + directory = self.get_directory(id) + filepath = os.path.join(directory, id[6:]) + return filepath + + + def upload(self, resource): + id = resource['id'] + directory = self.get_directory(id) + filepath = self.get_path(id) + if self.filename: + try: + os.makedirs(directory) + except OSError, e: + pass + tmp_filepath = filepath + '~' + output_file = open(tmp_filepath, 'wb+') + self.upload_file.seek(0) + while True: + data = self.upload_file.read(2 ** 20) #mb chuncks + if not data: + break + output_file.write(data) + output_file.close() + os.rename(tmp_filepath, filepath) + + if self.clear: + try: + os.remove(filepath) + except OSError, e: + pass diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index 702a6028d99..209b9a2264e 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -235,6 +235,10 @@ def resource_create(context, data_dict): :type cache_last_updated: iso date string :param webstore_last_updated: (optional) :type webstore_last_updated: iso date string + :param upload: (optional) + :type upload: FieldStorage (optional) needs multipart/form-data + :param clear_upload: (optional) + :type clear_upload: boolean (optional) set to true to remove uplaoded file :returns: the newly created resource :rtype: dictionary @@ -252,15 +256,24 @@ 
def resource_create(context, data_dict): if not 'resources' in pkg_dict: pkg_dict['resources'] = [] + + upload = uploader.ResourceUpload(data_dict) + pkg_dict['resources'].append(data_dict) try: + context['defer_commit'] = True + context['use_cache'] = False pkg_dict = _get_action('package_update')(context, pkg_dict) except ValidationError, e: errors = e.error_dict['resources'][-1] raise ValidationError(errors) - return pkg_dict['resources'][-1] + resource = pkg_dict['resources'][-1] + upload.upload(resource) + model.repo.commit() + + return resource def related_create(context, data_dict): diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index f8aee9f62fb..d2bd52fc7c0 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -219,15 +219,23 @@ def resource_update(context, data_dict): else: logging.error('Could not find resource ' + id) raise NotFound(_('Resource was not found.')) + + upload = uploader.ResourceUpload(data_dict) + pkg_dict['resources'][n] = data_dict try: + context['defer_commit'] = True + context['use_cache'] = False pkg_dict = _get_action('package_update')(context, pkg_dict) except ValidationError, e: errors = e.error_dict['resources'][n] raise ValidationError(errors) - return pkg_dict['resources'][n] + resource = pkg_dict['resources'][n] + upload.upload(resource) + model.repo.commit() + return resource def package_update(context, data_dict): diff --git a/ckan/templates/package/snippets/resource_form.html b/ckan/templates/package/snippets/resource_form.html index f3bb2bddc91..1043eb09b87 100644 --- a/ckan/templates/package/snippets/resource_form.html +++ b/ckan/templates/package/snippets/resource_form.html @@ -4,7 +4,7 @@ {% set errors = errors or {} %} {% set action = form_action or h.url_for(controller='package', action='new_resource', id=pkg_name) %} -
+ {% block stages %} {# An empty stages variable will not show the stages #} {% if stage %} @@ -41,6 +41,15 @@ {{ form.input('url', id='field-url', label=_('Resource'), placeholder=_('eg. http://example.com/gold-prices-jan-2011.json'), value=data.url, error=errors.url, classes=['control-full', 'control-large'], is_required=true) }} {% endblock %} + {% block basic_fields_upload %} + {% if h.uploads_enabled() %} + {{ form.input('upload', label=_('Resource Upload'), id='field-upload', type='file', placeholder='', error='', classes=['control-full']) }} + {% if data.url_type == 'upload' %} + {{ form.checkbox('clear_upload', label=_('Clear Upload'), id='field-clear-upload', value='true', error='', classes=['control-full']) }} + {% endif %} + {% endif %} + {% endblock %} + {% block basic_fields_name %} {{ form.input('name', id='field-name', label=_('Name'), placeholder=_('eg. January 2011 Gold Prices'), value=data.name, error=errors.name, classes=['control-full']) }} {% endblock %} From 4913975a2863e2a4ab12a42daef737c632fe9284 Mon Sep 17 00:00:00 2001 From: kindly Date: Tue, 12 Nov 2013 16:17:04 +0000 Subject: [PATCH 02/14] [#1273] Make sure new resource upload works, also fixes #994 --- ckan/lib/uploader.py | 20 +++++++++++++++----- ckan/logic/action/create.py | 11 ++++++++--- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py index 8991f497993..2a348d7150d 100644 --- a/ckan/lib/uploader.py +++ b/ckan/lib/uploader.py @@ -133,14 +133,16 @@ def upload(self, max_size=2): class ResourceUpload(object): def __init__(self, resource): - path = pylons.config.get('ckan.storage_path') + path = get_storage_path() if not path: return self.storage_path = os.path.join(path, 'resources') try: os.makedirs(self.storage_path) except OSError, e: - pass + ## errno 17 is file already exists + if e.errno != 17: + raise self.filename = None url = resource.get('url') @@ -168,23 +170,31 @@ def get_path(self, id): return filepath - def 
upload(self, resource): - id = resource['id'] + def upload(self, id, max_size=10): directory = self.get_directory(id) filepath = self.get_path(id) if self.filename: try: os.makedirs(directory) except OSError, e: - pass + ## errno 17 is file already exists + if e.errno != 17: + raise tmp_filepath = filepath + '~' output_file = open(tmp_filepath, 'wb+') self.upload_file.seek(0) + current_size = 0 while True: + current_size = current_size + 1 data = self.upload_file.read(2 ** 20) #mb chuncks if not data: break output_file.write(data) + if current_size > max_size: + os.remove(self.tmp_filepath) + raise logic.ValidationError( + {'upload': ['File upload too large']} + ) output_file.close() os.rename(tmp_filepath, filepath) diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index 97f56b689fb..c419cafc0d5 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -268,15 +268,20 @@ def resource_create(context, data_dict): try: context['defer_commit'] = True context['use_cache'] = False - pkg_dict = _get_action('package_update')(context, pkg_dict) + _get_action('package_update')(context, pkg_dict) except ValidationError, e: errors = e.error_dict['resources'][-1] raise ValidationError(errors) - resource = pkg_dict['resources'][-1] - upload.upload(resource) + ## Get out resource_id resource from model as it will not appear in + ## package_show until after commit + upload.upload(context['package'].resources[-1].id) model.repo.commit() + ## Run package show again to get out actual last_resource + pkg_dict = _get_action('package_show')(context, {'id': package_id}) + resource = pkg_dict['resources'][-1] + return resource From 2ec5fcfc934d08be502af27d3459400394cd030f Mon Sep 17 00:00:00 2001 From: kindly Date: Tue, 12 Nov 2013 19:24:12 +0000 Subject: [PATCH 03/14] [#1273] Add javascript to resource uploads --- ckan/logic/action/update.py | 2 +- .../base/javascript/modules/image-upload.js | 30 +++++++++------- 
ckan/templates/group/snippets/group_form.html | 8 +++-- ckan/templates/macros/form.html | 19 ++++++---- .../snippets/organization_form.html | 5 ++- .../package/snippets/resource_form.html | 35 +++---------------- 6 files changed, 44 insertions(+), 55 deletions(-) diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index 75955870cea..505003e0920 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -233,7 +233,7 @@ def resource_update(context, data_dict): raise ValidationError(errors) resource = pkg_dict['resources'][n] - upload.upload(resource) + upload.upload(resource['id']) model.repo.commit() return resource diff --git a/ckan/public/base/javascript/modules/image-upload.js b/ckan/public/base/javascript/modules/image-upload.js index 96308ed5764..1872a051fbb 100644 --- a/ckan/public/base/javascript/modules/image-upload.js +++ b/ckan/public/base/javascript/modules/image-upload.js @@ -6,16 +6,16 @@ this.ckan.module('image-upload', function($, _) { /* options object can be extended using data-module-* attributes */ options: { is_url: true, - has_image: false, - field_upload: 'input[name="image_upload"]', - field_url: 'input[name="image_url"]', - field_clear: 'input[name="clear_upload"]', + is_upload: false, + field_upload: 'image_upload', + field_url: 'image_url', + field_clear: 'clear_upload', + upload_label: '', i18n: { upload: _('From computer'), url: _('From web'), remove: _('Remove'), - label: _('Upload image'), - label_url: _('Image URL'), + upload_label: _('Upload image'), remove_tooltip: _('Reset this') }, template: [ @@ -38,14 +38,18 @@ this.ckan.module('image-upload', function($, _) { var options = this.options; // firstly setup the fields - this.input = $(options.field_upload, this.el); - this.field_url = $(options.field_url, this.el).parents('.control-group'); + var field_upload = 'input[name="' + options.field_upload + '"]'; + var field_url = 'input[name="' + options.field_url + '"]'; + var field_clear = 
'input[name="' + options.field_clear + '"]'; + + this.input = $(field_upload, this.el); + this.field_url = $(field_url, this.el).parents('.control-group'); this.field_image = this.input.parents('.control-group'); // Is there a clear checkbox on the form already? - var checkbox = $(options.field_clear, this.el); + var checkbox = $(field_clear, this.el); if (checkbox.length > 0) { - options.has_image = true; + options.is_upload = true; checkbox.parents('.control-group').remove(); } @@ -75,7 +79,7 @@ this.ckan.module('image-upload', function($, _) { .insertBefore($('input', this.field_url)); // Update the main label - $('label[for="field-image-upload"]').text(this.i18n('label')); + $('label[for="field-image-upload"]').text(options.upload_label || this.i18n('upload_label')); // Setup the file input this.input @@ -96,7 +100,7 @@ this.ckan.module('image-upload', function($, _) { // Setup the initial state if (options.is_url) { this.changeState(this.state.web); - } else if (options.has_image) { + } else if (options.is_upload) { this.changeState(this.state.attached); } else { this.changeState(this.state.blank); @@ -139,7 +143,7 @@ this.ckan.module('image-upload', function($, _) { _onFromWeb: function() { this.changeState(this.state.web); $('input', this.field_url).focus(); - if (this.options.has_image) { + if (this.options.is_upload) { this.field_clear.val('true'); } }, diff --git a/ckan/templates/group/snippets/group_form.html b/ckan/templates/group/snippets/group_form.html index cd19912d66b..3f82aab46ba 100644 --- a/ckan/templates/group/snippets/group_form.html +++ b/ckan/templates/group/snippets/group_form.html @@ -19,7 +19,10 @@ {{ form.markdown('description', label=_('Description'), id='field-description', placeholder=_('A little information about my group...'), value=data.description, error=errors.description) }} - {{ form.image_upload(data, errors, image_url=c.group_dict.image_display_url, is_upload_enabled=h.uploads_enabled()) }} + {% set is_upload = data.image_url 
and not data.image_url.startswith('http') %} + {% set is_url = data.image_url and data.image_url.startswith('http') %} + + {{ form.image_upload(data, errors, is_upload_enabled=h.uploads_enabled(), is_url=is_url, is_upload=is_upload) }} {% endblock %} @@ -48,6 +51,8 @@ ) }} {% endfor %} {% endblock %} + + {{ form.required_message() }} {# Do not update datasets here {% block dataset_fields %} {% if data.packages %} @@ -70,7 +75,6 @@ #}
- {{ form.required_message() }} {% block delete_button %} {% if h.check_access('group_delete', {'id': data.id}) %} {% set locale = h.dump_json({'content': _('Are you sure you want to delete this Group?')}) %} diff --git a/ckan/templates/macros/form.html b/ckan/templates/macros/form.html index db93f046961..1d53a5df159 100644 --- a/ckan/templates/macros/form.html +++ b/ckan/templates/macros/form.html @@ -403,18 +403,23 @@ {{ form.image_upload(data, errors, is_upload_enabled=true) }} #} -{% macro image_upload(data, errors, field_url='image_url', field_upload='image_upload', field_clear='clear_upload', image_url=false, is_upload_enabled=false, placeholder=false) %} +{% macro image_upload(data, errors, field_url='image_url', field_upload='image_upload', field_clear='clear_upload', + is_url=false, is_upload=false, is_upload_enabled=false, placeholder=false, + url_label='', upload_label='') %} {% set placeholder = placeholder if placeholder else _('http://example.com/my-image.jpg') %} - {% set has_uploaded_data = data.get(field_url) and not data[field_url].startswith('http') %} - {% set is_url = data.get(field_url) and data[field_url].startswith('http') %} + {% set url_label = url_label or _('Image URL') %} + {% set upload_label = upload_label or _('Upload Image') %} - {% if is_upload_enabled %}
{% endif %} + {% if is_upload_enabled %} +
+ {% endif %} - {{ input(field_url, label=_('Image URL'), id='field-image-url', placeholder=placeholder, value=data.get(field_url), error=errors.get(field_url), classes=['control-full']) }} + {{ input(field_url, label=url_label, id='field-image-url', placeholder=placeholder, value=data.get(field_url), error=errors.get(field_url), classes=['control-full']) }} {% if is_upload_enabled %} - {{ input(field_upload, label=_('Image Upload'), id='field-image-upload', type='file', placeholder='', value='', error='', classes=['control-full']) }} - {% if has_uploaded_data %} + {{ input(field_upload, label=upload_label, id='field-image-upload', type='file', placeholder='', value='', error='', classes=['control-full']) }} + {% if is_uploadu%} {{ checkbox(field_clear, label=_('Clear Upload'), id='field-clear-upload', value='true', error='', classes=['control-full']) }} {% endif %} {% endif %} diff --git a/ckan/templates/organization/snippets/organization_form.html b/ckan/templates/organization/snippets/organization_form.html index 0d62e36cac1..c5b47da55d9 100644 --- a/ckan/templates/organization/snippets/organization_form.html +++ b/ckan/templates/organization/snippets/organization_form.html @@ -19,7 +19,10 @@ {{ form.markdown('description', label=_('Description'), id='field-description', placeholder=_('A little information about my organization...'), value=data.description, error=errors.description) }} - {{ form.image_upload(data, errors, image_url=c.group_dict.image_display_url, is_upload_enabled=h.uploads_enabled()) }} + {% set is_upload = data.image_url and not data.image_url.startswith('http') %} + {% set is_url = data.image_url and data.image_url.startswith('http') %} + + {{ form.image_upload(data, errors, is_upload_enabled=h.uploads_enabled(), is_url=is_url, is_upload=is_upload) }} {% endblock %} diff --git a/ckan/templates/package/snippets/resource_form.html b/ckan/templates/package/snippets/resource_form.html index 1043eb09b87..5d327c3c0f4 100644 --- 
a/ckan/templates/package/snippets/resource_form.html +++ b/ckan/templates/package/snippets/resource_form.html @@ -16,38 +16,12 @@ -
- {% block basic_fields %} - - {% block basic_fields_data %} -
- {# - This block uses a slightly odd pattern. Unlike the rest of the radio - buttons which are wrapped _inside_ the labels here we place the label - after the input. This enables us to style the label based on the state - of the radio using css. eg. input[type=radio]+label {} - #} - - - - - - -
-
- {% endblock %} {% block basic_fields_url %} - {{ form.input('url', id='field-url', label=_('Resource'), placeholder=_('eg. http://example.com/gold-prices-jan-2011.json'), value=data.url, error=errors.url, classes=['control-full', 'control-large'], is_required=true) }} - {% endblock %} - - {% block basic_fields_upload %} - {% if h.uploads_enabled() %} - {{ form.input('upload', label=_('Resource Upload'), id='field-upload', type='file', placeholder='', error='', classes=['control-full']) }} - {% if data.url_type == 'upload' %} - {{ form.checkbox('clear_upload', label=_('Clear Upload'), id='field-clear-upload', value='true', error='', classes=['control-full']) }} - {% endif %} - {% endif %} + {% set is_upload = (data.url_type == 'upload') %} + {{ form.image_upload(data, errors, field_url='url', field_upload='upload', field_clear='clear_upload', + is_upload_enabled=h.uploads_enabled(), is_url=data.url and not is_upload, is_upload=is_upload, + upload_label=_('File Upload'), url_label=_('URL')) }} {% endblock %} {% block basic_fields_name %} @@ -70,7 +44,6 @@ {{ form.required_message() }} - {% endblock %} {% block metadata_fields %} {% if include_metadata %} From 35acb45361c3e2eb750cf7f54d64023e5e9f728b Mon Sep 17 00:00:00 2001 From: kindly Date: Wed, 13 Nov 2013 13:58:38 +0000 Subject: [PATCH 04/14] [#1273] add size limits properly and add docs --- ckan/lib/uploader.py | 24 +++++++-- ckan/logic/action/create.py | 5 +- ckan/logic/action/update.py | 4 +- doc/configuration.rst | 103 +++++++++++------------------------- 4 files changed, 56 insertions(+), 80 deletions(-) diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py index 2a348d7150d..27c998dc24c 100644 --- a/ckan/lib/uploader.py +++ b/ckan/lib/uploader.py @@ -6,9 +6,13 @@ import logging import ckan.logic as logic + +config = pylons.config log = logging.getLogger(__name__) _storage_path = None +_max_resource_size = None +_max_image_size = None def get_storage_path(): @@ -17,9 +21,9 @@ def get_storage_path(): 
#None means it has not been set. False means not in config. if _storage_path is None: - storage_path = pylons.config.get('ckan.storage_path') - ofs_impl = pylons.config.get('ofs.impl') - ofs_storage_dir = pylons.config.get('ofs.storage_dir') + storage_path = config.get('ckan.storage_path') + ofs_impl = config.get('ofs.impl') + ofs_storage_dir = config.get('ofs.storage_dir') if storage_path: _storage_path = storage_path elif ofs_impl == 'pairtree' and ofs_storage_dir: @@ -39,6 +43,18 @@ def get_storage_path(): return _storage_path +def get_max_image_size(): + global _max_image_size + if _max_image_size is None: + _max_image_size = int(config.get('ckan.max_image_size', 2)) + return _max_image_size + +def get_max_resource_size(): + global _max_resource_size + if _max_resource_size is None: + _max_resource_size = int(config.get('ckan.max_resource_size', 10)) + return _max_resource_size + class Upload(object): def __init__(self, object_type, old_filename=None): @@ -191,7 +207,7 @@ def upload(self, id, max_size=10): break output_file.write(data) if current_size > max_size: - os.remove(self.tmp_filepath) + os.remove(tmp_filepath) raise logic.ValidationError( {'upload': ['File upload too large']} ) diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index c419cafc0d5..652a87d2f7d 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -275,7 +275,8 @@ def resource_create(context, data_dict): ## Get out resource_id resource from model as it will not appear in ## package_show until after commit - upload.upload(context['package'].resources[-1].id) + upload.upload(context['package'].resources[-1].id, + uploader.get_max_resource_size()) model.repo.commit() ## Run package show again to get out actual last_resource @@ -587,7 +588,7 @@ def _group_or_org_create(context, data_dict, is_org=False): logic.get_action('activity_create')(activity_create_context, activity_dict) - upload.upload() + upload.upload(uploader.get_max_image_size()) if not 
context.get('defer_commit'): model.repo.commit() context["group"] = group diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index 505003e0920..e1b59b5b072 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -233,7 +233,7 @@ def resource_update(context, data_dict): raise ValidationError(errors) resource = pkg_dict['resources'][n] - upload.upload(resource['id']) + upload.upload(resource['id'], uploader.get_max_resource_size()) model.repo.commit() return resource @@ -541,7 +541,7 @@ def _group_or_org_update(context, data_dict, is_org=False): # TODO: Also create an activity detail recording what exactly changed # in the group. - upload.upload() + upload.upload(uploader.get_max_image_size()) if not context.get('defer_commit'): model.repo.commit() diff --git a/doc/configuration.rst b/doc/configuration.rst index ec113ea9d4a..01b18660f92 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -971,20 +971,39 @@ For more information on theming, see :doc:`theming`. Storage Settings ---------------- -.. _ckan.storage.bucket: +.. _ckan.storage_path: -ckan.storage.bucket -^^^^^^^^^^^^^^^^^^^ +ckan.storage_path +^^^^^^^^^^^^^^^^^ Example:: - - ckan.storage.bucket = ckan + ckan.storage_path = /var/lib/ckan Default value: ``None`` -This changes the bucket name for the uploaded files. +This defines the location of where CKAN will store all uploaded data. + +ckan.max_resource_size +^^^^^^^^^^^^^^^^^^^^^^ + +Example:: + ckan.max_resource_size = 100 + +Default value: ``10`` + +The maximum in megabytes a resources upload can be. + +ckan.max_image_size +^^^^^^^^^^^^^^^^^^^^ + +Example:: + ckan.max_image_size = 10 + +Default value: ``2`` -.. _ckan.storage.max_content_length: +The maximum in megabytes an image upload can be. + +.. 
_ckan.storage.bucket: ckan.storage.max_content_length ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -996,6 +1015,7 @@ Example:: Default value: ``50000000`` This defines the maximum content size, in bytes, for uploads. +Depricated, please use ckan.max_resource_size_instead .. _ofs.impl: @@ -1010,6 +1030,9 @@ Default value: ``None`` Defines the storage backend used by CKAN: ``pairtree`` for local storage, ``s3`` for Amazon S3 Cloud Storage or ``google`` for Google Cloud Storage. Note that each of these must be accompanied by the relevant settings for each backend described below. +Depricated, only aviliable option is now pairtree. + + .. _ofs.storage_dir: ofs.storage_dir @@ -1023,72 +1046,8 @@ Default value: ``None`` Only used with the local storage backend. Use this to specify where uploaded files should be stored, and also to turn on the handling of file storage. The folder should exist, and will automatically be turned into a valid pairtree repository if it is not already. -.. _ckan.storage.key_prefix: - -ckan.storage.key_prefix -^^^^^^^^^^^^^^^^^^^^^^^ - -Example:: - - ckan.storage.key_prefix = ckan-file/ - -Default value: ``file/`` - -Only used with the local storage backend. This changes the prefix for the uploaded files. - -.. _ofs.aws_access_key_id: - -ofs.aws_access_key_id -^^^^^^^^^^^^^^^^^^^^^ - -Example:: - - ofs.aws_access_key_id = 022QF06E7MXBSH9DHM02 - -Default value: ``None`` - -Only used with the Amazon S3 storage backend. Configure with your AWS Access Key ID. - -.. _ofs.aws_secret_access_key: - -ofs.aws_secret_access_key -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Example:: - - ofs.aws_secret_access_key = kWcrlUX5JEDGM/LtmEENI/aVmYvHNif5zB+d9+ct - -Default value: ``None`` - -Only used with the Amazon S3 storage backend. Configure with your AWS Secret Access Key. - -.. _ofs.gs_access_key_id: - -ofs.gs_access_key_id -^^^^^^^^^^^^^^^^^^^^^ - -Example:: - - ofs.gs_access_key_id = GOOGTS7C7FUP3AIRVJTE - -Default value: ``None`` - -Only used with the Google storage backend. 
Configure with your Google Storage -Access Key ID. - -.. _ofs.gs_secret_access_key: - -ofs.gs_secret_access_key -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Example:: - - ofs.gs_secret_access_key = bGoa+V7g/yqDXvKRqq+JTFn4uQZbPiQJo4pf9RzJ - -Default value: ``None`` +Depricated, please use ckan.storage_path -Only used with the Google storage backend. Configure with your Google Storage -Secret Access Key. DataPusher Settings From 4629b4125d5042f0346bd53172a87b2c31b4cf95 Mon Sep 17 00:00:00 2001 From: kindly Date: Wed, 13 Nov 2013 14:03:05 +0000 Subject: [PATCH 05/14] [#1273] change template.ini and fix more docs --- ckan/config/deployment.ini_tmpl | 25 +++---------------------- doc/configuration.rst | 20 +++++--------------- 2 files changed, 8 insertions(+), 37 deletions(-) diff --git a/ckan/config/deployment.ini_tmpl b/ckan/config/deployment.ini_tmpl index 5d89ad2a155..f371e2a9e4f 100644 --- a/ckan/config/deployment.ini_tmpl +++ b/ckan/config/deployment.ini_tmpl @@ -121,28 +121,9 @@ ckan.feeds.author_link = ## Storage Settings -# Local file storage: -#ofs.impl = pairtree -#ofs.storage_dir = /var/lib/ckan/default - -# Google cloud storage: -#ofs.impl = google -#ofs.gs_access_key_id = -#ofs.gs_secret_access_key = - -# S3 cloud storage: -#ofs.impl = s3 -#ofs.aws_access_key_id = .... -#ofs.aws_secret_access_key = .... - -# 'Bucket' to use for file storage -#ckan.storage.bucket = default - -# Prefix for uploaded files (only used for pairtree) -#ckan.storage.key_prefix = file/ - -# The maximum content size, in bytes, for uploads -#ckan.storage.max_content_length = 50000000 +#ckan.storage_path = /var/lib/ckan +#ckan.max_resource_size = 10 +#ckan.max_image_size = 2 ## Datapusher settings diff --git a/doc/configuration.rst b/doc/configuration.rst index 01b18660f92..7e5141ce1d3 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -983,6 +983,8 @@ Default value: ``None`` This defines the location of where CKAN will store all uploaded data. +.. 
_ckan.max_resource_size: + ckan.max_resource_size ^^^^^^^^^^^^^^^^^^^^^^ @@ -993,6 +995,8 @@ Default value: ``10`` The maximum in megabytes a resources upload can be. +.. _ckan.max_image_size: + ckan.max_image_size ^^^^^^^^^^^^^^^^^^^^ @@ -1003,20 +1007,6 @@ Default value: ``2`` The maximum in megabytes an image upload can be. -.. _ckan.storage.bucket: - -ckan.storage.max_content_length -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Example:: - - ckan.storage.max_content_length = 500000 - -Default value: ``50000000`` - -This defines the maximum content size, in bytes, for uploads. -Depricated, please use ckan.max_resource_size_instead - .. _ofs.impl: ofs.impl @@ -1046,7 +1036,7 @@ Default value: ``None`` Only used with the local storage backend. Use this to specify where uploaded files should be stored, and also to turn on the handling of file storage. The folder should exist, and will automatically be turned into a valid pairtree repository if it is not already. -Depricated, please use ckan.storage_path +Depricated, please use ckan.storage_path. From 091eb3b257ada4c72595eb69546a3131478c56d9 Mon Sep 17 00:00:00 2001 From: kindly Date: Wed, 13 Nov 2013 23:45:53 +0000 Subject: [PATCH 06/14] [#1273] change filestore docs --- doc/conf.py | 1 + doc/filestore.rst | 164 +++++++++++++--------------------------------- 2 files changed, 46 insertions(+), 119 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 118df583f33..13d36bcbfe4 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -47,6 +47,7 @@ .. |sstore| replace:: |config_dir|/sstore .. |storage_parent_dir| replace:: /var/lib/ckan .. |storage_dir| replace:: |storage_parent_dir|/default +.. |storage_path| replace:: |storage_parent_dir|/default .. |reload_apache| replace:: sudo service apache2 reload .. |restart_apache| replace:: sudo service apache2 restart .. 
|solr| replace:: Solr diff --git a/doc/filestore.rst b/doc/filestore.rst index 498626dbec0..a2287ed6e8b 100644 --- a/doc/filestore.rst +++ b/doc/filestore.rst @@ -2,12 +2,18 @@ FileStore and File Uploads ========================== -CKAN allows users to upload files directly to file storage either on the local -file system or to online 'cloud' storage like Amazon S3 or Google Storage. The -uploaded files will be stored in the configured location. +CKAN allows users to upload files directly to it against a resource or images +displayed against groups and organizations. + +.. versionchanged:: 2.2 + Previous versions of CKAN used to allow uploads to remote cloud hosting but + we have simplified this to only alow local file uploads. This is to give + CKAN more control over the files and make access control possible. If you + are already using pairtree local file storage then you should keep your + current settings, without change. ------------------------------------------- -Setup the FileStore with Local File Storage +Setup File Uploads ------------------------------------------- To setup CKAN's FileStore with local file storage: @@ -16,157 +22,77 @@ To setup CKAN's FileStore with local file storage: .. parsed-literal:: - sudo mkdir -p |storage_dir| + sudo mkdir -p |storage_path| 2. Add the following lines to your CKAN config file, after the ``[app:main]`` line: .. parsed-literal:: - ofs.impl = pairtree - ofs.storage_dir = |storage_dir| + ckan.storage_path = |storage_path| -3. Set the permissions of the ``storage_dir``. For example if you're running +3. Set the permissions of the ``storage_path``. For example if you're running CKAN with Apache, then Apache's user (``www-data`` on Ubuntu) must have - read, write and execute permissions for the ``storage_dir``: + read, write and execute permissions for the ``storage_path``: .. parsed-literal:: - sudo chown www-data |storage_dir| - sudo chmod u+rwx |storage_dir| - -4.
Make sure you've set :ref:`ckan.site_url` in your config file. + sudo chown www-data |storage_path| + sudo chmod u+rwx |storage_path| -5. Restart your web server, for example to restart Apache: +4. Restart your web server, for example to restart Apache: .. parsed-literal:: |reload_apache| --------------------------------------- -Setup the FileStore with Cloud Storage --------------------------------------- - -Important: you must install boto library for cloud storage to function:: - - pip install boto - -In your config for google:: - - ## OFS configuration - ofs.impl = google - ofs.gs_access_key_id = GOOG.... - ofs.gs_secret_access_key = .... - -For S3:: - - ## OFS configuration - ofs.impl = s3 - ofs.aws_access_key_id = .... - ofs.aws_secret_access_key = .... - - ----------------------- FileStore Web Interface ----------------------- -Upload of files to storage is integrated directly into the the Dataset creation +Upload of files to storage is integrated directly into the Dataset creation and editing system with files being associated to Resources. -------------- +----------------------- FileStore API -------------- - -CKAN's FileStore API lets you upload files to CKAN's -:doc:`FileStore `. If you're looking for an example, -`ckanclient `_ contains -`Python code for uploading a file to CKAN using the FileStore API `_. - - -FileStore Metadata API -====================== - -The API is located at:: - - /api/storage/metadata/{label} - -It supports the following methods: - -* GET will return the metadata -* POST will add/update metadata -* PUT will replace metadata - -Metadata is a json dict of key values which for POST and PUT should be send in body of request. 
- -A standard response looks like:: - - { - "_bucket": "ckannet-storage", - _content_length: 1074 - _format: "text/plain" - _label: "/file/8630a664-0ae4-485f-99c2-126dae95653a" - _last_modified: "Fri, 29 Apr 2011 19:27:31 GMT" - _location: "some-location" - _owner: null - uploaded-by: "bff737ef-b84c-4519-914c-b4285144d8e6" - } - -Note that values with '_' are standard OFS metadata and are mostly read-only -- _format i.e. content-type can be set). - - -FileStore Form Authentication API -================================= - -Provides credentials for doing operations on storage directly from a client -(using web form style POSTs). - -The API is located at:: - - /api/storage/auth/form/{label} - -Provide fields for a form upload to storage including authentication:: - - :param label: label. - :return: json-encoded dictionary with action parameter and fields list. +----------------------- +.. versionchanged:: 2.2 + The previous API has been depricated although should still work if you where + using loca file storage. -FileStore Request Authentication API -==================================== +The api is part of the resource_create and resource_update action api +functions. You can post mutipart/form-data to the api and the key, value +pairs will be treated as if they are a json object. +The extra key ``upload`` is used to actually post the binary data. -Provides credentials for doing operations on storage directly from a client. +Curl automatically puts the multipart-form-data heading when using the +``--form`` option: -.. warning:: This API is currently disabled and will likely be deprecated. - Use the form authentication instead. + ..
parsed-literal:: -The API is at:: + curl -H'Authorization: your-api-key' 'http://yourhost/api/action/resource_create' --form upload=@filetoupload --form package_id=my_dataset - /api/storage/auth/request/{label} +The python requests library uses the files parameter and automatically sets +the multipart/form-data header too: -Provide authentication information for a request so a client can -interact with backend storage directly:: + .. parsed-literal:: - :param label: label. - :param kwargs: sent either via query string for GET or json-encoded - dict for POST). Interpreted as http headers for request plus an - (optional) method parameter (being the HTTP method). + import requests + requests.post('http://0.0.0.0:5000/api/action/resource_create', + data={"package_id":"my_dataset"}, + headers={"X-CKAN-API-Key": "21a47217-6d7b-49c5-88f9-72ebd5a4d4bb"}, + files=[('upload', file('/path/to/file/to/upload.csv'))]) - Examples of headers are: +With resource_update, if you want to override a file you just need +to set the upload field again:: - Content-Type - Content-Encoding (optional) - Content-Length - Content-MD5 - Expect (should be '100-Continue') + curl -H'Authorization: your-api-key' 'http://yourhost/api/action/resource_update' --form upload=@newfiletoupload --form id=resourceid - :return: is a json hash containing various attributes including a - headers dictionary containing an Authorization field which is good for - 15m. +If you want to clear the upload and change it for a remote URL +there is special boolean field clear_upload to do this:: ---------------------- -DataStore Integration ---------------------- + curl -H'Authorization: your-api-key' 'http://yourhost/api/action/resource_update' --form url=http://example.com --form clear_upload=true --form id=resourceid -It is also possible to have uploaded files (if of a suitable format) stored in -the DataStore which will then provides an API to the data. See :ref:`datastorer` for more details.
From 57397c20b0ceae51b1081c0589c67c9594104e83 Mon Sep 17 00:00:00 2001 From: kindly Date: Tue, 19 Nov 2013 16:52:54 +0000 Subject: [PATCH 07/14] [#1273] fix some tests --- ckan/lib/dictization/model_dictize.py | 4 ++-- ckan/lib/uploader.py | 3 +++ ckan/logic/action/update.py | 2 +- ckan/tests/logic/test_action.py | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 80c43a4aadd..fd043e9e2d3 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -142,8 +142,6 @@ def resource_dictize(res, context): model = context['model'] resource = d.table_dictize(res, context) resource_group_id = resource['resource_group_id'] - resource_group = model.Session.query( - model.ResourceGroup).get(resource_group_id) extras = resource.pop("extras", None) if extras: resource.update(extras) @@ -151,6 +149,8 @@ def resource_dictize(res, context): # some urls do not have the protocol this adds http:// to these url = resource['url'] if resource.get('url_type') == 'upload' and not context.get('for_edit'): + resource_group = model.Session.query( + model.ResourceGroup).get(resource_group_id) last_part = url.split('/')[-1] cleaned_name = munge.munge_filename(last_part) resource['url'] = h.url_for(controller='package', diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py index 27c998dc24c..b46f186ccb9 100644 --- a/ckan/lib/uploader.py +++ b/ckan/lib/uploader.py @@ -151,6 +151,7 @@ class ResourceUpload(object): def __init__(self, resource): path = get_storage_path() if not path: + self.storage_path = None return self.storage_path = os.path.join(path, 'resources') try: @@ -187,6 +188,8 @@ def get_path(self, id): def upload(self, id, max_size=10): + if not self.storage_path: + return directory = self.get_directory(id) filepath = self.get_path(id) if self.filename: diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index 
e1b59b5b072..1d4dfb30efc 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -235,7 +235,7 @@ def resource_update(context, data_dict): resource = pkg_dict['resources'][n] upload.upload(resource['id'], uploader.get_max_resource_size()) model.repo.commit() - return resource + return _get_action('resource_show')(context, {'id': id}) def package_update(context, data_dict): diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py index 70857c5eda2..316d6533a5e 100644 --- a/ckan/tests/logic/test_action.py +++ b/ckan/tests/logic/test_action.py @@ -832,10 +832,10 @@ def test_19_update_resource(self): resource_updated.pop('url') resource_updated.pop('revision_id') - resource_updated.pop('revision_timestamp') + resource_updated.pop('revision_timestamp', None) resource_created.pop('url') resource_created.pop('revision_id') - resource_created.pop('revision_timestamp') + resource_created.pop('revision_timestamp', None) assert_equal(resource_updated, resource_created) def test_20_task_status_update(self): From 53d6a1f869eb3afdb289305ab31a355ea7fba987 Mon Sep 17 00:00:00 2001 From: kindly Date: Tue, 19 Nov 2013 17:20:04 +0000 Subject: [PATCH 08/14] [#1273] fix some more tests --- ckan/logic/action/update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index 1d4dfb30efc..32f7c7369a2 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -228,12 +228,12 @@ def resource_update(context, data_dict): context['defer_commit'] = True context['use_cache'] = False pkg_dict = _get_action('package_update')(context, pkg_dict) + context.pop('defer_commit') except ValidationError, e: errors = e.error_dict['resources'][n] raise ValidationError(errors) - resource = pkg_dict['resources'][n] - upload.upload(resource['id'], uploader.get_max_resource_size()) + upload.upload(id, uploader.get_max_resource_size()) model.repo.commit() return 
_get_action('resource_show')(context, {'id': id}) From edb65c01afce7b8c5d699e7d704d3ab05a1518f0 Mon Sep 17 00:00:00 2001 From: kindly Date: Wed, 20 Nov 2013 11:13:11 +0000 Subject: [PATCH 09/14] [#1273] pep8 --- ckan/lib/uploader.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py index b46f186ccb9..a9c843a0d0d 100644 --- a/ckan/lib/uploader.py +++ b/ckan/lib/uploader.py @@ -43,12 +43,14 @@ def get_storage_path(): return _storage_path + def get_max_image_size(): global _max_image_size if _max_image_size is None: _max_image_size = int(config.get('ckan.max_image_size', 2)) return _max_image_size + def get_max_resource_size(): global _max_resource_size if _max_resource_size is None: @@ -175,10 +177,9 @@ def __init__(self, resource): elif self.clear: resource['url_type'] = '' - def get_directory(self, id): directory = os.path.join(self.storage_path, - id[0:3], id[3:6]) + id[0:3], id[3:6]) return directory def get_path(self, id): @@ -186,7 +187,6 @@ def get_path(self, id): filepath = os.path.join(directory, id[6:]) return filepath - def upload(self, id, max_size=10): if not self.storage_path: return @@ -205,7 +205,8 @@ def upload(self, id, max_size=10): current_size = 0 while True: current_size = current_size + 1 - data = self.upload_file.read(2 ** 20) #mb chuncks + #MB chunks + data = self.upload_file.read(2 ** 20) if not data: break output_file.write(data) From d4ff6a8a929855300be19e99b2e38a68f923788a Mon Sep 17 00:00:00 2001 From: kindly Date: Wed, 20 Nov 2013 19:04:32 +0000 Subject: [PATCH 10/14] [#1273] fix datastore test --- ckan/logic/action/create.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index 652a87d2f7d..4c8433c306f 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -269,6 +269,7 @@ def resource_create(context, data_dict): context['defer_commit'] = True context['use_cache'] = False 
_get_action('package_update')(context, pkg_dict) + context.pop('defer_commit') except ValidationError, e: errors = e.error_dict['resources'][-1] raise ValidationError(errors) From f011d992d64c0a46ebd07be4e226f38e5a1fcf54 Mon Sep 17 00:00:00 2001 From: John Martin Date: Thu, 21 Nov 2013 11:32:05 +0000 Subject: [PATCH 11/14] [#1273] Removes old CSS for resource file type swtiching --- ckan/public/base/less/dataset.less | 38 ------------------------------ 1 file changed, 38 deletions(-) diff --git a/ckan/public/base/less/dataset.less b/ckan/public/base/less/dataset.less index 29b07d6b8a1..a102e3040a6 100644 --- a/ckan/public/base/less/dataset.less +++ b/ckan/public/base/less/dataset.less @@ -95,44 +95,6 @@ // Dataset Forms -.dataset-resource-form .dataset-form-resource-types { - margin-bottom: 5px; -} - -.dataset-form-resource-types .ckan-icon { - position: relative; - top: 3px; -} - -.dataset-form-resource-types .radio { - font-weight: normal; - padding-left: 0; - padding-right: 18px; -} - -.dataset-form-resource-types label { - position: relative; -} - -.dataset-form-resource-types input[type=radio]:checked+label { - font-weight: bold; -} - -.dataset-form-resource-types input[type=radio]:checked+label:after { - .ckan-icon; - .ckan-icon-callout; - display: block; - content: ""; - position: absolute; - top: auto; - left: 0; - bottom: -12px; -} - -.dataset-form-resource-types input[type=radio] { - display: none; -} - // Tag List .tag-list { From 042c161257891726f88971d6403651647760b14c Mon Sep 17 00:00:00 2001 From: kindly Date: Mon, 2 Dec 2013 13:39:50 +0000 Subject: [PATCH 12/14] [#1273] fix typos and minor issues from pull request --- ckan/controllers/package.py | 1 - ckan/lib/dictization/model_dictize.py | 2 ++ ckan/logic/action/create.py | 2 -- ckan/templates/macros/form.html | 2 +- doc/configuration.rst | 5 +++-- doc/filestore.rst | 7 ++++--- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py 
index f71da923d34..979b5b2bb3c 100644 --- a/ckan/controllers/package.py +++ b/ckan/controllers/package.py @@ -1234,7 +1234,6 @@ def resource_download(self, id, resource_id, filename=None): status, headers, app_iter = request.call_application(fileapp) except OSError: abort(404, _('Resource data not found')) - status, headers, app_iter = request.call_application(fileapp) response.headers.update(dict(headers)) content_type, content_enc = mimetypes.guess_type(rsc.get('url','')) response.headers['Content-Type'] = content_type diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index fd043e9e2d3..f00bea4c4b2 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -148,6 +148,8 @@ def resource_dictize(res, context): resource['format'] = _unified_resource_format(res.format) # some urls do not have the protocol this adds http:// to these url = resource['url'] + ## for_edit is only called at the times when the dataset is to be edited + ## without for_edit the whole qualified resource is returned. 
if resource.get('url_type') == 'upload' and not context.get('for_edit'): resource_group = model.Session.query( model.ResourceGroup).get(resource_group_id) diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index 4c8433c306f..9e50c0d8197 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -241,8 +241,6 @@ def resource_create(context, data_dict): :type webstore_last_updated: iso date string :param upload: (optional) :type upload: FieldStorage (optional) needs multipart/form-data - :param clear_upload: (optional) - :type clear_upload: boolean (optional) set to true to remove uplaoded file :returns: the newly created resource :rtype: dictionary diff --git a/ckan/templates/macros/form.html b/ckan/templates/macros/form.html index 1d53a5df159..0c0062c873b 100644 --- a/ckan/templates/macros/form.html +++ b/ckan/templates/macros/form.html @@ -419,7 +419,7 @@ {% if is_upload_enabled %} {{ input(field_upload, label=upload_label, id='field-image-upload', type='file', placeholder='', value='', error='', classes=['control-full']) }} - {% if is_uploadu%} + {% if is_upload %} {{ checkbox(field_clear, label=_('Clear Upload'), id='field-clear-upload', value='true', error='', classes=['control-full']) }} {% endif %} {% endif %} diff --git a/doc/configuration.rst b/doc/configuration.rst index 309eb325827..7c63a3f5a30 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -1020,7 +1020,7 @@ Default value: ``None`` Defines the storage backend used by CKAN: ``pairtree`` for local storage, ``s3`` for Amazon S3 Cloud Storage or ``google`` for Google Cloud Storage. Note that each of these must be accompanied by the relevant settings for each backend described below. -Depricated, only aviliable option is now pairtree. +Deprecated, only available option is now pairtree. This must be used nonetheless if upgrading for CKAN 2.1 in order to keep access to your old pairtree files. .. 
_ofs.storage_dir: @@ -1036,7 +1036,8 @@ Default value: ``None`` Only used with the local storage backend. Use this to specify where uploaded files should be stored, and also to turn on the handling of file storage. The folder should exist, and will automatically be turned into a valid pairtree repository if it is not already. -Depricated, please use ckan.storage_path. +Deprecated, please use ckan.storage_path. This must be used nonetheless if upgrading for CKAN 2.1 in order to keep access to your old pairtree files. + diff --git a/doc/filestore.rst b/doc/filestore.rst index 92a5023232b..f6138f85f9f 100644 --- a/doc/filestore.rst +++ b/doc/filestore.rst @@ -9,8 +9,9 @@ displayed against groups and organizations. Previous versions of CKAN used to allow uploads to remote cloud hosting but we have simplified this to only alow local file uploads. This is to give CKAN more control over the files and make access control possible. If you - are already using pairtree local file storage then you should keep your - current settings, without change. + are already using pairtree local file storage then you must keep your + current settings, otherwise users will not be also able to download the old + uploaded files. ------------------------------------------- Setup File Uploads @@ -60,7 +61,7 @@ FileStore API .. versionchanged:: 2.2 The previous API has been depricated although should still work if you where - using loca file storage. + using local file storage. The api is part of the resource_create and resource_update action api functions. 
You can post mutipart/form-data to the api and the key, value From 62b91b1bf522fad39ad0a2d0a9b7c6c16b2b2cca Mon Sep 17 00:00:00 2001 From: kindly Date: Tue, 3 Dec 2013 00:55:22 +0000 Subject: [PATCH 13/14] [#1273] add filestore migration script --- ckan/lib/cli.py | 42 ++++++++++++++++++++++++++++++++++++++++++ doc/filestore.rst | 31 ++++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 72d971dbb71..3378bce73e9 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -130,6 +130,7 @@ class ManageDb(CkanCommand): db load-only FILE_PATH - load a pg_dump from a file but don\'t do the schema upgrade or search indexing db create-from-model - create database from the model (indexes not made) + db migrate-filestore - migrate all uploaded data from the 2.1 filesore. ''' summary = __doc__.split('\n')[0] usage = __doc__ @@ -187,6 +188,8 @@ def command(self): print 'Creating DB: SUCCESS' elif cmd == 'send-rdf': self.send_rdf() + elif cmd == 'migrate-filestore': + self.migrate_filestore() else: print 'Command %s not recognized' % cmd sys.exit(1) @@ -319,6 +322,45 @@ def send_rdf(self): talis = ckan.lib.talis.Talis() return talis.send_rdf(talis_store, username, password) + def migrate_filestore(self): + from ckan.model import Session + import requests + from ckan.lib.uploader import ResourceUpload + results = Session.execute("select id, revision_id, url from resource " + "where resource_type = 'file.upload' " + "and (url_type <> 'upload' or url_type is null)" + "and url like '%storage%'") + for id, revision_id, url in results: + response = requests.get(url, stream=True) + if response.status_code != 200: + print "failed to fetch %s (code %s)" % (url, + response.status_code) + continue + resource_upload = ResourceUpload({'id': id}) + assert resource_upload.storage_path, "no storage configured aborting" + + directory = resource_upload.get_directory(id) + filepath = resource_upload.get_path(id) + try: + 
os.makedirs(directory) + except OSError, e: + ## errno 17 is file already exists + if e.errno != 17: + raise + + with open(filepath, 'wb+') as out: + for chunk in response.iter_content(1024): + if chunk: + out.write(chunk) + + Session.execute("update resource set url_type = 'upload'" + "where id = '%s'" % id) + Session.execute("update resource_revision set url_type = 'upload'" + "where id = '%s' and " + "revision_id = '%s'" % (id, revision_id)) + Session.commit() + print "Saved url %s" % url + def version(self): from ckan.model import Session print Session.execute('select version from migrate_version;').fetchall() diff --git a/doc/filestore.rst b/doc/filestore.rst index f6138f85f9f..4c5e168302d 100644 --- a/doc/filestore.rst +++ b/doc/filestore.rst @@ -7,11 +7,9 @@ displayed against groups and organizations. .. versionchanged:: 2.2 Previous versions of CKAN used to allow uploads to remote cloud hosting but - we have simplified this to only alow local file uploads. This is to give - CKAN more control over the files and make access control possible. If you - are already using pairtree local file storage then you must keep your - current settings, otherwise users will not be also able to download the old - uploaded files. + we have simplified this to only alow local file uploads (see below for + details on how to migrate). This is to give CKAN more control over the files + and make access control possible. ------------------------------------------- Setup File Uploads @@ -99,3 +97,26 @@ there is special boolean field clear_upload to do this:: It is also possible to have uploaded files (if of a suitable format) stored in the DataStore which will then provides an API to the data. See :ref:`datapusher` for more details. +-------------------------- +Migration from 2.1 to 2.2 +-------------------------- + +If you are using pairtree local file storage then you can keep your current settings +without issue. 
The pairtree and new storage can live side by side but you are still +encouraged to migrate. If you change your config options to the ones specified in +these docs you will need to run the migration below. + +If you are running remote storage then all previous links will still be accessible +but if you want to move the remote storage documents to the local storage you will +need to run the migration as well. + +In order to migrate make sure your CKAN instance is running as the script will +request the data from the instance using apis. You need to run the following +on the command line to do the migration:: + + paster db migrate-filestore + +This may take a long time especially if you have a lot of files remotely. +If the remote hosting goes down or the job is interrupted it is safe to run it again +and it will try all the unsuccessful ones again. + From 9416adc0c9f7e54b5573a99a348e34b17f421154 Mon Sep 17 00:00:00 2001 From: kindly Date: Tue, 3 Dec 2013 16:48:40 +0000 Subject: [PATCH 14/14] [#1273] fix some typos --- ckan/lib/dictization/model_dictize.py | 2 +- doc/filestore.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index f00bea4c4b2..faf0df48f7c 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -149,7 +149,7 @@ def resource_dictize(res, context): # some urls do not have the protocol this adds http:// to these url = resource['url'] ## for_edit is only called at the times when the dataset is to be edited - ## without for_edit the whole qualified resource is returned. + ## in the frontend. Without for_edit the whole qualified url is returned.
if resource.get('url_type') == 'upload' and not context.get('for_edit'): resource_group = model.Session.query( model.ResourceGroup).get(resource_group_id) diff --git a/doc/filestore.rst b/doc/filestore.rst index 4c5e168302d..7e38e31658c 100644 --- a/doc/filestore.rst +++ b/doc/filestore.rst @@ -58,7 +58,7 @@ FileStore API ----------------------- .. versionchanged:: 2.2 - The previous API has been depricated although should still work if you where + The previous API has been deprecated although should still work if you were using local file storage. The api is part of the resource_create and resource_update action api