def describeTest(self, test):
    """Decide how a test is described in the test run output.

    When the ``--docstrings`` option was not given, return ``False`` so that
    the module name is displayed instead of the test's docstring. Otherwise
    return ``None`` and leave the description untouched.

    :param test: the test about to be described (not inspected here).
    """
    show_docstrings = CkanNose.settings.docstrings
    if show_docstrings:
        return None
    # display module name instead of docstring
    return False
controller='ckan.controllers.storage:StorageAPIController', + action='set_metadata', + conditions={'method': ['PUT','POST']}) + map.connect('storage_api_get_metadata', '/api/storage/metadata/{label:.*}', + controller='ckan.controllers.storage:StorageAPIController', + action='get_metadata', + conditions={'method': ['GET']}) + map.connect('storage_api_auth_request', + '/api/storage/auth/request/{label:.*}', + controller='ckan.controllers.storage:StorageAPIController', + action='auth_request') + map.connect('storage_api_auth_form', + '/api/storage/auth/form/{label:.*}', + controller='ckan.controllers.storage:StorageAPIController', + action='auth_form') + map.connect('storage_upload', '/storage/upload', + controller='ckan.controllers.storage:StorageController', + action='upload') + map.connect('storage_upload_handle', '/storage/upload_handle', + controller='ckan.controllers.storage:StorageController', + action='upload_handle') + map.connect('storage_upload_success', '/storage/upload/success', + controller='ckan.controllers.storage:StorageController', + action='success') + map.connect('storage_upload_success_empty', '/storage/upload/success_empty', + controller='ckan.controllers.storage:StorageController', + action='success_empty') + map.connect('storage_file', '/storage/f/{label:.*}', + controller='ckan.controllers.storage:StorageController', + action='file') + + for plugin in routing_plugins: map = plugin.after_map(map) diff --git a/ckan/controllers/storage.py b/ckan/controllers/storage.py new file mode 100644 index 00000000000..05c2140a5e6 --- /dev/null +++ b/ckan/controllers/storage.py @@ -0,0 +1,399 @@ +import os +import re +import urllib +import uuid +from datetime import datetime +from cgi import FieldStorage + +from ofs import get_impl +from pylons import request, response +from pylons.controllers.util import abort, redirect_to +from pylons import config +from paste.fileapp import FileApp +from paste.deploy.converters import asbool + +from ckan.lib.base import 
# urllib.unquote_plus only exists on Python 2; fall back to its Python 3
# location, in the same spirit as the other import fallbacks in this module.
try:
    from urllib import unquote_plus
except ImportError:
    from urllib.parse import unquote_plus

# Matches a body that ends in "=" optionally followed by digits. DOTALL is
# required so that "." also spans newlines: without it a multi-line
# (e.g. pretty-printed JSON) body never matches and keeps its bogus suffix.
_eq_re = re.compile(r"^(.*)(=[0-9]*)$", re.DOTALL)


def fix_stupid_pylons_encoding(data):
    """
    Fix an apparent encoding problem when calling request.body

    The body is URL-unquoted when it starts with "%" or "+", and a trailing
    "=" (optionally followed by digits) is stripped.

    TODO: Investigate whether this is fixed in later versions?

    :param data: the raw request body as a string.
    :return: the cleaned-up body; unchanged when neither fix applies.
    """
    if data.startswith("%") or data.startswith("+"):
        data = unquote_plus(data)
    m = _eq_re.match(data)
    if m:
        data = m.group(1)
    return data
def upload_handle(self):
    """Store an uploaded file under its label and render the success page.

    Reads the ``file`` (upload stream) and ``key`` (storage label) request
    parameters. Every other submitted parameter is stored as metadata
    alongside the file, together with the original filename and the name
    of the uploading user.

    Aborts with 400 when the label or the file stream is missing; see
    ``authorize`` for the authorization failures.
    """
    params = dict(request.params.items())
    stream = params.get('file')
    label = params.get('key')

    # Validate the request *before* authorizing: authorize() asks the
    # backend whether bucket/label already exists, which is meaningless
    # (and backend dependent) when no label was supplied at all.
    if not label:
        abort(400, "No label")
    if not isinstance(stream, FieldStorage):
        abort(400, "No file stream.")

    # self.ofs builds a new driver on every access, so fetch it only once.
    ofs = self.ofs
    authorize('POST', BUCKET, label, c.userobj, ofs)

    # The stream itself is the content, not metadata.
    del params['file']
    params['filename-original'] = stream.filename
    params['uploaded-by'] = c.userobj.name if c.userobj else ""

    ofs.put_stream(BUCKET, label, stream.file, params)
    # Do not redirect here as it breaks js file uploads (get infinite loop
    # in FF and crash in Chrome)
    return self.success(label)
class StorageAPIController(BaseController):
    """JSON API over the storage backend: item metadata and upload auth.

    Every action is answered with 404 when no storage directory is
    configured (see ``__before__``).
    """

    def __before__(self, action, **params):
        """Run the base pre-request hook, then require configured storage."""
        super(StorageAPIController, self).__before__(action, **params)
        if not storage_dir:
            abort(404)
        else:
            create_pairtree_marker()

    @property
    def ofs(self):
        """A configured OFS driver; a new one is built on every access."""
        return get_ofs()

    @jsonpify
    def index(self):
        """Describe the available end points, keyed by path template."""
        info = {
            'metadata/{label}': {
                'description': 'Get or set metadata for this item in storage',
            },
            'auth/request/{label}': {
                'description': self.auth_request.__doc__,
            },
            'auth/form/{label}': {
                'description': self.auth_form.__doc__,
            }
        }
        return info

    def _read_request_headers(self):
        """Return the data the client sent along with an auth request.

        A POST carries it JSON-encoded in the request body; any other
        request carries it in the request parameters. Aborts with 400 when
        a POST body cannot be decoded.
        """
        if not request.POST:
            return dict(request.params)
        try:
            data = fix_stupid_pylons_encoding(request.body)
            return json.loads(data)
        except Exception:
            # log.exception records the current traceback with the message.
            log.exception('Could not decode the JSON request body')
            abort(400)

    def set_metadata(self, label):
        """Create or update the metadata stored for ``label``.

        The new metadata is read as a JSON object from the request body (an
        empty body means no metadata). For an existing item a PUT replaces
        the metadata, deleting stored keys that are absent from the request,
        while any other method only updates the supplied keys.

        Aborts with 400 when the body cannot be decoded and with 409 when
        the bucket cannot be obtained.

        :param label: label of the item; a leading "/" is added if missing.
        :return: the resulting metadata, as produced by ``get_metadata``.
        """
        bucket = BUCKET
        if not label.startswith("/"):
            label = "/" + label

        try:
            data = fix_stupid_pylons_encoding(request.body)
            metadata = json.loads(data) if data else {}
        except Exception:
            abort(400)

        # NOTE(review): _require_bucket, _get_key and _update_key_metadata
        # are private methods of the OFS driver - confirm that the driver
        # returned by get_ofs() actually provides them.
        ofs = self.ofs
        try:
            b = ofs._require_bucket(bucket)
        except Exception:
            abort(409)

        k = ofs._get_key(b, label)
        if k is None:
            # New item: stamp the creation time and store empty content.
            k = b.new_key(label)
            metadata = metadata.copy()
            metadata["_creation_time"] = str(datetime.utcnow())
            ofs._update_key_metadata(k, metadata)
            k.set_contents_from_file(StringIO(''))
        elif request.method == "PUT":
            # PUT replaces: drop stored keys the client no longer sends.
            old = ofs.get_metadata(bucket, label)
            to_delete = [ok for ok in old.keys() if ok not in metadata]
            if to_delete:
                ofs.del_metadata_keys(bucket, label, to_delete)
            ofs.update_metadata(bucket, label, metadata)
        else:
            ofs.update_metadata(bucket, label, metadata)

        k.make_public()
        k.close()

        # get_metadata() takes the label only; the bucket is always BUCKET.
        return self.get_metadata(label)

    @jsonpify
    def get_metadata(self, label):
        """Return the metadata stored for ``label``.

        The qualified URL of the stored file is added under ``_location``.
        Aborts with 404 when the item does not exist.
        """
        bucket = BUCKET
        url = h.url_for('storage_file',
                        label=label,
                        qualified=True
                        )
        if not self.ofs.exists(bucket, label):
            abort(404)
        metadata = self.ofs.get_metadata(bucket, label)
        metadata["_location"] = url
        return metadata

    # The docstring below is served to API clients by index(), so it is part
    # of the API output and is kept as it was.
    @jsonpify
    def auth_request(self, label):
        '''Provide authentication information for a request so a client can
        interact with backend storage directly.

        :param label: label.
        :param kwargs: sent either via query string for GET or json-encoded
            dict for POST). Interpreted as http headers for request plus an
            (optional) method parameter (being the HTTP method).

            Examples of headers are:

                Content-Type
                Content-Encoding (optional)
                Content-Length
                Content-MD5
                Expect (should be '100-Continue')

        :return: is a json hash containing various attributes including a
        headers dictionary containing an Authorization field which is good for
        15m.

        '''
        bucket = BUCKET
        headers = self._read_request_headers()
        if not isinstance(headers, dict):
            # A JSON body that is not an object cannot be a set of headers.
            abort(400)
        # "method" is a pseudo-header selecting the HTTP method to authorize.
        method = headers.pop('method', 'POST')

        authorize(method, bucket, label, c.userobj, self.ofs)

        http_request = self.ofs.authenticate_request(method, bucket, label,
                                                     headers)
        return {
            'host': http_request.host,
            'method': http_request.method,
            'path': http_request.path,
            'headers': http_request.headers
        }

    def _get_remote_form_data(self, label):
        """Build the POST form arguments for an upload made directly to the
        remote storage backend.

        :param label: label the uploaded file will be stored under.
        :return: the form description built by the backend connection, with
            the ``content-length-range`` entry removed from its fields.
        """
        content_length_range = int(
            config.get('ckanext.storage.max_content_length',
                       50000000))
        acl = 'public-read'
        # Same anonymous fallback as StorageController.upload_handle.
        uploaded_by = c.userobj.name if c.userobj else ""
        fields = [{
            'name': self.ofs.conn.provider.metadata_prefix + 'uploaded-by',
            'value': uploaded_by
        }]
        conditions = ['{"%s": "%s"}' % (x['name'], x['value']) for x in
                      fields]
        # In FF redirect to this breaks js upload as FF attempts to open file
        # (presumably because mimetype = javascript) and this stops js
        # success_action_redirect = h.url_for('storage_api_get_metadata', qualified=True,
        #        label=label)
        success_action_redirect = h.url_for('storage_upload_success_empty', qualified=True,
                                            label=label)
        data = self.ofs.conn.build_post_form_args(
            BUCKET,
            label,
            expires_in=72000,
            max_content_length=content_length_range,
            success_action_redirect=success_action_redirect,
            acl=acl,
            fields=fields,
            conditions=conditions
        )
        # HACK: fix up some broken stuff from boto
        # e.g. should not have content-length-range in list of fields!
        # Build a filtered list instead of deleting while iterating, which
        # skips the element that follows each deleted one.
        data['fields'] = [field for field in data['fields']
                          if field['name'] != 'content-length-range']
        return data

    def _get_form_data(self, label):
        """Describe the form for an upload handled by this application.

        :param label: label the uploaded file will be stored under.
        :return: dict with the form ``action`` URL and its ``fields``.
        """
        data = {
            'action': h.url_for('storage_upload_handle', qualified=True),
            'fields': [
                {
                    'name': 'key',
                    'value': label
                }
            ]
        }
        return data

    # The docstring below is served to API clients by index(), so it is part
    # of the API output and is kept as it was.
    @jsonpify
    def auth_form(self, label):
        '''Provide fields for a form upload to storage including
        authentication.

        :param label: label.
        :return: json-encoded dictionary with action parameter and fields list.
        '''
        bucket = BUCKET
        # The submitted data is not used, but a malformed POST body is still
        # rejected with 400.
        self._read_request_headers()

        method = 'POST'
        authorize(method, bucket, label, c.userobj, self.ofs)
        data = self._get_form_data(label)
        return data
context) diff --git a/ckan/logic/auth/update.py b/ckan/logic/auth/update.py index 1206be19c10..b2b15fc6831 100644 --- a/ckan/logic/auth/update.py +++ b/ckan/logic/auth/update.py @@ -1,5 +1,5 @@ from ckan.logic import check_access_old, NotFound -from ckan.logic.auth import get_package_object, get_group_object, get_authorization_group_object, \ +from ckan.logic.auth import get_package_object, get_resource_object, get_group_object, get_authorization_group_object, \ get_user_object, get_resource_object from ckan.logic.auth.create import check_group_auth, package_relationship_create from ckan.authz import Authorizer @@ -150,6 +150,9 @@ def task_status_update(context, data_dict): model = context['model'] user = context['user'] + if 'ignore_auth' in context and context['ignore_auth']: + return {'success': True} + authorized = Authorizer().is_sysadmin(unicode(user)) if not authorized: return {'success': False, 'msg': _('User %s not authorized to update task_status table') % str(user)} diff --git a/ckan/model/__init__.py b/ckan/model/__init__.py index 481c8371fde..d2b5d839305 100644 --- a/ckan/model/__init__.py +++ b/ckan/model/__init__.py @@ -285,26 +285,6 @@ def _get_revision_user(self): Revision.groups = property(_get_groups) Revision.user = property(_get_revision_user) -def strptimestamp(s): - '''Convert a string of an ISO date into a datetime.datetime object. - - raises TypeError if the number of numbers in the string is not between 3 - and 7 (see datetime constructor). - raises ValueError if any of the numbers are out of range. - ''' - # TODO: METHOD DEPRECATED - use ckan.lib.helpers.date_str_to_datetime - log.warn('model.strptimestamp is deprecated - use ckan.lib.helpers.date_str_to_datetime instead') - import datetime, re - return datetime.datetime(*map(int, re.split('[^\d]', s))) - -def strftimestamp(t): - '''Takes a datetime.datetime and returns it as an ISO string. For - a pretty printed string, use ckan.lib.helpers.render_datetime. 
- ''' - # TODO: METHOD DEPRECATED - use ckan.lib.helpers.datetime_to_date_str - log.warn('model.strftimestamp is deprecated - use ckan.lib.helpers.datetime_to_date_str instead') - return t.isoformat() - def revision_as_dict(revision, include_packages=True, include_groups=True,ref_package_by='name'): revision_dict = OrderedDict(( ('id', revision.id), diff --git a/ckan/model/authz.py b/ckan/model/authz.py index 85b474483ab..cbe67224322 100644 --- a/ckan/model/authz.py +++ b/ckan/model/authz.py @@ -47,7 +47,8 @@ class Action(Enum): SITE_READ = u'read-site' USER_READ = u'read-user' USER_CREATE = u'create-user' - + UPLOAD_ACTION = u'file-upload' + class Role(Enum): ADMIN = u'admin' EDITOR = u'editor' @@ -67,12 +68,14 @@ class Role(Enum): (Role.EDITOR, Action.USER_READ), (Role.EDITOR, Action.SITE_READ), (Role.EDITOR, Action.READ), + (Role.EDITOR, Action.UPLOAD_ACTION), (Role.ANON_EDITOR, Action.EDIT), (Role.ANON_EDITOR, Action.PACKAGE_CREATE), (Role.ANON_EDITOR, Action.USER_CREATE), (Role.ANON_EDITOR, Action.USER_READ), (Role.ANON_EDITOR, Action.SITE_READ), (Role.ANON_EDITOR, Action.READ), + (Role.ANON_EDITOR, Action.UPLOAD_ACTION), (Role.READER, Action.USER_CREATE), (Role.READER, Action.USER_READ), (Role.READER, Action.SITE_READ), diff --git a/ckan/templates/package/new_package_form.html b/ckan/templates/package/new_package_form.html index d2319dce199..4b76a8f32d8 100644 --- a/ckan/templates/package/new_package_form.html +++ b/ckan/templates/package/new_package_form.html @@ -100,7 +100,7 @@
This upload form is valid for a limited time (usually 1h or so). If the + form expires please reload the page.
+ + +