diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py index d6b5331f348..d34b2f71dcc 100644 --- a/ckan/controllers/package.py +++ b/ckan/controllers/package.py @@ -408,7 +408,7 @@ def read_ajax(self, id, revision=None): abort(404, _('Package not found')) ## hack as db_to_form schema should have this - data['tag_string'] = ' '.join([tag['name'] for tag in data.get('tags', [])]) + data['tag_string'] = ', '.join([tag['name'] for tag in data.get('tags', [])]) data.pop('tags') data = flatten_to_string_key(data) response.headers['Content-Type'] = 'application/json;charset=utf-8' diff --git a/ckan/forms/common.py b/ckan/forms/common.py index d5b8470bb89..61c4ba75e4c 100644 --- a/ckan/forms/common.py +++ b/ckan/forms/common.py @@ -481,7 +481,7 @@ def is_collection(self): class TagEditRenderer(formalchemy.fields.FieldRenderer): def render(self, **kwargs): - kwargs['value'] = ' '.join(self.value) + kwargs['value'] = ', '.join(self.value) kwargs['size'] = 60 api_url = config.get('ckan.api_url', '/').rstrip('/') tagcomplete_url = api_url+h.url_for(controller='api', @@ -495,7 +495,7 @@ def render(self, **kwargs): def _tag_links(self): tags = self.value tag_links = [h.link_to(tagname, h.url_for(controller='tag', action='read', id=tagname)) for tagname in tags] - return literal(' '.join(tag_links)) + return literal(', '.join(tag_links)) def render_readonly(self, **kwargs): tag_links = self._tag_links() @@ -504,21 +504,29 @@ def render_readonly(self, **kwargs): def _serialized_value(self): # despite being a collection, there is only one field to get # the values from - tags_as_string = self.params.getone(self.name) - tags = tags_as_string.replace(',', ' ').lower().split() + tags_as_string = self.params.getone(self.name).strip() + if tags_as_string == "": + return [] + tags = map(lambda s: s.strip(), tags_as_string.split(',')) return tags - tagname_match = re.compile('[\w\-_.]*$', re.UNICODE) - tagname_uppercase = re.compile('[A-Z]') + tagname_match = re.compile('[^"]*$') # already split on commas def tag_name_validator(self, val, field): for tag in val: + + # formalchemy deserializes an empty string into None. + # This happens if the tagstring gets split on commas, and + # there's an empty string in the resulting list. + # e.g. "tag1,,tag2" ; " ,tag1" and "tag1," will all result + # in an empty tag name. + if tag is None: + tag = u'' # let the minimum length validator take care of it. + min_length = 2 if len(tag) < min_length: raise formalchemy.ValidationError(_('Tag "%s" length is less than minimum %s') % (tag, min_length)) if not self.tagname_match.match(tag): - raise formalchemy.ValidationError(_('Tag "%s" must be alphanumeric characters or symbols: -_.') % (tag)) - if self.tagname_uppercase.search(tag): - raise formalchemy.ValidationError(_('Tag "%s" must not be uppercase' % (tag))) + raise formalchemy.ValidationError(_('Tag "%s" must not contain any quotation marks: "') % (tag)) class ExtrasField(ConfiguredField): '''A form field for arbitrary "extras" dataset data.''' diff --git a/ckan/forms/package.py b/ckan/forms/package.py index c3d1e48f8c5..cf70934b419 100644 --- a/ckan/forms/package.py +++ b/ckan/forms/package.py @@ -66,8 +66,8 @@ def build_package_form(is_admin=False, user_editable_groups=None, **params): builder.set_field_text( 'tags', _('Tags'), - instructions=literal(_('Terms that may link this dataset to similar ones. For more information on conventions, see this wiki page.') % 'http://wiki.okfn.org/ckan/doc/faq#TagConventions'), - hints=_('e.g. pollution rivers water-quality') + instructions=literal(_('Comma-separated terms that may link this dataset to similar ones. For more information on conventions, see this wiki page.') % 'http://wiki.okfn.org/ckan/doc/faq#TagConventions'), + hints=_('e.g. pollution, rivers, water quality') ) builder.set_field_text( 'resources', diff --git a/ckan/forms/package_dict.py b/ckan/forms/package_dict.py index b424f48bbd0..347ebcbab69 100644 --- a/ckan/forms/package_dict.py +++ b/ckan/forms/package_dict.py @@ -47,7 +47,7 @@ def get_package_dict(pkg=None, blank=False, fs=None, user_editable_groups=None): if field.renderer.name.endswith('-extras'): indict[field.renderer.name] = dict(pkg.extras) if pkg else {} if field.renderer.name.endswith('-tags'): - indict[field.renderer.name] = ' '.join([tag.name for tag in pkg.tags]) if pkg else '' + indict[field.renderer.name] = ','.join([tag.name for tag in pkg.tags]) if pkg else '' if field.renderer.name.endswith('-resources'): indict[field.renderer.name] = [dict([(key, getattr(res, key)) for key in model.Resource.get_columns()]) for res in pkg.resources] if pkg else [] @@ -98,7 +98,7 @@ def edit_package_dict(dict_, changed_items, id=''): resources.append(res_dict_str) dict_[resources_key] = resources elif key == tags_key and isinstance(value, list): - dict_[key] = ' '.join(value) + dict_[key] = ','.join(value) else: dict_[key] = value elif key == download_url_key: diff --git a/ckan/logic/validators.py b/ckan/logic/validators.py index 9598ac819a9..e8338fec37c 100644 --- a/ckan/logic/validators.py +++ b/ckan/logic/validators.py @@ -153,9 +153,9 @@ def tag_length_validator(value, context): def tag_name_validator(value, context): - tagname_match = re.compile('[^,]*$', re.UNICODE) + tagname_match = re.compile('[^,"]*$', re.UNICODE) if not tagname_match.match(value): - raise Invalid(_('Tag "%s" must not contain commas' % value)) + raise Invalid(_('Tag "%s" must not contain commas nor quotes' % value)) return value def tag_not_uppercase(value, context): @@ -169,9 +169,17 @@ def tag_string_convert(key, data, errors, context): value = data[key] - tags = value.split() + # Ensure a tag string with only whitespace + # is converted to the empty list of tags. + # If we were to split(',') on this string, + # we'd get the non-empty list, ['']. + if not value.strip(): + return + + tags = map(lambda s: s.strip(), + value.split(',')) for num, tag in enumerate(tags): - data[('tags', num, 'name')] = tag.lower() + data[('tags', num, 'name')] = tag for tag in tags: tag_length_validator(tag, context) diff --git a/ckan/templates/package/new_package_form.html b/ckan/templates/package/new_package_form.html index eb947c70607..5c9df3799f8 100644 --- a/ckan/templates/package/new_package_form.html +++ b/ckan/templates/package/new_package_form.html @@ -137,10 +137,10 @@

Tags

+ value="${data.get('tag_string') or ', '.join([tag['name'] for tag in data.get('tags', [])])}" />
-
Terms that may link this dataset to similar ones. For more information on conventions, see this wiki page.
-
e.g. pollution rivers water-quality
+
Comma-separated terms that may link this dataset to similar ones. For more information on conventions, see this wiki page.
+
e.g. pollution, rivers, water quality
${errors.get('tag_string', '')}
diff --git a/ckan/tests/forms/test_package.py b/ckan/tests/forms/test_package.py index e41e0e9b8a7..46f64649906 100644 --- a/ckan/tests/forms/test_package.py +++ b/ckan/tests/forms/test_package.py @@ -127,7 +127,7 @@ def test_3_sync_new(self): indict = _get_blank_param_dict() indict['Package--name'] = u'testname' indict['Package--notes'] = u'some new notes' - indict['Package--tags'] = u'russian tolstoy, ' + newtagname, + indict['Package--tags'] = u'russian, tolstoy, ' + newtagname, indict['Package--license_id'] = u'gpl-3.0' indict['Package--extras-newfield0-key'] = u'testkey' indict['Package--extras-newfield0-value'] = u'testvalue' @@ -178,7 +178,7 @@ def test_4_sync_update(self): indict = _get_blank_param_dict(anna) indict[prefix + 'name'] = u'annakaren' indict[prefix + 'notes'] = u'new notes' - indict[prefix + 'tags'] = u'russian ' + newtagname + indict[prefix + 'tags'] = u'russian ,' + newtagname indict[prefix + 'license_id'] = u'gpl-3.0' indict[prefix + 'extras-newfield0-key'] = u'testkey' indict[prefix + 'extras-newfield0-value'] = u'testvalue' @@ -336,8 +336,11 @@ def test_1_tag_name(self): prefix = 'Package-%s-' % anna.id indict = _get_blank_param_dict(anna) - good_names = [ 'blah', 'ab', 'ab1', 'some-random-made-up-name', 'has_underscore', u'unicode-\xe0', 'dot.in.name', 'blAh' ] # nb: becomes automatically lowercase - bad_names = [ 'a', 'percent%' ] + good_names = [ 'blah', 'ab', 'ab1', 'some-random-made-up-name',\ + 'has_underscore', u'unicode-\xe0', 'dot.in.name',\ + 'multiple words', u'with Greek omega \u03a9', 'CAPITALS'] + bad_names = [ 'a', ' ,leading comma', 'trailing comma,',\ + 'empty,,tag' 'quote"character'] for i, name in enumerate(good_names): indict[prefix + 'name'] = u'okname' diff --git a/ckan/tests/functional/test_package.py b/ckan/tests/functional/test_package.py index c3f74dc06bd..9e6f16a51a7 100644 --- a/ckan/tests/functional/test_package.py +++ b/ckan/tests/functional/test_package.py @@ -87,7 +87,7 @@ def _check_package_read(self, res, **params): assert params['notes'] in main_div, main_div_str license = model.Package.get_license_register()[params['license_id']] assert license.title in main_div, (license.title, main_div_str) - tag_names = [tag.lower() for tag in params['tags']] + tag_names = list(params['tags']) self.check_named_element(main_div, 'ul', *tag_names) if params.has_key('state'): assert 'State: %s' % params['state'] in main_div.replace('', ''), main_div_str @@ -156,7 +156,7 @@ def check_form_filled_correctly(self, res, **params): self.check_tag_and_data(main_res, prefix+'notes', params['notes']) self.check_tag_and_data(main_res, 'selected', params['license_id']) if isinstance(params['tags'], (str, unicode)): - tags = params['tags'].split() + tags = map(lambda s: s.strip(), params['tags'].split(',')) else: tags = params['tags'] for tag in tags: @@ -699,9 +699,9 @@ def test_edit_2_not_groups(self): def test_edit_2_tags_and_groups(self): # testing tag updating - newtagnames = [u'russian', u'tolstoy', u'superb'] + newtagnames = [u'russian', u'tolstoy', u'superb book'] newtags = newtagnames - tagvalues = ' '.join(newtags) + tagvalues = ','.join(newtags) fv = self.res.forms['dataset-edit'] prefix = '' fv[prefix + 'tag_string'] = tagvalues @@ -765,7 +765,7 @@ def test_edit_all_fields(self): pkg.notes= u'this is editpkg' pkg.version = u'2.2' t1 = model.Tag(name=u'one') - t2 = model.Tag(name=u'two') + t2 = model.Tag(name=u'two words') pkg.tags = [t1, t2] pkg.state = model.State.DELETED pkg.license_id = u'other-open' @@ -800,8 +800,8 @@ def test_edit_all_fields(self): notes = u'Very important' license_id = u'gpl-3.0' state = model.State.ACTIVE - tags = (u'tag1', u'tag2', u'tag3') - tags_txt = u' '.join(tags) + tags = (u'tag1', u'tag2', u'tag 3') + tags_txt = u','.join(tags) extra_changed = 'key1', self.value1 + ' CHANGED', False extra_new = 'newkey', 'newvalue', False log_message = 'This is a comment' @@ -1108,8 +1108,8 @@ def test_new_all_fields(self): download_url = u'http://something.com/somewhere-else.zip' notes = u'Very important' license_id = u'gpl-3.0' - tags = (u'tag1', u'tag2', u'tag3', u'SomeCaps') - tags_txt = u' '.join(tags) + tags = (u'tag1', u'tag2!', u'tag 3', u'SomeCaps') + tags_txt = u','.join(tags) extras = {self.key1:self.value1, 'key2':'value2', 'key3':'value3'} log_message = 'This is a comment' assert not model.Package.by_name(name) @@ -1150,8 +1150,7 @@ def test_new_all_fields(self): assert pkg.license.id == license_id saved_tagnames = [str(tag.name) for tag in pkg.tags] saved_tagnames.sort() - expected_tagnames = [tag.lower() for tag in tags] - expected_tagnames.sort() + expected_tagnames = sorted(tags) assert saved_tagnames == expected_tagnames, '%r != %r' % (saved_tagnames, expected_tagnames) saved_groupnames = [str(group.name) for group in pkg.groups] assert len(pkg.extras) == len(extras) diff --git a/ckan/tests/lib/test_dictization_schema.py b/ckan/tests/lib/test_dictization_schema.py index a270108fab2..133bf38ce6e 100644 --- a/ckan/tests/lib/test_dictization_schema.py +++ b/ckan/tests/lib/test_dictization_schema.py @@ -176,7 +176,7 @@ def test_4_tag_schema_allows_punctuation(self): """Asserts that a tag name with punctuation (except commas) is valid""" ignored = "" data = { - 'name': u'.?!<>\\/"%^&*()-_+=~#\'@`', + 'name': u'.?!<>\\/%^&*()-_+=~#\'@`', 'revision_timestamp': ignored, 'state': ignored } diff --git a/ckan/tests/test_dumper.py b/ckan/tests/test_dumper.py index 6beda8fc4de..df0977973af 100644 --- a/ckan/tests/test_dumper.py +++ b/ckan/tests/test_dumper.py @@ -83,7 +83,7 @@ def test_dump(self): for key in ['User']: assert key not in tables, '%s should not be in %s' % (key, tables) assert len(dumpeddata['Package']) == 2, len(dumpeddata['Package']) - assert len(dumpeddata['Tag']) == 2, len(dumpeddata['Tag']) + assert len(dumpeddata['Tag']) == 3, len(dumpeddata['Tag']) assert len(dumpeddata['PackageRevision']) == 2, len(dumpeddata['PackageRevision']) assert len(dumpeddata['Group']) == 2, len(dumpeddata['Group'])