From 158b23e4999fa610d0d3bedf55d30905e0fa3b97 Mon Sep 17 00:00:00 2001 From: John Glover Date: Wed, 14 Dec 2011 14:53:04 +0000 Subject: [PATCH 1/4] [191] latest version of #191 test remove last_modified function --- ckan/model/package.py | 33 +++++++++-------- ckan/tests/functional/api/test_action.py | 47 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/ckan/model/package.py b/ckan/model/package.py index ee2e49507ec..18910e36334 100644 --- a/ckan/model/package.py +++ b/ckan/model/package.py @@ -457,17 +457,14 @@ def diff(self, to_revision=None, from_revision=None): results[key] = value_diff return results - @staticmethod - def last_modified(*av): + @property + def metadata_modified(self): """ - Return most recent timestamp for a package revision, with optionally - extra where clause. + Return most recent timestamp for a package revision. """ - from ckan import model - where = [] - for arg in av: - if isinstance(arg, expression.ClauseElement) or isinstance(arg, basestring): - where.append(arg) + import ckan.model as model + + where = [model.package_table.c.id == self.id] where_clauses = [ and_(model.package_table.c.revision_id == model.revision_table.c.id, *where), and_(model.package_extra_table.c.package_id == model.package_table.c.id, @@ -484,10 +481,13 @@ def last_modified(*av): and_(model.package_tag_table.c.package_id == model.package_table.c.id, model.package_tag_table.c.revision_id == model.revision_table.c.id, *where) ] + query = union(*[select([model.revision_table.c.timestamp], x) for x in where_clauses] ).order_by("timestamp DESC").limit(1) + conn = model.meta.engine.connect() result = conn.execute(query).fetchone() + if result: result_datetime = iso_date_to_datetime_for_sqlite(result[0]) timestamp = result_datetime.utctimetuple() @@ -495,20 +495,21 @@ def last_modified(*av): else: timestamp, usecs = gmtime(), 0 # use timegm instead of mktime, because we don't want it localised - return timegm(timestamp) + usecs + return datetime.datetime.utcfromtimestamp(timegm(timestamp)+usecs) - @property - def metadata_modified(self): - import ckan.model as model - epochtime = self.last_modified(model.package_table.c.id==self.id) - return datetime.datetime.utcfromtimestamp(epochtime) + # q = model.Session.query(model.PackageRevision)\ + # .filter(model.PackageRevision.id == self.id)\ + # .order_by(model.PackageRevision.revision_timestamp.desc()) + # ts = q.first() + # return ts.revision_timestamp @property def metadata_created(self): import ckan.model as model q = model.Session.query(model.PackageRevision)\ .filter(model.PackageRevision.id == self.id)\ - .order_by(model.PackageRevision.revision_timestamp.asc()) + .order_by(model.PackageRevision.revision_timestamp.asc())\ + .limit(1) ts = q.first() if ts is not None: return ts.revision_timestamp diff --git a/ckan/tests/functional/api/test_action.py b/ckan/tests/functional/api/test_action.py index 7688a54e30d..634b4af7263 100644 --- a/ckan/tests/functional/api/test_action.py +++ b/ckan/tests/functional/api/test_action.py @@ -1135,3 +1135,50 @@ def test_3_bad_param(self): status=400) assert '"message": "Search Query is invalid:' in res.body, res.body assert '"Invalid search parameters: [u\'weird_param\']' in res.body, res.body + + def test_4_sort_by_metadata_modified(self): + search_params = '%s=1' % json.dumps({ + 'q': '*:*', + 'fl': 'name, metadata_modified', + 'sort': u'metadata_modified desc' + }) + + # modify warandpeace, check that it is the first search result + rev = model.repo.new_revision() + pkg = model.Package.get('warandpeace') + pkg.title = "War and Peace [UPDATED]" + model.repo.commit_and_remove() + + res = self.app.post('/api/action/package_search', params=search_params) + result = json.loads(res.body)['result'] + result_names = [r['name'] for r in result['results']] + assert result_names == ['warandpeace', 'annakarenina'], result_names + + # modify annakarenina, check that it is the first search result + rev = model.repo.new_revision() + pkg = model.Package.get('annakarenina') + pkg.title = "A Novel By Tolstoy [UPDATED]" + model.repo.commit_and_remove() + + res = self.app.post('/api/action/package_search', params=search_params) + result = json.loads(res.body)['result'] + result_names = [r['name'] for r in result['results']] + assert result_names == ['annakarenina', 'warandpeace'], result_names + + # add a tag to warandpeace, check that it is the first result + pkg = model.Package.get('warandpeace') + pkg_params = '%s=1' % json.dumps({'id': pkg.id}) + res = self.app.post('/api/action/package_show', params=pkg_params) + pkg_dict = json.loads(res.body)['result'] + pkg_dict['tags'].append({'name': 'new-tag'}) + pkg_params = '%s=1' % json.dumps(pkg_dict) + res = self.app.post('/api/action/package_update', params=pkg_params, + extra_environ={'Authorization': 'tester'}) + + res = self.app.post('/api/action/package_search', params=search_params) + result = json.loads(res.body)['result'] + result_names = [r['name'] for r in result['results']] + assert result_names == ['warandpeace', 'annakarenina'], result_names + + # TODO: add an extra to annararenina, check that it is the first result + From 09d076847cf4abcd295ddef8be8a7348ae471ba8 Mon Sep 17 00:00:00 2001 From: John Glover Date: Wed, 14 Dec 2011 14:53:27 +0000 Subject: [PATCH 2/4] [docs] add search by modification date example --- doc/apiv3.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/apiv3.rst b/doc/apiv3.rst index 4a4485d1700..db82aaf6c71 100644 --- a/doc/apiv3.rst +++ b/doc/apiv3.rst @@ -426,6 +426,16 @@ These parameters are all the standard SOLR syntax (in contrast to the syntax use | | | | included in the results. | +-----------------------+---------------+-----------------------------------------------------+----------------------------------+ +Search Examples +--------------- + +Sorting by modified date: + +:: + + $ curl http://thedatahub.org/api/action/package_search -d '{"q": "groups:lodcloud", sort": "metadata_modified asc"}' + + Status Codes ~~~~~~~~~~~~ From f98a4b2a5f5013fa4aed475bd8b3237bb7847fcc Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 15 Dec 2011 17:14:52 +0000 Subject: [PATCH 3/4] [model]: #1546/#191 Fix for SOLR indexing of new packages, broken in a619. --- ckan/model/package.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ckan/model/package.py b/ckan/model/package.py index 18910e36334..97b2487425c 100644 --- a/ckan/model/package.py +++ b/ckan/model/package.py @@ -483,9 +483,11 @@ def metadata_modified(self): ] query = union(*[select([model.revision_table.c.timestamp], x) for x in where_clauses] - ).order_by("timestamp DESC").limit(1) - - conn = model.meta.engine.connect() + ).order_by('timestamp DESC').limit(1) + # Use current connection because we might be in a 'before_commit' of + # a SessionExtension - only by using the current connection can we get + # at the newly created revision etc. objects. + conn = model.Session.connection() result = conn.execute(query).fetchone() if result: From bebf7a5ed5ac752f27eef0dfe211f9047434d27d Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 15 Dec 2011 18:14:12 +0000 Subject: [PATCH 4/4] [model,lib,tests]: Added (much needed) tests for calculation of metadata_modified. Fixed up float breakage. If it cannot calculate metadata_modified it returns None instead of now() - this should never happen and if it does it would be good to find out why as its probably an error. Optimised package dictization to avoid calculation metadata_modified and created twice. --- ckan/lib/dictization/model_dictize.py | 10 +++-- ckan/model/group.py | 2 +- ckan/model/package.py | 20 +++------ ckan/tests/functional/api/test_action.py | 1 - ckan/tests/models/test_package.py | 55 +++++++++++++++++++++++- doc/apiv3.rst | 11 +---- 6 files changed, 69 insertions(+), 30 deletions(-) diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 675d47f1712..356c8277845 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -263,10 +263,12 @@ def package_to_api1(pkg, context): site_url = config.get('ckan.site_url', None) if site_url: dictized['ckan_url'] = '%s/dataset/%s' % (site_url, pkg.name) - dictized['metadata_modified'] = pkg.metadata_modified.isoformat() \ - if pkg.metadata_modified else None - dictized['metadata_created'] = pkg.metadata_created.isoformat() \ - if pkg.metadata_created else None + metadata_modified = pkg.metadata_modified + dictized['metadata_modified'] = metadata_modified.isoformat() \ + if metadata_modified else None + metadata_created = pkg.metadata_created + dictized['metadata_created'] = metadata_created.isoformat() \ + if metadata_created else None subjects = dictized.pop("relationships_as_subject") objects = dictized.pop("relationships_as_object") diff --git a/ckan/model/group.py b/ckan/model/group.py index 24aa84e21b6..5fbec961608 100644 --- a/ckan/model/group.py +++ b/ckan/model/group.py @@ -12,7 +12,7 @@ __all__ = ['group_table', 'Group', 'package_revision_table', 'PackageGroup', 'GroupRevision', 'PackageGroupRevision', - 'package_group_revision_table'] + 'package_group_table', 'package_group_revision_table'] package_group_table = Table('package_group', metadata, Column('id', UnicodeText, primary_key=True, default=make_uuid), diff --git a/ckan/model/package.py b/ckan/model/package.py index 97b2487425c..19941e3acef 100644 --- a/ckan/model/package.py +++ b/ckan/model/package.py @@ -460,7 +460,8 @@ def diff(self, to_revision=None, from_revision=None): @property def metadata_modified(self): """ - Return most recent timestamp for a package revision. + Return most recent timestamp for revisions related to this package. + NB Excludes changes to the package's groups """ import ckan.model as model @@ -492,19 +493,12 @@ def metadata_modified(self): if result: result_datetime = iso_date_to_datetime_for_sqlite(result[0]) - timestamp = result_datetime.utctimetuple() + timestamp_without_usecs = result_datetime.utctimetuple() usecs = float(result_datetime.microsecond) / 1e6 - else: - timestamp, usecs = gmtime(), 0 - # use timegm instead of mktime, because we don't want it localised - return datetime.datetime.utcfromtimestamp(timegm(timestamp)+usecs) - - # q = model.Session.query(model.PackageRevision)\ - # .filter(model.PackageRevision.id == self.id)\ - # .order_by(model.PackageRevision.revision_timestamp.desc()) - # ts = q.first() - # return ts.revision_timestamp - + # use timegm instead of mktime, because we don't want it localised + timestamp_float = timegm(timestamp_without_usecs) + usecs + return datetime.datetime.utcfromtimestamp(timestamp_float) + @property def metadata_created(self): import ckan.model as model diff --git a/ckan/tests/functional/api/test_action.py b/ckan/tests/functional/api/test_action.py index 634b4af7263..4f3a91c8527 100644 --- a/ckan/tests/functional/api/test_action.py +++ b/ckan/tests/functional/api/test_action.py @@ -1180,5 +1180,4 @@ def test_4_sort_by_metadata_modified(self): result_names = [r['name'] for r in result['results']] assert result_names == ['warandpeace', 'annakarenina'], result_names - # TODO: add an extra to annararenina, check that it is the first result diff --git a/ckan/tests/models/test_package.py b/ckan/tests/models/test_package.py index 896c2e8e6a9..8371f226f7d 100644 --- a/ckan/tests/models/test_package.py +++ b/ckan/tests/models/test_package.py @@ -128,7 +128,7 @@ def test_metadata_created_and_modified(self): assert package.metadata_modified == created_timestamp,\ (package.metadata_modified, created_timestamp) - # update the package it + # update the package rev = model.repo.new_revision() package = model.Package.by_name(name) package.title = "test_metadata_new_title" @@ -138,6 +138,59 @@ def test_metadata_created_and_modified(self): package = model.Package.by_name(name) assert package.metadata_created == created_timestamp assert package.metadata_modified == modified_timestamp + last_modified_timestamp = modified_timestamp + + # update a package's tag + rev = model.repo.new_revision() + package = model.Package.by_name(name) + package.add_tag_by_name('new-tag') + modified_timestamp = model.Session().revision.timestamp + assert modified_timestamp != last_modified_timestamp + model.repo.commit_and_remove() + + package = model.Package.by_name(name) + assert package.metadata_created == created_timestamp + assert package.metadata_modified == modified_timestamp + last_modified_timestamp = modified_timestamp + + # update a package's extra + rev = model.repo.new_revision() + package = model.Package.by_name(name) + package.extras['new-key'] = 'value' + modified_timestamp = model.Session().revision.timestamp + assert modified_timestamp != last_modified_timestamp + model.repo.commit_and_remove() + + package = model.Package.by_name(name) + assert package.metadata_created == created_timestamp + assert package.metadata_modified == modified_timestamp + last_modified_timestamp = modified_timestamp + + # update a package's relationship + rev = model.repo.new_revision() + package = model.Package.by_name(name) + anna = model.Package.by_name(u'annakarenina') + package.add_relationship(u'child_of', anna) + modified_timestamp = model.Session().revision.timestamp + assert modified_timestamp != last_modified_timestamp + model.repo.commit_and_remove() + + package = model.Package.by_name(name) + assert package.metadata_created == created_timestamp + assert package.metadata_modified == modified_timestamp + last_modified_timestamp = modified_timestamp + + # update a package's group - NB no change this time + rev = model.repo.new_revision() + group = model.Group.by_name('roger') + group.add_package_by_name(name) + modified_timestamp = model.Session().revision.timestamp + assert modified_timestamp != last_modified_timestamp + model.repo.commit_and_remove() + + package = model.Package.by_name(name) + assert package.metadata_created == created_timestamp + assert package.metadata_modified == last_modified_timestamp # no change class TestPackageWithTags: diff --git a/doc/apiv3.rst b/doc/apiv3.rst index db82aaf6c71..c74cb70c29b 100644 --- a/doc/apiv3.rst +++ b/doc/apiv3.rst @@ -365,7 +365,7 @@ These parameters are all the standard SOLR syntax (in contrast to the syntax use | | || fl=* | | +-----------------------+---------------+----------------------------------+----------------------------------+ | sort | field name, || sort=name asc | Changes the sort order according | -| | asc / dec | | to the field and direction given.| +| | asc / dec || sort=metadata_modified asc | to the field and direction given.| | | | | default: score desc, name asc | +-----------------------+---------------+----------------------------------+----------------------------------+ | start, rows | result-int | start=40&rows=20 | Pagination options. Start is the | @@ -426,15 +426,6 @@ These parameters are all the standard SOLR syntax (in contrast to the syntax use | | | | included in the results. | +-----------------------+---------------+-----------------------------------------------------+----------------------------------+ -Search Examples ---------------- - -Sorting by modified date: - -:: - - $ curl http://thedatahub.org/api/action/package_search -d '{"q": "groups:lodcloud", sort": "metadata_modified asc"}' - Status Codes ~~~~~~~~~~~~