diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py index d4cb31451b0..9d1c6a0f13f 100644 --- a/ckan/controllers/package.py +++ b/ckan/controllers/package.py @@ -179,7 +179,6 @@ def _sort_by(fields): else: c.sort_by_fields = [field.split()[0] for field in sort_by.split(',')] - c.sort_by_selected = sort_by def pager_url(q=None, page=None): params = list(params_nopage) @@ -250,6 +249,7 @@ def pager_url(q=None, page=None): } query = get_action('package_search')(context, data_dict) + c.sort_by_selected = query['sort'] c.page = h.Page( collection=query['results'], diff --git a/ckan/lib/app_globals.py b/ckan/lib/app_globals.py index ecd2847b072..a58a41ffc48 100644 --- a/ckan/lib/app_globals.py +++ b/ckan/lib/app_globals.py @@ -57,6 +57,7 @@ 'openid_enabled': {'default': 'true', 'type' : 'bool'}, 'debug': {'default': 'false', 'type' : 'bool'}, 'ckan.debug_supress_header' : {'default': 'false', 'type' : 'bool'}, + 'ckan.tracking_enabled' : {'default': 'false', 'type' : 'bool'}, # int 'ckan.datasets_per_page': {'default': '20', 'type': 'int'}, diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index b531e97ee15..c67d9519c38 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -1048,7 +1048,7 @@ def export_tracking(self, engine, output_filename): for r in total_views]) def update_tracking(self, engine, summary_date): - PACKAGE_URL = '/dataset/' + PACKAGE_URL = '%/dataset/' # clear out existing data before adding new sql = '''DELETE FROM tracking_summary WHERE tracking_date='%s'; ''' % summary_date @@ -1074,7 +1074,7 @@ def update_tracking(self, engine, summary_date): sql = '''UPDATE tracking_summary t SET package_id = COALESCE( (SELECT id FROM package p - WHERE t.url = %s || p.name) + WHERE t.url LIKE %s || p.name) ,'~~not~found~~') WHERE t.package_id IS NULL AND tracking_type = 'page';''' diff --git a/ckan/lib/create_test_data.py b/ckan/lib/create_test_data.py index e2aba3b242f..6720bbb9c79 100644 --- a/ckan/lib/create_test_data.py +++ 
b/ckan/lib/create_test_data.py @@ -148,15 +148,15 @@ def create_arbitrary(cls, package_dicts, relationships=[], new_group_names = set() new_groups = {} - rev = model.repo.new_revision() - rev.author = cls.author - rev.message = u'Creating test packages.' admins_list = defaultdict(list) # package_name: admin_names if package_dicts: if isinstance(package_dicts, dict): package_dicts = [package_dicts] for item in package_dicts: + rev = model.repo.new_revision() + rev.author = cls.author + rev.message = u'Creating test packages.' pkg_dict = {} for field in cls.pkg_core_fields: if item.has_key(field): @@ -245,7 +245,7 @@ def create_arbitrary(cls, package_dicts, relationships=[], model.setup_default_user_roles(pkg, admins=[]) for admin in admins: admins_list[item['name']].append(admin) - model.repo.commit_and_remove() + model.repo.commit_and_remove() needs_commit = False diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index 83852e6d36e..76af5965d89 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -316,11 +316,6 @@ def run(self, query): rows_to_query = rows_to_return query['rows'] = rows_to_query - # order by score if no 'sort' term given - order_by = query.get('sort') - if order_by == 'rank' or order_by is None: - query['sort'] = 'score desc, name asc' - # show only results from this CKAN instance fq = query.get('fq', '') if not '+site_id:' in fq: diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 9340a7b0c9c..236e1c3559f 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1151,8 +1151,9 @@ def package_search(context, data_dict): :param rows: the number of matching rows to return. :type rows: int :param sort: sorting of the search results. Optional. Default: - "score desc, name asc". As per the solr documentation, this is a - comma-separated string of field names and sort-orderings. + 'relevance asc, metadata_modified desc'. 
As per the solr + documentation, this is a comma-separated string of field names and + sort-orderings. :type sort: string :param start: the offset in the complete result for where the set of returned datasets should begin. @@ -1233,6 +1234,9 @@ def package_search(context, data_dict): # the query abort = data_dict.get('abort_search',False) + if data_dict.get('sort') in (None, 'rank'): + data_dict['sort'] = 'score desc, metadata_created desc' + results = [] if not abort: # return a list of package ids @@ -1288,7 +1292,8 @@ def package_search(context, data_dict): search_results = { 'count': count, 'facets': facets, - 'results': results + 'results': results, + 'sort': data_dict['sort'] } # Transform facets into a more useful data structure. diff --git a/ckan/model/__init__.py b/ckan/model/__init__.py index 7b3d7e5887a..96812963a17 100644 --- a/ckan/model/__init__.py +++ b/ckan/model/__init__.py @@ -101,6 +101,7 @@ from tracking import ( tracking_summary_table, TrackingSummary, + tracking_raw_table ) from rating import ( Rating, diff --git a/ckan/model/tracking.py b/ckan/model/tracking.py index c10684960ea..dca69212a83 100644 --- a/ckan/model/tracking.py +++ b/ckan/model/tracking.py @@ -3,7 +3,15 @@ import meta import domain_object -__all__ = ['tracking_summary_table', 'TrackingSummary'] +__all__ = ['tracking_summary_table', 'TrackingSummary', 'tracking_raw_table'] + +tracking_raw_table = Table('tracking_raw', meta.metadata, + Column('user_key', types.Unicode(100), nullable=False), + Column('url', types.UnicodeText, nullable=False), + Column('tracking_type', types.Unicode(10), nullable=False), + Column('access_timestamp', types.DateTime), + ) + tracking_summary_table = Table('tracking_summary', meta.metadata, Column('url', types.UnicodeText, primary_key=True, nullable=False), diff --git a/ckan/templates/snippets/sort_by.html b/ckan/templates/snippets/sort_by.html index a2a6076c3bc..c568e2464f9 100644 --- a/ckan/templates/snippets/sort_by.html +++ 
b/ckan/templates/snippets/sort_by.html @@ -11,11 +11,13 @@ diff --git a/ckan/tests/functional/test_pagination.py b/ckan/tests/functional/test_pagination.py index e5ed5445910..94f0b9ee8d3 100644 --- a/ckan/tests/functional/test_pagination.py +++ b/ckan/tests/functional/test_pagination.py @@ -59,25 +59,25 @@ def test_package_search_p1(self): res = self.app.get(url_for(controller='package', action='search', q='groups:group_00')) assert 'href="/dataset?q=groups%3Agroup_00&page=2"' in res pkg_numbers = scrape_search_results(res, 'dataset') - assert_equal(['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19'], pkg_numbers) + assert_equal(['50', '49', '48', '47', '46', '45', '44', '43', '42', '41', '40', '39', '38', '37', '36', '35', '34', '33', '32', '31'], pkg_numbers) def test_package_search_p2(self): res = self.app.get(url_for(controller='package', action='search', q='groups:group_00', page=2)) assert 'href="/dataset?q=groups%3Agroup_00&page=1"' in res pkg_numbers = scrape_search_results(res, 'dataset') - assert_equal(['20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39'], pkg_numbers) + assert_equal(['30', '29', '28', '27', '26', '25', '24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '14', '13', '12', '11'], pkg_numbers) def test_group_datasets_read_p1(self): res = self.app.get(url_for(controller='group', action='read', id='group_00')) assert 'href="/group/group_00?page=2' in res, res pkg_numbers = scrape_search_results(res, 'group_dataset') - assert_equal(['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19'], pkg_numbers) + assert_equal(['50', '49', '48', '47', '46', '45', '44', '43', '42', '41', '40', '39', '38', '37', '36', '35', '34', '33', '32', '31'], pkg_numbers) def test_group_datasets_read_p2(self): res = self.app.get(url_for(controller='group', 
action='read', id='group_00', page=2)) assert 'href="/group/group_00?page=1' in res, res pkg_numbers = scrape_search_results(res, 'group_dataset') - assert_equal(['20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39'], pkg_numbers) + assert_equal(['30', '29', '28', '27', '26', '25', '24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '14', '13', '12', '11'], pkg_numbers) class TestPaginationGroup(TestController): @classmethod diff --git a/ckan/tests/lib/test_solr_package_search.py b/ckan/tests/lib/test_solr_package_search.py index 37980a9f5c0..98939c2d479 100644 --- a/ckan/tests/lib/test_solr_package_search.py +++ b/ckan/tests/lib/test_solr_package_search.py @@ -363,7 +363,7 @@ def teardown_class(self): def _do_search(self, q, expected_pkgs, count=None): query = { 'q': q, - 'sort': 'rank' + 'sort': 'score desc, name asc' } result = search.query_for(model.Package).run(query) pkgs = result['results'] @@ -472,7 +472,7 @@ def teardown_class(self): def _do_search(self, q, wanted_results): query = { 'q': q, - 'sort': 'rank' + 'sort': 'score desc, name asc', } result = search.query_for(model.Package).run(query) results = result['results'] diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py index 228e62e962c..b0602807358 100644 --- a/ckan/tests/logic/test_action.py +++ b/ckan/tests/logic/test_action.py @@ -1306,7 +1306,7 @@ def test_1_basic_no_params(self): result = res['result'] assert_equal(res['success'], True) assert_equal(result['count'], 2) - assert_equal(result['results'][0]['name'], 'annakarenina') + assert result['results'][0]['name'] in ('annakarenina', 'warandpeace') # Test GET request res = self.app.get('/api/action/package_search') @@ -1314,7 +1314,7 @@ def test_1_basic_no_params(self): result = res['result'] assert_equal(res['success'], True) assert_equal(result['count'], 2) - assert_equal(result['results'][0]['name'], 'annakarenina') + assert 
result['results'][0]['name'] in ('annakarenina', 'warandpeace') def test_2_bad_param(self): postparams = '%s=1' % json.dumps({ diff --git a/doc/images/popular-dataset.png b/doc/images/popular-dataset.png new file mode 100644 index 00000000000..a64b392410a Binary files /dev/null and b/doc/images/popular-dataset.png differ diff --git a/doc/images/popular-resource.png b/doc/images/popular-resource.png new file mode 100644 index 00000000000..4f99e9887ce Binary files /dev/null and b/doc/images/popular-resource.png differ diff --git a/doc/images/sort-datasets-by-popularity.png b/doc/images/sort-datasets-by-popularity.png new file mode 100644 index 00000000000..87343b96df2 Binary files /dev/null and b/doc/images/sort-datasets-by-popularity.png differ diff --git a/doc/index.rst b/doc/index.rst index aad7b737ca1..5b4caf8c908 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -49,6 +49,7 @@ Customizing and Extending geospatial multilingual email-notifications + tracking Publishing Datasets =================== diff --git a/doc/tracking.rst b/doc/tracking.rst new file mode 100644 index 00000000000..384028e8a76 --- /dev/null +++ b/doc/tracking.rst @@ -0,0 +1,116 @@ +================== +Page View Tracking +================== + +CKAN can track visits to pages of your site and use this tracking data to: + +* Sort datasets by popularity +* Highlight popular datasets and resources +* Show view counts next to datasets and resources +* Show a list of the most popular datasets +* Export page-view data to a CSV file + +.. seealso:: + + `ckanext-googleanalytics `_ + A CKAN extension that integrates Google Analytics into CKAN. + + +Enabling Page View Tracking +=========================== + +To enable page view tracking: + +1. Put ``ckan.tracking_enabled = true`` in the ``[app:main]`` section of your + CKAN configuration file (e.g. ``development.ini`` or ``production.ini``):: + + [app:main] + ckan.tracking_enabled = true + + Save the file and restart your web server. 
CKAN will now record raw page + view tracking data in your CKAN database as pages are viewed. + +2. Set up a cron job to update the tracking summary data. + + For operations based on the tracking data CKAN uses a summarised version of + the data, not the raw tracking data that is recorded "live" as page views + happen. The ``paster tracking update`` and ``paster search-index rebuild`` + commands need to be run periodically to update this tracking summary data. + + You can set up a cron job to run these commands. On most UNIX systems you can + set up a cron job by running ``crontab -e`` in a shell to edit your crontab + file, and adding a line to the file to specify the new job. For more + information run ``man crontab`` in a shell. For example, here is a crontab + line to update the tracking data and rebuild the search index hourly:: + + @hourly /usr/lib/ckan/bin/paster --plugin=ckan tracking update -c /etc/ckan/production.ini && /usr/lib/ckan/bin/paster --plugin=ckan search-index rebuild -r -c /etc/ckan/production.ini + + Replace ``/usr/lib/ckan/bin/`` with the path to the ``bin`` directory of the + virtualenv that you've installed CKAN into, and replace ``/etc/ckan/production.ini`` + with the path to your CKAN configuration file. + + The ``@hourly`` can be replaced with ``@daily``, ``@weekly`` or + ``@monthly``. + + +Retrieving Tracking Data +======================== + +Tracking summary data for datasets and resources is available in the dataset +and resource dictionaries returned by, for example, the ``package_show()`` +API:: + + "tracking_summary": { + "recent": 5, + "total": 15 + }, + +This can be used, for example, by custom templates to show the number of views +next to datasets and resources. A dataset or resource's ``recent`` count is +its number of views in the last 14 days; the ``total`` count is all of its +tracked views (including recent ones). 
+ +You can also export tracking data for all datasets to a CSV file using the +``paster tracking export`` command. For details, run ``paster tracking -h``. + +.. note:: + + Repeatedly visiting the same page will not increase the page's view count! + Page view counting is limited to one view per user per page per day. + + +Sorting Datasets by Popularity +============================== + +Once you've enabled page view tracking on your CKAN site, you can view datasets +most-popular-first by selecting ``Popular`` from the ``Order by:`` dropdown on +the dataset search page: + +.. image:: images/sort-datasets-by-popularity.png + +The datasets are sorted by their number of recent views. + +You can retrieve datasets most-popular-first from the +:doc:`CKAN API <api>` by passing ``'sort': 'views_recent desc'`` to the +``package_search()`` action. This could be used, for example, by a custom +template to show a list of the most popular datasets on the site's front page. + +.. tip:: + + You can also sort datasets by total views rather than recent views. Pass + ``'sort': 'views_total desc'`` to the ``package_search()`` API, or use the + URL ``/dataset?q=&sort=views_total+desc`` in the web interface. + + +Highlighting Popular Datasets and Resources +=========================================== + +Once you've enabled page view tracking on your CKAN site, popular datasets and +resources (those with more than 10 views) will be highlighted with a "popular" +badge and a tooltip showing the number of views: + +.. image:: images/popular-dataset.png + +.. image:: images/popular-resource.png + +