diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py
index d4cb31451b0..9d1c6a0f13f 100644
--- a/ckan/controllers/package.py
+++ b/ckan/controllers/package.py
@@ -179,7 +179,6 @@ def _sort_by(fields):
else:
c.sort_by_fields = [field.split()[0]
for field in sort_by.split(',')]
- c.sort_by_selected = sort_by
def pager_url(q=None, page=None):
params = list(params_nopage)
@@ -250,6 +249,7 @@ def pager_url(q=None, page=None):
}
query = get_action('package_search')(context, data_dict)
+ c.sort_by_selected = query['sort']
c.page = h.Page(
collection=query['results'],
diff --git a/ckan/lib/app_globals.py b/ckan/lib/app_globals.py
index ecd2847b072..a58a41ffc48 100644
--- a/ckan/lib/app_globals.py
+++ b/ckan/lib/app_globals.py
@@ -57,6 +57,7 @@
'openid_enabled': {'default': 'true', 'type' : 'bool'},
'debug': {'default': 'false', 'type' : 'bool'},
'ckan.debug_supress_header' : {'default': 'false', 'type' : 'bool'},
+ 'ckan.tracking_enabled' : {'default': 'false', 'type' : 'bool'},
# int
'ckan.datasets_per_page': {'default': '20', 'type': 'int'},
diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py
index b531e97ee15..c67d9519c38 100644
--- a/ckan/lib/cli.py
+++ b/ckan/lib/cli.py
@@ -1048,7 +1048,7 @@ def export_tracking(self, engine, output_filename):
for r in total_views])
def update_tracking(self, engine, summary_date):
- PACKAGE_URL = '/dataset/'
+ PACKAGE_URL = '%/dataset/'
# clear out existing data before adding new
sql = '''DELETE FROM tracking_summary
WHERE tracking_date='%s'; ''' % summary_date
@@ -1074,7 +1074,7 @@ def update_tracking(self, engine, summary_date):
sql = '''UPDATE tracking_summary t
SET package_id = COALESCE(
(SELECT id FROM package p
- WHERE t.url = %s || p.name)
+ WHERE t.url LIKE %s || p.name)
,'~~not~found~~')
WHERE t.package_id IS NULL
AND tracking_type = 'page';'''
diff --git a/ckan/lib/create_test_data.py b/ckan/lib/create_test_data.py
index e2aba3b242f..6720bbb9c79 100644
--- a/ckan/lib/create_test_data.py
+++ b/ckan/lib/create_test_data.py
@@ -148,15 +148,15 @@ def create_arbitrary(cls, package_dicts, relationships=[],
new_group_names = set()
new_groups = {}
- rev = model.repo.new_revision()
- rev.author = cls.author
- rev.message = u'Creating test packages.'
admins_list = defaultdict(list) # package_name: admin_names
if package_dicts:
if isinstance(package_dicts, dict):
package_dicts = [package_dicts]
for item in package_dicts:
+ rev = model.repo.new_revision()
+ rev.author = cls.author
+ rev.message = u'Creating test packages.'
pkg_dict = {}
for field in cls.pkg_core_fields:
if item.has_key(field):
@@ -245,7 +245,7 @@ def create_arbitrary(cls, package_dicts, relationships=[],
model.setup_default_user_roles(pkg, admins=[])
for admin in admins:
admins_list[item['name']].append(admin)
- model.repo.commit_and_remove()
+ model.repo.commit_and_remove()
needs_commit = False
diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py
index 83852e6d36e..76af5965d89 100644
--- a/ckan/lib/search/query.py
+++ b/ckan/lib/search/query.py
@@ -316,11 +316,6 @@ def run(self, query):
rows_to_query = rows_to_return
query['rows'] = rows_to_query
- # order by score if no 'sort' term given
- order_by = query.get('sort')
- if order_by == 'rank' or order_by is None:
- query['sort'] = 'score desc, name asc'
-
# show only results from this CKAN instance
fq = query.get('fq', '')
if not '+site_id:' in fq:
diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py
index 9340a7b0c9c..236e1c3559f 100644
--- a/ckan/logic/action/get.py
+++ b/ckan/logic/action/get.py
@@ -1151,8 +1151,9 @@ def package_search(context, data_dict):
:param rows: the number of matching rows to return.
:type rows: int
:param sort: sorting of the search results. Optional. Default:
- "score desc, name asc". As per the solr documentation, this is a
- comma-separated string of field names and sort-orderings.
+ 'score desc, metadata_created desc'. As per the solr
+ documentation, this is a comma-separated string of field names and
+ sort-orderings.
:type sort: string
:param start: the offset in the complete result for where the set of
returned datasets should begin.
@@ -1233,6 +1234,9 @@ def package_search(context, data_dict):
# the query
abort = data_dict.get('abort_search',False)
+ if data_dict.get('sort') in (None, 'rank'):
+ data_dict['sort'] = 'score desc, metadata_created desc'
+
results = []
if not abort:
# return a list of package ids
@@ -1288,7 +1292,8 @@ def package_search(context, data_dict):
search_results = {
'count': count,
'facets': facets,
- 'results': results
+ 'results': results,
+ 'sort': data_dict['sort']
}
# Transform facets into a more useful data structure.
diff --git a/ckan/model/__init__.py b/ckan/model/__init__.py
index 7b3d7e5887a..96812963a17 100644
--- a/ckan/model/__init__.py
+++ b/ckan/model/__init__.py
@@ -101,6 +101,7 @@
from tracking import (
tracking_summary_table,
TrackingSummary,
+ tracking_raw_table
)
from rating import (
Rating,
diff --git a/ckan/model/tracking.py b/ckan/model/tracking.py
index c10684960ea..dca69212a83 100644
--- a/ckan/model/tracking.py
+++ b/ckan/model/tracking.py
@@ -3,7 +3,15 @@
import meta
import domain_object
-__all__ = ['tracking_summary_table', 'TrackingSummary']
+__all__ = ['tracking_summary_table', 'TrackingSummary', 'tracking_raw_table']
+
+tracking_raw_table = Table('tracking_raw', meta.metadata,
+ Column('user_key', types.Unicode(100), nullable=False),
+ Column('url', types.UnicodeText, nullable=False),
+ Column('tracking_type', types.Unicode(10), nullable=False),
+ Column('access_timestamp', types.DateTime),
+ )
+
tracking_summary_table = Table('tracking_summary', meta.metadata,
Column('url', types.UnicodeText, primary_key=True, nullable=False),
diff --git a/ckan/templates/snippets/sort_by.html b/ckan/templates/snippets/sort_by.html
index a2a6076c3bc..c568e2464f9 100644
--- a/ckan/templates/snippets/sort_by.html
+++ b/ckan/templates/snippets/sort_by.html
@@ -11,11 +11,13 @@
diff --git a/ckan/tests/functional/test_pagination.py b/ckan/tests/functional/test_pagination.py
index e5ed5445910..94f0b9ee8d3 100644
--- a/ckan/tests/functional/test_pagination.py
+++ b/ckan/tests/functional/test_pagination.py
@@ -59,25 +59,25 @@ def test_package_search_p1(self):
res = self.app.get(url_for(controller='package', action='search', q='groups:group_00'))
assert 'href="/dataset?q=groups%3Agroup_00&page=2"' in res
pkg_numbers = scrape_search_results(res, 'dataset')
- assert_equal(['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19'], pkg_numbers)
+ assert_equal(['50', '49', '48', '47', '46', '45', '44', '43', '42', '41', '40', '39', '38', '37', '36', '35', '34', '33', '32', '31'], pkg_numbers)
def test_package_search_p2(self):
res = self.app.get(url_for(controller='package', action='search', q='groups:group_00', page=2))
assert 'href="/dataset?q=groups%3Agroup_00&page=1"' in res
pkg_numbers = scrape_search_results(res, 'dataset')
- assert_equal(['20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39'], pkg_numbers)
+ assert_equal(['30', '29', '28', '27', '26', '25', '24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '14', '13', '12', '11'], pkg_numbers)
def test_group_datasets_read_p1(self):
res = self.app.get(url_for(controller='group', action='read', id='group_00'))
assert 'href="/group/group_00?page=2' in res, res
pkg_numbers = scrape_search_results(res, 'group_dataset')
- assert_equal(['00', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19'], pkg_numbers)
+ assert_equal(['50', '49', '48', '47', '46', '45', '44', '43', '42', '41', '40', '39', '38', '37', '36', '35', '34', '33', '32', '31'], pkg_numbers)
def test_group_datasets_read_p2(self):
res = self.app.get(url_for(controller='group', action='read', id='group_00', page=2))
assert 'href="/group/group_00?page=1' in res, res
pkg_numbers = scrape_search_results(res, 'group_dataset')
- assert_equal(['20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39'], pkg_numbers)
+ assert_equal(['30', '29', '28', '27', '26', '25', '24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '14', '13', '12', '11'], pkg_numbers)
class TestPaginationGroup(TestController):
@classmethod
diff --git a/ckan/tests/lib/test_solr_package_search.py b/ckan/tests/lib/test_solr_package_search.py
index 37980a9f5c0..98939c2d479 100644
--- a/ckan/tests/lib/test_solr_package_search.py
+++ b/ckan/tests/lib/test_solr_package_search.py
@@ -363,7 +363,7 @@ def teardown_class(self):
def _do_search(self, q, expected_pkgs, count=None):
query = {
'q': q,
- 'sort': 'rank'
+ 'sort': 'score desc, name asc'
}
result = search.query_for(model.Package).run(query)
pkgs = result['results']
@@ -472,7 +472,7 @@ def teardown_class(self):
def _do_search(self, q, wanted_results):
query = {
'q': q,
- 'sort': 'rank'
+ 'sort': 'score desc, name asc',
}
result = search.query_for(model.Package).run(query)
results = result['results']
diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py
index 228e62e962c..b0602807358 100644
--- a/ckan/tests/logic/test_action.py
+++ b/ckan/tests/logic/test_action.py
@@ -1306,7 +1306,7 @@ def test_1_basic_no_params(self):
result = res['result']
assert_equal(res['success'], True)
assert_equal(result['count'], 2)
- assert_equal(result['results'][0]['name'], 'annakarenina')
+ assert result['results'][0]['name'] in ('annakarenina', 'warandpeace')
# Test GET request
res = self.app.get('/api/action/package_search')
@@ -1314,7 +1314,7 @@ def test_1_basic_no_params(self):
result = res['result']
assert_equal(res['success'], True)
assert_equal(result['count'], 2)
- assert_equal(result['results'][0]['name'], 'annakarenina')
+ assert result['results'][0]['name'] in ('annakarenina', 'warandpeace')
def test_2_bad_param(self):
postparams = '%s=1' % json.dumps({
diff --git a/doc/images/popular-dataset.png b/doc/images/popular-dataset.png
new file mode 100644
index 00000000000..a64b392410a
Binary files /dev/null and b/doc/images/popular-dataset.png differ
diff --git a/doc/images/popular-resource.png b/doc/images/popular-resource.png
new file mode 100644
index 00000000000..4f99e9887ce
Binary files /dev/null and b/doc/images/popular-resource.png differ
diff --git a/doc/images/sort-datasets-by-popularity.png b/doc/images/sort-datasets-by-popularity.png
new file mode 100644
index 00000000000..87343b96df2
Binary files /dev/null and b/doc/images/sort-datasets-by-popularity.png differ
diff --git a/doc/index.rst b/doc/index.rst
index aad7b737ca1..5b4caf8c908 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -49,6 +49,7 @@ Customizing and Extending
geospatial
multilingual
email-notifications
+ tracking
Publishing Datasets
===================
diff --git a/doc/tracking.rst b/doc/tracking.rst
new file mode 100644
index 00000000000..384028e8a76
--- /dev/null
+++ b/doc/tracking.rst
@@ -0,0 +1,116 @@
+==================
+Page View Tracking
+==================
+
+CKAN can track visits to pages of your site and use this tracking data to:
+
+* Sort datasets by popularity
+* Highlight popular datasets and resources
+* Show view counts next to datasets and resources
+* Show a list of the most popular datasets
+* Export page-view data to a CSV file
+
+.. seealso::
+
+ `ckanext-googleanalytics <https://github.com/ckan/ckanext-googleanalytics>`_
+ A CKAN extension that integrates Google Analytics into CKAN.
+
+
+Enabling Page View Tracking
+===========================
+
+To enable page view tracking:
+
+1. Put ``ckan.tracking_enabled = true`` in the ``[app:main]`` section of your
+ CKAN configuration file (e.g. ``development.ini`` or ``production.ini``)::
+
+ [app:main]
+ ckan.tracking_enabled = true
+
+ Save the file and restart your web server. CKAN will now record raw page
+ view tracking data in your CKAN database as pages are viewed.
+
+2. Set up a cron job to update the tracking summary data.
+
+ For operations based on the tracking data CKAN uses a summarised version of
+ the data, not the raw tracking data that is recorded "live" as page views
+ happen. The ``paster tracking update`` and ``paster search-index rebuild``
+ commands need to be run periodically to update this tracking summary data.
+
+ You can set up a cron job to run these commands. On most UNIX systems you can
+ set up a cron job by running ``crontab -e`` in a shell to edit your crontab
+ file, and adding a line to the file to specify the new job. For more
+ information run ``man crontab`` in a shell. For example, here is a crontab
+ line to update the tracking data and rebuild the search index hourly::
+
+ @hourly /usr/lib/ckan/bin/paster --plugin=ckan tracking update -c /etc/ckan/production.ini && /usr/lib/ckan/bin/paster --plugin=ckan search-index rebuild -r -c /etc/ckan/production.ini
+
+ Replace ``/usr/lib/ckan/bin/`` with the path to the ``bin`` directory of the
+ virtualenv that you've installed CKAN into, and replace ``/etc/ckan/production.ini``
+ with the path to your CKAN configuration file.
+
+ The ``@hourly`` can be replaced with ``@daily``, ``@weekly`` or
+ ``@monthly``.
+
+
+Retrieving Tracking Data
+========================
+
+Tracking summary data for datasets and resources is available in the dataset
+and resource dictionaries returned by, for example, the ``package_show()``
+API::
+
+ "tracking_summary": {
+ "recent": 5,
+ "total": 15
+ },
+
+This can be used, for example, by custom templates to show the number of views
+next to datasets and resources. A dataset or resource's ``recent`` count is
+its number of views in the last 14 days; the ``total`` count is all of its
+tracked views (including recent ones).
+
+You can also export tracking data for all datasets to a CSV file using the
+``paster tracking export`` command. For details, run ``paster tracking -h``.
+
+.. note::
+
+ Repeatedly visiting the same page will not increase the page's view count!
+ Page view counting is limited to one view per user per page per day.
+
+
+Sorting Datasets by Popularity
+==============================
+
+Once you've enabled page view tracking on your CKAN site, you can view datasets
+most-popular-first by selecting ``Popular`` from the ``Order by:`` dropdown on
+the dataset search page:
+
+.. image:: images/sort-datasets-by-popularity.png
+
+The datasets are sorted by their number of recent views.
+
+You can retrieve datasets most-popular-first from the
+:doc:`CKAN API <api>` by passing ``'sort': 'views_recent desc'`` to the
+``package_search()`` action. This could be used, for example, by a custom
+template to show a list of the most popular datasets on the site's front page.
+
+.. tip::
+
+ You can also sort datasets by total views rather than recent views. Pass
+ ``'sort': 'views_total desc'`` to the ``package_search()`` API, or use the
+ URL ``/dataset?q=&sort=views_total+desc`` in the web interface.
+
+
+Highlighting Popular Datasets and Resources
+===========================================
+
+Once you've enabled page view tracking on your CKAN site, popular datasets and
+resources (those with more than 10 views) will be highlighted with a "popular"
+badge and a tooltip showing the number of views:
+
+.. image:: images/popular-dataset.png
+
+.. image:: images/popular-resource.png
+
+