From a31444a361421639eff1ea8eee31bf873017bf21 Mon Sep 17 00:00:00 2001
From: amercader <amercadero@gmail.com>
Date: Fri, 16 May 2014 16:25:21 +0100
Subject: [PATCH 01/36] [#1723] Do not show create fields in member edit

---
 ckan/templates/group/member_new.html        | 43 ++++++++++++---------
 ckan/templates/organization/member_new.html | 42 +++++++++++---------
 2 files changed, 47 insertions(+), 38 deletions(-)
diff --git a/ckan/templates/group/member_new.html b/ckan/templates/group/member_new.html
index f7b24c236fe..ce03a7f69a2 100644
--- a/ckan/templates/group/member_new.html
+++ b/ckan/templates/group/member_new.html
@@ -13,12 +13,14 @@ <h1 class="page-heading">
   <form class="dataset-form form-horizontal add-member-form" method='post'>
     <div class="row-fluid">
       <div class="control-group control-medium">
-        <label class="control-label" for="username">
-          {{ _('Existing User') }}
-        </label>
-        <span>
-          {{ _('If you wish to add an existing user, search for their username below.') }}
-        </span>
+        {% if not user %}
+          <label class="control-label" for="username">
+            {{ _('Existing User') }}
+          </label>
+          <span>
+            {{ _('If you wish to add an existing user, search for their username below.') }}
+          </span>
+        {% endif %}
         <div class="controls">
           {% if user %}
             <input type="hidden" name="username" value="{{ user.name }}" />
@@ -31,21 +33,24 @@ <h1 class="page-heading">
           {% endif %}
         </div>
       </div>
-      <div class="add-member-or">
-        {{ _('or') }}
-      </div>
-      <div class="control-group control-medium">
-        <label class="control-label" for="email">
-          {{ _('New User') }}
-        </label>
-        <span>
-          {{ _('If you wish to invite a new user, enter their email address.') }}
-        </span>
-        <div class="controls">
-          <input id="email" type="text" name="email" placeholder="Email address">
+      {% if not user %}
+        <div class="add-member-or">
+          {{ _('or') }}
         </div>
-      </div>
+        <div class="control-group control-medium">
+          <label class="control-label" for="email">
+            {{ _('New User') }}
+          </label>
+          <span>
+            {{ _('If you wish to invite a new user, enter their email address.') }}
+          </span>
+          <div class="controls">
+            <input id="email" type="text" name="email" placeholder="Email address">
+          </div>
+        </div>
+      {% endif %}
     </div>
+
     {% set format_attrs = {'data-module': 'autocomplete'} %}
     {{ form.select('role', label=_('Role'), options=c.roles, selected=c.user_role, error='', attrs=format_attrs) }}
     <div class="form-actions">
diff --git a/ckan/templates/organization/member_new.html b/ckan/templates/organization/member_new.html
index cc4ac8c7dd4..8c0efd0e6e8 100644
--- a/ckan/templates/organization/member_new.html
+++ b/ckan/templates/organization/member_new.html
@@ -15,12 +15,14 @@ <h1 class="page-heading">
   <form class="dataset-form form-horizontal add-member-form" method='post'>
     <div class="row-fluid">
       <div class="control-group control-medium">
-        <label class="control-label" for="username">
-          {{ _('Existing User') }}
-        </label>
-        <span>
-          {{ _('If you wish to add an existing user, search for their username below.') }}
-        </span>
+        {% if not user %}
+          <label class="control-label" for="username">
+            {{ _('Existing User') }}
+          </label>
+          <span>
+            {{ _('If you wish to add an existing user, search for their username below.') }}
+          </span>
+        {% endif %}
         <div class="controls">
           {% if user %}
             <input type="hidden" name="username" value="{{ user.name }}" />
@@ -33,20 +35,22 @@ <h1 class="page-heading">
           {% endif %}
         </div>
       </div>
-      <div class="add-member-or">
-        {{ _('or') }}
-      </div>
-      <div class="control-group control-medium">
-        <label class="control-label" for="email">
-          {{ _('New User') }}
-        </label>
-        <span>
-          {{ _('If you wish to invite a new user, enter their email address.') }}
-        </span>
-        <div class="controls">
-          <input id="email" type="text" name="email" placeholder="Email address">
+      {% if not user %}
+        <div class="add-member-or">
+          {{ _('or') }}
         </div>
-      </div>
+        <div class="control-group control-medium">
+          <label class="control-label" for="email">
+            {{ _('New User') }}
+          </label>
+          <span>
+            {{ _('If you wish to invite a new user, enter their email address.') }}
+          </span>
+          <div class="controls">
+            <input id="email" type="text" name="email" placeholder="Email address">
+          </div>
+        </div>
+      {% endif %}
     </div>
     {% set format_attrs = {'data-module': 'autocomplete'} %}
     {{ form.select('role', label=_('Role'), options=c.roles, selected=c.user_role, error='', attrs=format_attrs) }}

From 8b21b84cc9399e55078bdef50b0d3d37e2f2f90d Mon Sep 17 00:00:00 2001
From: amercader <amercadero@gmail.com>
Date: Fri, 16 May 2014 17:44:40 +0100
Subject: [PATCH 02/36] [#1701] Move and clean up search index tests

---
 ckan/new_tests/lib/search/test_index.py  |  94 +++++++++++++++++++++
 ckan/tests/lib/test_solr_search_index.py | 102 -----------------------
 2 files changed, 94 insertions(+), 102 deletions(-)
 create mode 100644 ckan/new_tests/lib/search/test_index.py

diff --git a/ckan/new_tests/lib/search/test_index.py b/ckan/new_tests/lib/search/test_index.py
new file mode 100644
index 00000000000..86cb34ba53b
--- /dev/null
+++ b/ckan/new_tests/lib/search/test_index.py
@@ -0,0 +1,94 @@
+import datetime
+import hashlib
+import nose
+
+from pylons import config
+import ckan.lib.search as search
+
+
+eq_ = nose.tools.eq_
+
+
+class TestSearchIndex(object):
+
+    @classmethod
+    def setup_class(cls):
+
+        if not search.is_available():
+            raise nose.SkipTest('Solr not reachable')
+
+        cls.solr_client = search.make_connection()
+
+        cls.fq = " +site_id:\"%s\" " % config['ckan.site_id']
+
+        cls.package_index = search.PackageSearchIndex()
+
+        cls.base_package_dict = {
+            'id': 'test-index',
+            'name': 'monkey',
+            'title': 'Monkey',
+            'state': 'active',
+            'private': False,
+            'type': 'dataset',
+            'owner_org': None,
+            'metadata_created': datetime.datetime.now().isoformat(),
+            'metadata_modified': datetime.datetime.now().isoformat(),
+        }
+
+    def teardown(self):
+        # clear the search index after every test
+        self.package_index.clear()
+
+    def test_index_basic(self):
+
+        self.package_index.index_package(self.base_package_dict)
+
+        response = self.solr_client.query('name:monkey', fq=self.fq)
+
+        eq_(len(response), 1)
+
+        eq_(response.results[0]['id'], 'test-index')
+        eq_(response.results[0]['name'], 'monkey')
+        eq_(response.results[0]['title'], 'Monkey')
+
+        index_id = hashlib.md5(
+            '{0}{1}'.format(self.base_package_dict['id'],
+                            config['ckan.site_id'])
+        ).hexdigest()
+
+        eq_(response.results[0]['index_id'], index_id)
+
+    def test_no_state_no_index(self):
+        pkg_dict = self.base_package_dict.copy()
+        pkg_dict.update({
+            'state': None,
+        })
+
+        self.package_index.index_package(pkg_dict)
+
+        response = self.solr_client.query('name:monkey', fq=self.fq)
+
+        eq_(len(response), 0)
+
+    def test_clear_index(self):
+
+        self.package_index.index_package(self.base_package_dict)
+
+        self.package_index.clear()
+
+        response = self.solr_client.query('name:monkey', fq=self.fq)
+        eq_(len(response), 0)
+
+    def test_index_illegal_xml_chars(self):
+
+        pkg_dict = self.base_package_dict.copy()
+        pkg_dict.update({
+            'title': u'\u00c3a\u0001ltimo n\u00famero penguin',
+            'notes': u'\u00c3a\u0001ltimo n\u00famero penguin',
+        })
+        self.package_index.index_package(pkg_dict)
+
+        response = self.solr_client.query('name:monkey', fq=self.fq)
+
+        eq_(len(response), 1)
+        eq_(response.results[0]['title'], u'\u00c3altimo n\u00famero penguin')
diff --git a/ckan/tests/lib/test_solr_search_index.py b/ckan/tests/lib/test_solr_search_index.py
index f8c518bc2da..173e4d9f350 100644
--- a/ckan/tests/lib/test_solr_search_index.py
+++ b/ckan/tests/lib/test_solr_search_index.py
@@ -29,108 +29,6 @@ def test_solr_url_exists(self):
                 raise AssertionError('SOLR connection problem. Connection defined in development.ini as: solr_url=%s Error: %s' % (config['solr_url'], e))
 
 
-class TestSolrSearchIndex(TestController):
-    """
-    Tests that a package is indexed when the packagenotification is
-    received by the indexer.
-    """
-    @classmethod
-    def setup_class(cls):
-        setup_test_search_index()
-        CreateTestData.create()
-        cls.solr = search.make_connection()
-        cls.fq = " +site_id:\"%s\" " % config['ckan.site_id']
-
-    @classmethod
-    def teardown_class(cls):
-        model.repo.rebuild_db()
-        cls.solr.close()
-
-    def teardown(self):
-        # clear the search index after every test
-        search.index_for('Package').clear()
-
-    def _get_index_id(self,pkg_id):
-        return hashlib.md5('%s%s' % (pkg_id,config['ckan.site_id'])).hexdigest()
-
-    def test_index(self):
-
-        datetime_now = datetime.now()
-        pkg_dict = {
-            'id': u'penguin-id',
-            'title': u'penguin',
-            'state': u'active',
-            'type': u'dataset',
-            'private': False,
-            'owner_org': None,
-            'metadata_created': datetime_now.isoformat(),
-            'metadata_modified': datetime_now.isoformat(),
-            'extras': [
-                {'key': 'test_date', 'value': '2013-03-01'},
-                {'key': 'test_wrong_date', 'value': 'Not a date'},
-            ]
-        }
-        search.dispatch_by_operation('Package', pkg_dict, 'new')
-        response = self.solr.query('title:penguin', fq=self.fq)
-        assert len(response) == 1, len(response)
-        assert response.results[0]['index_id'] == self._get_index_id (pkg_dict['id'])
-        assert response.results[0]['title'] == 'penguin'
-
-        # looks like solrpy messes with microseconds and time zones,
-        # so ignore them for testing
-        assert datetime_now.strftime('%Y-%m-%d %H:%M:%S') == response.results[0]['metadata_created'].strftime('%Y-%m-%d %H:%M:%S')
-        assert datetime_now.strftime('%Y-%m-%d %H:%M:%S') == response.results[0]['metadata_modified'].strftime('%Y-%m-%d %H:%M:%S')
-
-    def test_no_state_not_indexed(self):
-        pkg_dict = {
-            'title': 'penguin'
-        }
-        search.dispatch_by_operation('Package', pkg_dict, 'new')
-        response = self.solr.query('title:penguin', fq=self.fq)
-        assert len(response) == 0, len(response)
-
-    def test_index_clear(self):
-        pkg_dict = {
-            'id': u'penguin-id',
-            'title': u'penguin',
-            'state': u'active',
-            'type': u'dataset',
-            'private': False,
-            'owner_org': None,
-            'metadata_created': datetime.now().isoformat(),
-            'metadata_modified': datetime.now().isoformat(),
-        }
-        search.dispatch_by_operation('Package', pkg_dict, 'new')
-        response = self.solr.query('title:penguin', fq=self.fq)
-        assert len(response) == 1, len(response)
-        search.index_for('Package').clear()
-        response = self.solr.query('title:penguin', fq=self.fq)
-        assert len(response) == 0
-        # clear whilst empty
-        search.index_for('Package').clear()
-        response = self.solr.query('title:penguin', fq=self.fq)
-        assert len(response) == 0
-
-    def test_index_illegal_xml_chars(self):
-
-        pkg_dict = {
-            'id': u'penguin-id',
-            'title': u'\u00c3a\u0001ltimo n\u00famero penguin',
-            'notes': u'\u00c3a\u0001ltimo n\u00famero penguin',
-            'state': u'active',
-            'type': u'dataset',
-            'private': False,
-            'owner_org': None,
-            'metadata_created': datetime.now().isoformat(),
-            'metadata_modified': datetime.now().isoformat(),
-        }
-        search.dispatch_by_operation('Package', pkg_dict, 'new')
-        response = self.solr.query('title:penguin', fq=self.fq)
-        assert len(response) == 1, len(response)
-        assert response.results[0]['index_id'] == self._get_index_id (pkg_dict['id'])
-        assert response.results[0]['title'] == u'\u00c3altimo n\u00famero penguin'
-
-
 class TestSolrSearch:
     @classmethod
     def setup_class(cls):

From dd2c1ae1e5b6dc835e6021e1d3de53c3040f04d9 Mon Sep 17 00:00:00 2001
From: amercader <amercadero@gmail.com>
Date: Fri, 16 May 2014 18:01:50 +0100
Subject: [PATCH 03/36] [#1701] Add tests for date fields, including a failing
 one for #1701

---
 ckan/new_tests/lib/search/test_index.py | 58 +++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/ckan/new_tests/lib/search/test_index.py b/ckan/new_tests/lib/search/test_index.py
index 86cb34ba53b..d212625ece7 100644
--- a/ckan/new_tests/lib/search/test_index.py
+++ b/ckan/new_tests/lib/search/test_index.py
@@ -92,3 +92,61 @@ def test_index_illegal_xml_chars(self):
 
         eq_(len(response), 1)
         eq_(response.results[0]['title'], u'\u00c3altimo n\u00famero penguin')
+
+    def test_index_date_field(self):
+
+        pkg_dict = self.base_package_dict.copy()
+        pkg_dict.update({
+            'extras': [
+                {'key': 'test_date', 'value': '2014-03-22'},
+                {'key': 'test_tim_date', 'value': '2014-03-22 05:42:14'},
+            ]
+        })
+
+        self.package_index.index_package(pkg_dict)
+
+        response = self.solr_client.query('name:monkey', fq=self.fq)
+
+        eq_(len(response), 1)
+
+        assert isinstance(response.results[0]['test_date'], datetime.datetime)
+        eq_(response.results[0]['test_date'].strftime('%Y-%m-%d'),
+            '2014-03-22')
+        eq_(response.results[0]['test_tim_date'].strftime('%Y-%m-%d %H:%M:%S'),
+            '2014-03-22 05:42:14')
+
+    def test_index_date_field_wrong_value(self):
+
+        pkg_dict = self.base_package_dict.copy()
+        pkg_dict.update({
+            'extras': [
+                {'key': 'test_wrong_date', 'value': 'Not a date'},
+                {'key': 'test_another_wrong_date', 'value': '2014-13-01'},
+            ]
+        })
+
+        self.package_index.index_package(pkg_dict)
+
+        response = self.solr_client.query('name:monkey', fq=self.fq)
+
+        eq_(len(response), 1)
+
+        assert 'test_wrong_date' not in response.results[0]
+        assert 'test_another_wrong_date' not in response.results[0]
+
+    def test_index_date_field_empty_value(self):
+
+        pkg_dict = self.base_package_dict.copy()
+        pkg_dict.update({
+            'extras': [
+                {'key': 'test_empty_date', 'value': ''},
+            ]
+        })
+
+        self.package_index.index_package(pkg_dict)
+
+        response = self.solr_client.query('name:monkey', fq=self.fq)
+
+        eq_(len(response), 1)
+
+        assert 'test_empty_date' not in response.results[0]

From 9cf14977c8059ee21eb343bc9fe503b8cc482620 Mon Sep 17 00:00:00 2001
From: amercader <amercadero@gmail.com>
Date: Fri, 16 May 2014 18:11:31 +0100
Subject: [PATCH 04/36] [#1701] Don't index empty dates as the current one

When parsing an empty string, dateutil will always use a default one,
which by default is now(). We use here a fake date to see if that was
the case, and if so remove the value from the dict to index.
---
 ckan/lib/search/index.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py
index 1f6d8bed8ae..902ddd2ae9f 100644
--- a/ckan/lib/search/index.py
+++ b/ckan/lib/search/index.py
@@ -3,6 +3,7 @@
 import logging
 import collections
 import json
+import datetime
 from dateutil.parser import parse
 
 import re
@@ -217,11 +218,18 @@ def index_package(self, pkg_dict, defer_commit=False):
         # be needed?  For my data not changing the keys seems to not cause a
         # problem.
         new_dict = {}
+        bogus_date = datetime.datetime(1, 1, 1)
         for key, value in pkg_dict.items():
             key = key.encode('ascii', 'ignore')
             if key.endswith('_date'):
                 try:
-                    value = parse(value).isoformat() + 'Z'
+                    date = parse(value, default=bogus_date)
+                    if date != bogus_date:
+                        value = date.isoformat() + 'Z'
+                    else:
+                        # The date field was empty, so dateutil filled it with
+                        # the default bogus date
+                        value = None
                 except ValueError:
                     continue
             new_dict[key] = value

From 1245ff21f24ff424bfde3fef76e47f4ece3e385f Mon Sep 17 00:00:00 2001
From: Vitor Baptista <vitor@vitorbaptista.com>
Date: Mon, 16 Jun 2014 18:11:29 -0300
Subject: [PATCH 05/36] [#1767] Fix bug with send_email_notifications not
 working with paster

The problem is that our validators notice that .send_email_notifications()
didn't call its auth functions and raise an Exception. But it shouldn't call
it's auth functions when being called with paster anyway, so it's correct.

This fixes that.
---
 ckan/logic/action/update.py                |  1 +
 ckan/new_tests/logic/action/test_update.py | 28 ++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py
index 3cbd2abbe8d..bb8cf3faaef 100644
--- a/ckan/logic/action/update.py
+++ b/ckan/logic/action/update.py
@@ -1063,6 +1063,7 @@ def dashboard_mark_activities_old(context, data_dict):
         model.repo.commit()
 
 
+@logic.auth_audit_exempt
 def send_email_notifications(context, data_dict):
     '''Send any pending activity stream notification emails to users.
 
diff --git a/ckan/new_tests/logic/action/test_update.py b/ckan/new_tests/logic/action/test_update.py
index 035e52f894b..9d344a109d0 100644
--- a/ckan/new_tests/logic/action/test_update.py
+++ b/ckan/new_tests/logic/action/test_update.py
@@ -3,12 +3,16 @@
 
 import nose.tools
 import mock
+import pylons.config as config
 
 import ckan.logic as logic
 import ckan.new_tests.helpers as helpers
 import ckan.new_tests.factories as factories
 
 
+assert_raises = nose.tools.assert_raises
+
+
 def datetime_from_string(s):
     '''Return a standard datetime.datetime object initialised from a string in
     the same format used for timestamps in dictized activities (the format
@@ -385,3 +389,27 @@ def test_resource_reorder(self):
         assert reordered_resource_urls == ["http://b.html",
                                            "http://c.html",
                                            "http://a.html"]
+
+
+class TestUpdateSendEmailNotifications(object):
+    @classmethod
+    def setup_class(cls):
+        cls._original_config = dict(config)
+        config['ckan.activity_streams_email_notifications'] = True
+
+    @classmethod
+    def teardown_class(cls):
+        config.clear()
+        config.update(cls._original_config)
+
+    @mock.patch('ckan.logic.action.update.request')
+    def test_calling_through_paster_doesnt_validates_auth(self, mock_request):
+        mock_request.environ.get.return_value = True
+        helpers.call_action('send_email_notifications')
+
+    @mock.patch('ckan.logic.action.update.request')
+    def test_not_calling_through_paster_validates_auth(self, mock_request):
+        mock_request.environ.get.return_value = False
+        assert_raises(logic.NotAuthorized, helpers.call_action,
+                      'send_email_notifications',
+                      context={'ignore_auth': False})

From 45c1d4e88acfbe9471e4848ab43400d60a136fe2 Mon Sep 17 00:00:00 2001
From: Vitor Baptista <vitor@vitorbaptista.com>
Date: Mon, 16 Jun 2014 18:31:44 -0300
Subject: [PATCH 06/36] Use assert_raises instead of the full
 nose.tools.assert_raises

---
 ckan/new_tests/logic/action/test_update.py | 38 +++++++++++-----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/ckan/new_tests/logic/action/test_update.py b/ckan/new_tests/logic/action/test_update.py
index 9d344a109d0..d3d5705906b 100644
--- a/ckan/new_tests/logic/action/test_update.py
+++ b/ckan/new_tests/logic/action/test_update.py
@@ -87,14 +87,14 @@ def test_user_update_with_id_that_does_not_exist(self):
         user_dict = factories.User.attributes()
         user_dict['id'] = "there's no user with this id"
 
-        nose.tools.assert_raises(logic.NotFound, helpers.call_action,
-                                 'user_update', **user_dict)
+        assert_raises(logic.NotFound, helpers.call_action,
+                      'user_update', **user_dict)
 
     def test_user_update_with_no_id(self):
         user_dict = factories.User.attributes()
         assert 'id' not in user_dict
-        nose.tools.assert_raises(logic.ValidationError, helpers.call_action,
-                                 'user_update', **user_dict)
+        assert_raises(logic.ValidationError, helpers.call_action,
+                      'user_update', **user_dict)
 
     ## START-FOR-LOOP-EXAMPLE
 
@@ -105,9 +105,9 @@ def test_user_update_with_invalid_name(self):
                          'a' * 200, 'Hi!', 'i++%')
         for name in invalid_names:
             user['name'] = name
-            nose.tools.assert_raises(logic.ValidationError,
-                                     helpers.call_action, 'user_update',
-                                     **user)
+            assert_raises(logic.ValidationError,
+                          helpers.call_action, 'user_update',
+                          **user)
 
     ## END-FOR-LOOP-EXAMPLE
 
@@ -118,8 +118,8 @@ def test_user_update_to_name_that_already_exists(self):
         # Try to update fred and change his user name to bob, which is already
         # bob's user name
         fred['name'] = bob['name']
-        nose.tools.assert_raises(logic.ValidationError, helpers.call_action,
-                                 'user_update', **fred)
+        assert_raises(logic.ValidationError, helpers.call_action,
+                      'user_update', **fred)
 
     def test_user_update_password(self):
         '''Test that updating a user's password works successfully.'''
@@ -143,8 +143,8 @@ def test_user_update_with_short_password(self):
         user = factories.User()
 
         user['password'] = 'xxx'  # This password is too short.
-        nose.tools.assert_raises(logic.ValidationError, helpers.call_action,
-                                 'user_update', **user)
+        assert_raises(logic.ValidationError, helpers.call_action,
+                      'user_update', **user)
 
     def test_user_update_with_empty_password(self):
         '''If an empty password is passed to user_update, nothing should
@@ -169,17 +169,17 @@ def test_user_update_with_null_password(self):
         user = factories.User()
 
         user['password'] = None
-        nose.tools.assert_raises(logic.ValidationError, helpers.call_action,
-                                 'user_update', **user)
+        assert_raises(logic.ValidationError, helpers.call_action,
+                      'user_update', **user)
 
     def test_user_update_with_invalid_password(self):
         user = factories.User()
 
         for password in (False, -1, 23, 30.7):
             user['password'] = password
-            nose.tools.assert_raises(logic.ValidationError,
-                                     helpers.call_action, 'user_update',
-                                     **user)
+            assert_raises(logic.ValidationError,
+                          helpers.call_action, 'user_update',
+                          **user)
 
     def test_user_update_without_email_address(self):
         '''You have to pass an email address when you call user_update.
@@ -197,9 +197,9 @@ def test_user_update_without_email_address(self):
         user = factories.User()
         del user['email']
 
-        nose.tools.assert_raises(logic.ValidationError,
-                                 helpers.call_action, 'user_update',
-                                 **user)
+        assert_raises(logic.ValidationError,
+                      helpers.call_action, 'user_update',
+                      **user)
 
     # TODO: Valid and invalid values for the rest of the user model's fields.
 

From 1fe8df11abe122dc001cee4c4d9e614426836466 Mon Sep 17 00:00:00 2001
From: Vitor Baptista <vitor@vitorbaptista.com>
Date: Fri, 20 Jun 2014 15:55:15 -0300
Subject: [PATCH 07/36] [#1776] Fix bug when upserting [] on JSON column in the
 datastore

We were testing that the new value wasn't `None` using `if value`. As `[]` is a
falsy value, we never went through that `if` and ended up trying to run an
invalid SQL query.

This commit fixes that by explictly testing `if value is not None`.

This was first discovered in
http://stackoverflow.com/questions/24207065/inserting-empty-arrays-in-json-type-fields-in-datastore
---
 ckanext/datastore/db.py                |  2 +-
 ckanext/datastore/tests/test_upsert.py | 29 ++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py
index 53775000752..d32221c2093 100644
--- a/ckanext/datastore/db.py
+++ b/ckanext/datastore/db.py
@@ -621,7 +621,7 @@ def upsert_data(context, data_dict):
 
             for field in fields:
                 value = record.get(field['id'])
-                if value and field['type'].lower() == 'nested':
+                if value is not None and field['type'].lower() == 'nested':
                     ## a tuple with an empty second value
                     record[field['id']] = (json.dumps(value), '')
 
diff --git a/ckanext/datastore/tests/test_upsert.py b/ckanext/datastore/tests/test_upsert.py
index 9d65700198a..82d61452f9a 100644
--- a/ckanext/datastore/tests/test_upsert.py
+++ b/ckanext/datastore/tests/test_upsert.py
@@ -13,6 +13,8 @@
 import ckanext.datastore.db as db
 from ckanext.datastore.tests.helpers import rebuild_all_dbs, set_url_type
 
+assert_equal = nose.tools.assert_equal
+
 
 class TestDatastoreUpsert(tests.WsgiAppCase):
     sysadmin_user = None
@@ -238,6 +240,33 @@ def test_upsert_non_existing_field(self):
 
         assert res_dict['success'] is False
 
+    def test_upsert_works_with_empty_list_in_json_field(self):
+        hhguide = u"hitchhiker's guide to the galaxy"
+
+        data = {
+            'resource_id': self.data['resource_id'],
+            'method': 'upsert',
+            'records': [{
+                'nested': [],
+                u'b\xfck': hhguide}]
+        }
+
+        postparams = '%s=1' % json.dumps(data)
+        auth = {'Authorization': str(self.sysadmin_user.apikey)}
+        res = self.app.post('/api/action/datastore_upsert', params=postparams,
+                            extra_environ=auth)
+        res_dict = json.loads(res.body)
+        assert res_dict['success'] is True, res_dict
+
+        c = self.Session.connection()
+        results = c.execute('select * from "{0}"'.format(data['resource_id']))
+        record = [r for r in results.fetchall() if r[2] == hhguide]
+        self.Session.remove()
+        assert len(record) == 1, record
+        assert_equal(json.loads(record[0][4].json),
+                     data['records'][0]['nested'])
+
+
 
 class TestDatastoreInsert(tests.WsgiAppCase):
     sysadmin_user = None

From 8aac0be9af4636e8dcae7fdba878c1a7713765da Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 10:05:04 +0000
Subject: [PATCH 08/36] Fix some Sphinx warnings

---
 doc/api.rst                             | 2 ++
 doc/contributing/i18n.rst               | 2 +-
 doc/extensions/adding-custom-fields.rst | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/api.rst b/doc/api.rst
index 68d33ce707a..c25dcbd5efc 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 ==========
 Page Moved
 ==========
diff --git a/doc/contributing/i18n.rst b/doc/contributing/i18n.rst
index b2c7baba6b5..8764fc831ab 100644
--- a/doc/contributing/i18n.rst
+++ b/doc/contributing/i18n.rst
@@ -192,6 +192,6 @@ managing translations:
       wait until the next minor release.
 
 * The *master* branch is not currently translated. Translations from the latest
-  stable line (see :ref:`ckan-releases`) are cherry-picked into master after each
+  stable line (see :ref:`releases`) are cherry-picked into master after each
   minor or patch release.
 
diff --git a/doc/extensions/adding-custom-fields.rst b/doc/extensions/adding-custom-fields.rst
index f7a5131f737..d1d6b6bdca9 100644
--- a/doc/extensions/adding-custom-fields.rst
+++ b/doc/extensions/adding-custom-fields.rst
@@ -192,7 +192,8 @@ with:
 Tag vocabularies
 ----------------
 If you need to add a custom field where the input options are restricted to a
-provided list of options, you can use tag vocabularies :doc:`/tag-vocabularies`.
+provided list of options, you can use tag vocabularies
+:doc:`/maintaining/tag-vocabularies`.
 We will need to create our vocabulary first. By calling
 :func:`~ckan.logic.action.vocabulary_create`. Add a function to your plugin.py
 above your plugin class.

From 46a2d6baada8ebb4abcd2614dfd50dd74b4e2bbc Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 11:16:44 +0000
Subject: [PATCH 09/36] Add a test for Sphinx errors or warnings

---
 ckan/new_tests/test_coding_standards.py | 63 +++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 ckan/new_tests/test_coding_standards.py

diff --git a/ckan/new_tests/test_coding_standards.py b/ckan/new_tests/test_coding_standards.py
new file mode 100644
index 00000000000..8b939770163
--- /dev/null
+++ b/ckan/new_tests/test_coding_standards.py
@@ -0,0 +1,63 @@
+'''A module for coding standards tests.
+
+These are tests that are not functional- or unit-testing any particular piece
+of CKAN code, but are checking coding standards. For example: checking that
+there are no errors in the Sphinx build, that there are no PEP8 problems,
+etc.
+
+'''
+import subprocess
+
+
+def test_building_the_docs():
+    '''There should be no warnings or errors when building the Sphinx docs.
+
+    This test unfortunately does take quite a long time to run - rebuilding the
+    docs from scratch just takes a long time.
+
+    This test will also fail is build_sphinx exits with non-zero status
+    (because subprocess.check_output() will raise an exception).
+
+    '''
+    try:
+        output = subprocess.check_output(
+            ['python', 'setup.py', 'build_sphinx', '--all-files', '--fresh-env'],
+            stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError as err:
+        assert False, "Building the docs failed: {err}".format(err=err)
+    output_lines = output.split('\n')
+
+    errors = [line for line in output_lines if 'ERROR' in line]
+    if errors:
+        assert False, ("Don't add any errors to the Sphinx build: "
+                       "{errors}".format(errors=errors))
+
+    warnings = [line for line in output_lines if 'WARNING' in line]
+
+    # Some warnings have been around for a long time and aren't easy to fix.
+    # These are allowed, but no more should be added.
+    allowed_warnings = [
+        'WARNING: duplicate label ckan.auth.create_user_via_web',
+        'WARNING: duplicate label ckan.auth.create_unowned_dataset',
+        'WARNING: duplicate label ckan.auth.user_create_groups',
+        'WARNING: duplicate label ckan.auth.anon_create_dataset',
+        'WARNING: duplicate label ckan.auth.user_delete_organizations',
+        'WARNING: duplicate label ckan.auth.create_user_via_api',
+        'WARNING: duplicate label ckan.auth.create_dataset_if_not_in_organization',
+        'WARNING: duplicate label ckan.auth.user_delete_groups',
+        'WARNING: duplicate label ckan.auth.user_create_organizations',
+        'WARNING: duplicate label ckan.auth.roles_that_cascade_to_sub_groups'
+        ]
+
+    # Remove the allowed warnings from the list of collected warnings.
+    warnings_to_remove = []
+    for allowed_warning in allowed_warnings:
+        for warning in warnings:
+            if allowed_warning in warning:
+                warnings_to_remove.append(warning)
+    new_warnings = [warning for warning in warnings
+                    if warning not in warnings_to_remove]
+
+    if new_warnings:
+        assert False, ("Don't add any new warnings to the Sphinx build: "
+                       "{warnings}".format(warnings=new_warnings))

From 1575808e72fe8d5aebeaf32f67f098d035fb1b52 Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 10:05:04 +0000
Subject: [PATCH 10/36] Fix some Sphinx warnings

---
 doc/api.rst                             | 2 ++
 doc/contributing/i18n.rst               | 2 +-
 doc/extensions/adding-custom-fields.rst | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/api.rst b/doc/api.rst
index 68d33ce707a..c25dcbd5efc 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -1,3 +1,5 @@
+:orphan:
+
 ==========
 Page Moved
 ==========
diff --git a/doc/contributing/i18n.rst b/doc/contributing/i18n.rst
index b2c7baba6b5..8764fc831ab 100644
--- a/doc/contributing/i18n.rst
+++ b/doc/contributing/i18n.rst
@@ -192,6 +192,6 @@ managing translations:
       wait until the next minor release.
 
 * The *master* branch is not currently translated. Translations from the latest
-  stable line (see :ref:`ckan-releases`) are cherry-picked into master after each
+  stable line (see :ref:`releases`) are cherry-picked into master after each
   minor or patch release.
 
diff --git a/doc/extensions/adding-custom-fields.rst b/doc/extensions/adding-custom-fields.rst
index f7a5131f737..d1d6b6bdca9 100644
--- a/doc/extensions/adding-custom-fields.rst
+++ b/doc/extensions/adding-custom-fields.rst
@@ -192,7 +192,8 @@ with:
 Tag vocabularies
 ----------------
 If you need to add a custom field where the input options are restricted to a
-provided list of options, you can use tag vocabularies :doc:`/tag-vocabularies`.
+provided list of options, you can use tag vocabularies
+:doc:`/maintaining/tag-vocabularies`.
 We will need to create our vocabulary first. By calling
 :func:`~ckan.logic.action.vocabulary_create`. Add a function to your plugin.py
 above your plugin class.

From 8f7a628b7f08fa59968078b2d728f4615db2086a Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 11:49:26 +0000
Subject: [PATCH 11/36] PEP8

---
 ckan/new_tests/test_coding_standards.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ckan/new_tests/test_coding_standards.py b/ckan/new_tests/test_coding_standards.py
index 8b939770163..2a072afdeab 100644
--- a/ckan/new_tests/test_coding_standards.py
+++ b/ckan/new_tests/test_coding_standards.py
@@ -47,7 +47,7 @@ def test_building_the_docs():
         'WARNING: duplicate label ckan.auth.user_delete_groups',
         'WARNING: duplicate label ckan.auth.user_create_organizations',
         'WARNING: duplicate label ckan.auth.roles_that_cascade_to_sub_groups'
-        ]
+    ]
 
     # Remove the allowed warnings from the list of collected warnings.
     warnings_to_remove = []

From 086fdeb4e5232be9ff4cb998dbff1e9eff0fe9de Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 11:58:02 +0000
Subject: [PATCH 12/36] Print reason when building docs fails

Print the reason when the building the docs test fails because Sphinx
exits with non-zero. Should help debug this test on Travis..
---
 ckan/new_tests/test_coding_standards.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ckan/new_tests/test_coding_standards.py b/ckan/new_tests/test_coding_standards.py
index 2a072afdeab..2fb144d6d8d 100644
--- a/ckan/new_tests/test_coding_standards.py
+++ b/ckan/new_tests/test_coding_standards.py
@@ -24,7 +24,8 @@ def test_building_the_docs():
             ['python', 'setup.py', 'build_sphinx', '--all-files', '--fresh-env'],
             stderr=subprocess.STDOUT)
     except subprocess.CalledProcessError as err:
-        assert False, "Building the docs failed: {err}".format(err=err)
+        assert False, "Building the docs failed: {err}".format(
+            err=err.message + ' ' + err.output)
     output_lines = output.split('\n')
 
     errors = [line for line in output_lines if 'ERROR' in line]

From 9016127a15db1401a6c24630c6802cd2a2667d11 Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 12:26:57 +0000
Subject: [PATCH 13/36] Don't use subprocess.check_output()

It isn't in Python 2.6
---
 ckan/new_tests/test_coding_standards.py | 39 ++++++++++++++++++++++++-
 doc/conf.py                             | 39 ++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/ckan/new_tests/test_coding_standards.py b/ckan/new_tests/test_coding_standards.py
index 2fb144d6d8d..46890f1793e 100644
--- a/ckan/new_tests/test_coding_standards.py
+++ b/ckan/new_tests/test_coding_standards.py
@@ -9,6 +9,43 @@
 import subprocess
 
 
+# We implement our own check_output() function because
+# subprocess.check_output() isn't in Python 2.6.
+# This code is copy-pasted from Python 2.7:
+# http://hg.python.org/cpython/file/d37f963394aa/Lib/subprocess.py#l544
+def check_output(*popenargs, **kwargs):
+    r"""Run command with arguments and return its output as a byte string.
+
+    If the exit code was non-zero it raises a CalledProcessError.  The
+    CalledProcessError object will have the return code in the returncode
+    attribute and output in the output attribute.
+
+    The arguments are the same as for the Popen constructor.  Example:
+
+    >>> check_output(["ls", "-l", "/dev/null"])
+    'crw-rw-rw- 1 root root 1, 3 Oct 18  2007 /dev/null\n'
+
+    The stdout argument is not allowed as it is used internally.
+    To capture standard error in the result, use stderr=STDOUT.
+
+    >>> check_output(["/bin/sh", "-c",
+    ...               "ls -l non_existent_file ; exit 0"],
+    ...              stderr=STDOUT)
+    'ls: non_existent_file: No such file or directory\n'
+    """
+    if 'stdout' in kwargs:
+        raise ValueError('stdout argument not allowed, it will be overridden.')
+    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+    output, unused_err = process.communicate()
+    retcode = process.poll()
+    if retcode:
+        cmd = kwargs.get("args")
+        if cmd is None:
+            cmd = popenargs[0]
+        raise subprocess.CalledProcessError(retcode, cmd, output=output)
+    return output
+
+
 def test_building_the_docs():
     '''There should be no warnings or errors when building the Sphinx docs.
 
@@ -20,7 +57,7 @@ def test_building_the_docs():
 
     '''
     try:
-        output = subprocess.check_output(
+        output = check_output(
             ['python', 'setup.py', 'build_sphinx', '--all-files', '--fresh-env'],
             stderr=subprocess.STDOUT)
     except subprocess.CalledProcessError as err:
diff --git a/doc/conf.py b/doc/conf.py
index 59b2ee361e8..87fc1dab8f3 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -115,6 +115,43 @@
 release = ckan.__version__
 
 
+# We implement our own check_output() function because
+# subprocess.check_output() isn't in Python 2.6.
+# This code is copy-pasted from Python 2.7:
+# http://hg.python.org/cpython/file/d37f963394aa/Lib/subprocess.py#l544
+def check_output(*popenargs, **kwargs):
+    r"""Run command with arguments and return its output as a byte string.
+
+    If the exit code was non-zero it raises a CalledProcessError.  The
+    CalledProcessError object will have the return code in the returncode
+    attribute and output in the output attribute.
+
+    The arguments are the same as for the Popen constructor.  Example:
+
+    >>> check_output(["ls", "-l", "/dev/null"])
+    'crw-rw-rw- 1 root root 1, 3 Oct 18  2007 /dev/null\n'
+
+    The stdout argument is not allowed as it is used internally.
+    To capture standard error in the result, use stderr=STDOUT.
+
+    >>> check_output(["/bin/sh", "-c",
+    ...               "ls -l non_existent_file ; exit 0"],
+    ...              stderr=STDOUT)
+    'ls: non_existent_file: No such file or directory\n'
+    """
+    if 'stdout' in kwargs:
+        raise ValueError('stdout argument not allowed, it will be overridden.')
+    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+    output, unused_err = process.communicate()
+    retcode = process.poll()
+    if retcode:
+        cmd = kwargs.get("args")
+        if cmd is None:
+            cmd = popenargs[0]
+        raise subprocess.CalledProcessError(retcode, cmd, output=output)
+    return output
+
+
 def latest_release_tag():
     '''Return the name of the git tag for the latest stable release.
 
@@ -123,7 +160,7 @@ def latest_release_tag():
     This requires git to be installed.
 
     '''
-    git_tags = subprocess.check_output(['git', 'tag', '-l']).split()
+    git_tags = check_output(['git', 'tag', '-l']).split()
 
     # FIXME: We could do more careful pattern matching against ckan-X.Y.Z here.
     release_tags = [tag for tag in git_tags if tag.startswith('ckan-')]

From c0ed14574846e13522bb28d7be4bdf6c60765c35 Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 12:36:39 +0000
Subject: [PATCH 14/36] Print reason if getting CKAN version in docs fails

Trying to debug a problem building the docs on Travis..
---
 doc/conf.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/doc/conf.py b/doc/conf.py
index 87fc1dab8f3..c78c58308ec 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -160,11 +160,14 @@ def latest_release_tag():
     This requires git to be installed.
 
     '''
-    git_tags = check_output(['git', 'tag', '-l']).split()
+    git_tags = check_output(
+        ['git', 'tag', '-l'], stderr=subprocess.STDOUT).split()
 
     # FIXME: We could do more careful pattern matching against ckan-X.Y.Z here.
     release_tags = [tag for tag in git_tags if tag.startswith('ckan-')]
 
+    assert release_tags, git_tags
+
     # git tag -l prints out the tags in the right order anyway, but don't rely
     # on that, sort them again here for good measure.
     release_tags.sort()

From 58de32b978ad07a104b751ccf2ba87f8e9b9dc6e Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 13:00:38 +0000
Subject: [PATCH 15/36] Don't crash docs build if `git tag` fails

For some reason `git tag -l` seems to be outputting nothing on Travis,
which makes the docs build crash on Travis.

Instead of crashing just use an empty string for the CKAN version, which
will break the docs but no one reads the docs that Travis builds anyway.
---
 doc/conf.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index c78c58308ec..3e1bbab3b7f 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -166,13 +166,13 @@ def latest_release_tag():
     # FIXME: We could do more careful pattern matching against ckan-X.Y.Z here.
     release_tags = [tag for tag in git_tags if tag.startswith('ckan-')]
 
-    assert release_tags, git_tags
-
     # git tag -l prints out the tags in the right order anyway, but don't rely
     # on that, sort them again here for good measure.
     release_tags.sort()
 
-    return release_tags[-1]
+    if release_tags:
+        return release_tags[-1]
+    else return ''
 
 
 def latest_release_version():

From 30b09fdb93ffeb6e6d6168e47158262b5520c24d Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 13:29:10 +0000
Subject: [PATCH 16/36] Typo

---
 doc/conf.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/conf.py b/doc/conf.py
index 3e1bbab3b7f..d49b38a6fa0 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -172,7 +172,8 @@ def latest_release_tag():
 
     if release_tags:
         return release_tags[-1]
-    else return ''
+    else:
+        return ''
 
 
 def latest_release_version():

From 5c4c0573f846f4aa46fbf118ea70060c3b18c4c4 Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 13:31:10 +0000
Subject: [PATCH 17/36] Move check_output() to shared function

Remove two duplicated check_output() functions.

This also fixes a Python 2.6 error that when the subprocess exited with
non-zero check_output() was calling CalledProcessError with an argument
that doesn't exist in 2.6.
---
 ckan/lib/util.py                        | 45 +++++++++++++++++++++++
 ckan/new_tests/test_coding_standards.py | 47 ++++---------------------
 doc/conf.py                             | 41 ++-------------------
 3 files changed, 54 insertions(+), 79 deletions(-)
 create mode 100644 ckan/lib/util.py

diff --git a/ckan/lib/util.py b/ckan/lib/util.py
new file mode 100644
index 00000000000..86d9c0e3d95
--- /dev/null
+++ b/ckan/lib/util.py
@@ -0,0 +1,45 @@
+'''Shared utility functions for any Python code to use.
+
+Unlike :py:func:`ckan.lib.helpers`, the functions in this module are not
+available to templates.
+
+'''
+import subprocess
+
+
+# We implement our own check_output() function because
+# subprocess.check_output() isn't in Python 2.6.
+# This code is copy-pasted from Python 2.7 and adapted to make it work with
+# Python 2.6.
+# http://hg.python.org/cpython/file/d37f963394aa/Lib/subprocess.py#l544
+def check_output(*popenargs, **kwargs):
+    r"""Run command with arguments and return its output as a byte string.
+
+    If the exit code was non-zero it raises a CalledProcessError.  The
+    CalledProcessError object will have the return code in the returncode
+    attribute and output in the output attribute.
+
+    The arguments are the same as for the Popen constructor.  Example:
+
+    >>> check_output(["ls", "-l", "/dev/null"])
+    'crw-rw-rw- 1 root root 1, 3 Oct 18  2007 /dev/null\n'
+
+    The stdout argument is not allowed as it is used internally.
+    To capture standard error in the result, use stderr=STDOUT.
+
+    >>> check_output(["/bin/sh", "-c",
+    ...               "ls -l non_existent_file ; exit 0"],
+    ...              stderr=STDOUT)
+    'ls: non_existent_file: No such file or directory\n'
+    """
+    if 'stdout' in kwargs:
+        raise ValueError('stdout argument not allowed, it will be overridden.')
+    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+    output, unused_err = process.communicate()
+    retcode = process.poll()
+    if retcode:
+        cmd = kwargs.get("args")
+        if cmd is None:
+            cmd = popenargs[0]
+        raise subprocess.CalledProcessError(retcode, cmd)
+    return output
diff --git a/ckan/new_tests/test_coding_standards.py b/ckan/new_tests/test_coding_standards.py
index 46890f1793e..e27018c9c61 100644
--- a/ckan/new_tests/test_coding_standards.py
+++ b/ckan/new_tests/test_coding_standards.py
@@ -8,42 +8,7 @@
 '''
 import subprocess
 
-
-# We implement our own check_output() function because
-# subprocess.check_output() isn't in Python 2.6.
-# This code is copy-pasted from Python 2.7:
-# http://hg.python.org/cpython/file/d37f963394aa/Lib/subprocess.py#l544
-def check_output(*popenargs, **kwargs):
-    r"""Run command with arguments and return its output as a byte string.
-
-    If the exit code was non-zero it raises a CalledProcessError.  The
-    CalledProcessError object will have the return code in the returncode
-    attribute and output in the output attribute.
-
-    The arguments are the same as for the Popen constructor.  Example:
-
-    >>> check_output(["ls", "-l", "/dev/null"])
-    'crw-rw-rw- 1 root root 1, 3 Oct 18  2007 /dev/null\n'
-
-    The stdout argument is not allowed as it is used internally.
-    To capture standard error in the result, use stderr=STDOUT.
-
-    >>> check_output(["/bin/sh", "-c",
-    ...               "ls -l non_existent_file ; exit 0"],
-    ...              stderr=STDOUT)
-    'ls: non_existent_file: No such file or directory\n'
-    """
-    if 'stdout' in kwargs:
-        raise ValueError('stdout argument not allowed, it will be overridden.')
-    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
-    output, unused_err = process.communicate()
-    retcode = process.poll()
-    if retcode:
-        cmd = kwargs.get("args")
-        if cmd is None:
-            cmd = popenargs[0]
-        raise subprocess.CalledProcessError(retcode, cmd, output=output)
-    return output
+import ckan.lib.util as util
 
 
 def test_building_the_docs():
@@ -52,17 +17,17 @@ def test_building_the_docs():
     This test unfortunately does take quite a long time to run - rebuilding the
     docs from scratch just takes a long time.
 
-    This test will also fail is build_sphinx exits with non-zero status
-    (because subprocess.check_output() will raise an exception).
+    This test will also fail is build_sphinx exits with non-zero status.
 
     '''
     try:
-        output = check_output(
+        output = util.check_output(
             ['python', 'setup.py', 'build_sphinx', '--all-files', '--fresh-env'],
             stderr=subprocess.STDOUT)
     except subprocess.CalledProcessError as err:
-        assert False, "Building the docs failed: {err}".format(
-            err=err.message + ' ' + err.output)
+        assert False, (
+            "Building the docs failed with return code: {code}".format(
+                code=err.returncode))
     output_lines = output.split('\n')
 
     errors = [line for line in output_lines if 'ERROR' in line]
diff --git a/doc/conf.py b/doc/conf.py
index d49b38a6fa0..c1e1b93fc40 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -18,6 +18,8 @@
 import os
 import subprocess
 
+import ckan.lib.util as util
+
 # If your extensions (or modules documented by autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -115,43 +117,6 @@
 release = ckan.__version__
 
 
-# We implement our own check_output() function because
-# subprocess.check_output() isn't in Python 2.6.
-# This code is copy-pasted from Python 2.7:
-# http://hg.python.org/cpython/file/d37f963394aa/Lib/subprocess.py#l544
-def check_output(*popenargs, **kwargs):
-    r"""Run command with arguments and return its output as a byte string.
-
-    If the exit code was non-zero it raises a CalledProcessError.  The
-    CalledProcessError object will have the return code in the returncode
-    attribute and output in the output attribute.
-
-    The arguments are the same as for the Popen constructor.  Example:
-
-    >>> check_output(["ls", "-l", "/dev/null"])
-    'crw-rw-rw- 1 root root 1, 3 Oct 18  2007 /dev/null\n'
-
-    The stdout argument is not allowed as it is used internally.
-    To capture standard error in the result, use stderr=STDOUT.
-
-    >>> check_output(["/bin/sh", "-c",
-    ...               "ls -l non_existent_file ; exit 0"],
-    ...              stderr=STDOUT)
-    'ls: non_existent_file: No such file or directory\n'
-    """
-    if 'stdout' in kwargs:
-        raise ValueError('stdout argument not allowed, it will be overridden.')
-    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
-    output, unused_err = process.communicate()
-    retcode = process.poll()
-    if retcode:
-        cmd = kwargs.get("args")
-        if cmd is None:
-            cmd = popenargs[0]
-        raise subprocess.CalledProcessError(retcode, cmd, output=output)
-    return output
-
-
 def latest_release_tag():
     '''Return the name of the git tag for the latest stable release.
 
@@ -160,7 +125,7 @@ def latest_release_tag():
     This requires git to be installed.
 
     '''
-    git_tags = check_output(
+    git_tags = util.check_output(
         ['git', 'tag', '-l'], stderr=subprocess.STDOUT).split()
 
     # FIXME: We could do more careful pattern matching against ckan-X.Y.Z here.

From 871ebd28f9ffbebdb77eec6fb3fa4d25060de23a Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 13:35:25 +0000
Subject: [PATCH 18/36] Fix a typo in a docstring

---
 ckan/lib/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ckan/lib/util.py b/ckan/lib/util.py
index 86d9c0e3d95..62dfe309d61 100644
--- a/ckan/lib/util.py
+++ b/ckan/lib/util.py
@@ -1,6 +1,6 @@
 '''Shared utility functions for any Python code to use.
 
-Unlike :py:func:`ckan.lib.helpers`, the functions in this module are not
+Unlike :py:mod:`ckan.lib.helpers`, the functions in this module are not
 available to templates.
 
 '''

From 599b6541772224b91a95c9d116ac1f17c4ff656a Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 13:55:49 +0000
Subject: [PATCH 19/36] Fix a Sphinx error on Travis

---
 doc/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/conf.py b/doc/conf.py
index c1e1b93fc40..70025d687e8 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -138,7 +138,7 @@ def latest_release_tag():
     if release_tags:
         return release_tags[-1]
     else:
-        return ''
+        return 'COULD NOT DETECT VERSION NUMBER'
 
 
 def latest_release_version():

From 16d3163bed67587f5637cacc1871483db802720f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aitor=20Mag=C3=A1n?= <amagan@conwet.com>
Date: Wed, 2 Jul 2014 16:23:44 +0200
Subject: [PATCH 20/36] Fix #1710. Replace "missing_fields" by
 "non_existing_filed_names" in order to get correctly the list of non existing
 fields

---
 ckanext/datastore/db.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py
index ff88870ff67..bd65d2947ee 100644
--- a/ckanext/datastore/db.py
+++ b/ckanext/datastore/db.py
@@ -599,7 +599,7 @@ def upsert_data(context, data_dict):
             if non_existing_filed_names:
                 raise ValidationError({
                     'fields': [u'fields "{0}" do not exist'.format(
-                        ', '.join(missing_fields))]
+                        ', '.join(non_existing_filed_names))]
                 })
 
             unique_values = [record[key] for key in unique_keys]

From f638f40444444e956f9c3cc931af0d8edc4f0a4a Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Wed, 2 Jul 2014 14:28:15 +0000
Subject: [PATCH 21/36] Fix a Sphinx error on Travis

---
 doc/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/conf.py b/doc/conf.py
index 70025d687e8..66ff5199529 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -138,7 +138,7 @@ def latest_release_tag():
     if release_tags:
         return release_tags[-1]
     else:
-        return 'COULD NOT DETECT VERSION NUMBER'
+        return 'COULD_NOT_DETECT_VERSION_NUMBER'
 
 
 def latest_release_version():

From dd5dd304968118f2244d4ea5a0e97173ee62675e Mon Sep 17 00:00:00 2001
From: Sean Hammond <git@seanh.cc>
Date: Mon, 7 Jul 2014 14:05:16 +0000
Subject: [PATCH 22/36] Minor tweak to Sphinx test

---
 ckan/new_tests/test_coding_standards.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ckan/new_tests/test_coding_standards.py b/ckan/new_tests/test_coding_standards.py
index e27018c9c61..f49ae9d2840 100644
--- a/ckan/new_tests/test_coding_standards.py
+++ b/ckan/new_tests/test_coding_standards.py
@@ -53,11 +53,13 @@ def test_building_the_docs():
     ]
 
     # Remove the allowed warnings from the list of collected warnings.
+    # Be sure to only remove one warning for each allowed warning.
     warnings_to_remove = []
     for allowed_warning in allowed_warnings:
         for warning in warnings:
             if allowed_warning in warning:
                 warnings_to_remove.append(warning)
+                break
     new_warnings = [warning for warning in warnings
                     if warning not in warnings_to_remove]
 

From cdfefcfa5d292fabc44c0a0b0c77682a85a17084 Mon Sep 17 00:00:00 2001
From: David Read <david.read@hackneyworkshop.com>
Date: Wed, 9 Jul 2014 15:17:09 +0000
Subject: [PATCH 23/36] [#1768] Convert group/organization_list to use
 group_dictize and take options. Some minor issues still.

---
 ckan/lib/dictization/model_dictize.py | 210 ++++++++++++++------------
 ckan/logic/action/get.py              |  50 +++++-
 2 files changed, 162 insertions(+), 98 deletions(-)

diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py
index 5de9f7567b6..0ff85e3295f 100644
--- a/ckan/lib/dictization/model_dictize.py
+++ b/ckan/lib/dictization/model_dictize.py
@@ -15,62 +15,31 @@
 ## package save
 
 def group_list_dictize(obj_list, context,
-                       sort_key=lambda x:x['display_name'], reverse=False,
-                       with_package_counts=True):
-
-    active = context.get('active', True)
-    with_private = context.get('include_private_packages', False)
-
-    if with_package_counts:
-        query = search.PackageSearchQuery()
-        q = {'q': '+capacity:public' if not with_private else '*:*',
-             'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
-             'facet.limit': -1, 'rows': 1}
-        query.run(q)
-
-    result_list = []
-
-    for obj in obj_list:
-        if context.get('with_capacity'):
-            obj, capacity = obj
-            group_dict = d.table_dictize(obj, context, capacity=capacity)
-        else:
-            group_dict = d.table_dictize(obj, context)
-        group_dict.pop('created')
-        if active and obj.state not in ('active', 'pending'):
-            continue
-
-        group_dict['display_name'] = (group_dict.get('title') or
-                                      group_dict.get('name'))
-
-        image_url = group_dict.get('image_url')
-        group_dict['image_display_url'] = image_url
-        if image_url and not image_url.startswith('http'):
-            #munge here should not have an effect only doing it incase
-            #of potential vulnerability of dodgy api input
-            image_url = munge.munge_filename(image_url)
-            group_dict['image_display_url'] = h.url_for_static(
-                'uploads/group/%s' % group_dict.get('image_url'),
-                qualified=True
-            )
-
-        if with_package_counts:
-            facets = query.facets
-            if obj.is_organization:
-                group_dict['packages'] = facets['owner_org'].get(obj.id, 0)
-            else:
-                group_dict['packages'] = facets['groups'].get(obj.name, 0)
-
-        if context.get('for_view'):
-            if group_dict['is_organization']:
-                plugin = plugins.IOrganizationController
-            else:
-                plugin = plugins.IGroupController
-            for item in plugins.PluginImplementations(plugin):
-                group_dict = item.before_view(group_dict)
+                       sort_key=lambda x: x['display_name'], reverse=False,
+                       with_package_counts=True,
+                       include_groups=False):
+
+    group_dictize_context = dict(context.items()[:])
+    # Set options to avoid any SOLR queries for each group, which would
+    # slow things further.
+    group_dictize_context.update((
+        ('include_dataset_count', with_package_counts),
+        ('dataset_count_instead_of_dicts', True),
+        ('include_groups', include_groups),
+        ('include_datasets', False),  # too slow - don't allow
+        ('include_users', False),  # too slow - don't allow
+        ))
+    if with_package_counts and 'dataset_counts' not in group_dictize_context:
+        group_dictize_context['dataset_counts'] = _get_group_package_counts()
+    if context.get('with_capacity'):
+        group_list = [group_dictize(group, group_dictize_context,
+                                    capacity=capacity)
+                      for group, capacity in obj_list]
+    else:
+        group_list = [group_dictize(group, group_dictize_context)
+                      for group in obj_list]
 
-        result_list.append(group_dict)
-    return sorted(result_list, key=sort_key, reverse=reverse)
+    return sorted(group_list, key=sort_key, reverse=reverse)
 
 def resource_list_dictize(res_list, context):
 
@@ -257,7 +226,10 @@ def package_dictize(pkg, context):
     context['with_capacity'] = False
     ## no package counts as cannot fetch from search index at the same
     ## time as indexing to it.
-    result_dict["groups"] = group_list_dictize(result, context,
+    group_list_dictize_context = dict(context.items()[:])
+    group_list_dictize_context['include_extras'] = False  # for speed
+    result_dict["groups"] = group_list_dictize(result,
+                                               group_list_dictize_context,
                                                with_package_counts=False)
     #owning organization
     group_rev = model.group_revision_table
@@ -323,54 +295,106 @@ def _get_members(context, group, member_type):
     return q.all()
 
 
-def group_dictize(group, context):
-    result_dict = d.table_dictize(group, context)
+def _get_group_package_counts():
+    '''For all public groups, return their package counts, as a SOLR facet'''
+    query = search.PackageSearchQuery()
+    q = {'q': '+capacity:public',
+            'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
+            'facet.limit': -1, 'rows': 1}
+    query.run(q)
+    return query.facets
 
-    result_dict['display_name'] = group.display_name
 
-    result_dict['extras'] = extras_dict_dictize(
-        group._extras, context)
+def group_dictize(group, context, **kw):
+    result_dict = d.table_dictize(group, context)
+    result_dict.update(kw)
 
     include_datasets = context.get('include_datasets', True)
+    include_dataset_count = context.get('include_dataset_count', True)
+    include_groups = context.get('include_groups', True)
+    include_tags = context.get('include_tags', True)
+    include_users = context.get('include_users', True)
+    include_extras = context.get('include_extras', True)
 
-    q = {
-        'facet': 'false',
-        'rows': 0,
-    }
-
-    if group.is_organization:
-        q['fq'] = 'owner_org:"{0}"'.format(group.id)
-    else:
-        q['fq'] = 'groups:"{0}"'.format(group.name)
+    result_dict['display_name'] = group.title or group.name
 
-    is_group_member = (context.get('user') and
-         new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read'))
-    if is_group_member:
-        context['ignore_capacity_check'] = True
+    if include_extras:
+        result_dict['extras'] = extras_dict_dictize(
+            group._extras, context)
 
-    if include_datasets:
-        q['rows'] = 1000    # Only the first 1000 datasets are returned
-
-    context_ = dict((k, v) for (k, v) in context.items() if k != 'schema')
-    search_results = logic.get_action('package_search')(context_, q)
+    context['with_capacity'] = True
 
-    if include_datasets:
-        result_dict['packages'] = search_results['results']
+    if include_datasets or include_dataset_count:
+        # group_list traditionally returned count instead of the dicts. It is
+        # deprecated, but this behaviour is maintained for now by setting:
+        # dataset_count_instead_of_dicts = True
+        # dataset_counts = _get_group_package_counts()
+        dataset_count_instead_of_dicts = context.get('dataset_count_instead_of_dicts', False)
+        dataset_counts = context.get('dataset_counts')
+
+        if dataset_count_instead_of_dicts is False or dataset_counts is None:
+            # Ask SOLR for the packages for this org/group
+            q = {
+                'facet': 'false',
+                'rows': 0,
+            }
+
+            if group.is_organization:
+                q['fq'] = 'owner_org:"{0}"'.format(group.id)
+            else:
+                q['fq'] = 'groups:"{0}"'.format(group.name)
 
-    result_dict['package_count'] = search_results['count']
+            is_group_member = (context.get('user') and
+                new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read'))
+            if is_group_member:
+                context['ignore_capacity_check'] = True
 
-    context['with_capacity'] = True
-    result_dict['tags'] = tag_list_dictize(
-        _get_members(context, group, 'tags'),
-        context)
+            if include_datasets and not context.get('for_view'):
+                q['rows'] = 1000    # Only the first 1000 datasets are returned
 
-    result_dict['groups'] = group_list_dictize(
-        _get_members(context, group, 'groups'),
-        context)
-
-    result_dict['users'] = user_list_dictize(
-        _get_members(context, group, 'users'),
-        context)
+            search_context = dict((k, v) for (k, v) in context.items() if k != 'schema')
+            search_results = logic.get_action('package_search')(search_context, q)
+            package_count = search_results['count']
+        else:
+            # Use the package_counts passed in
+            facets = dataset_counts
+            if group.is_organization:
+                package_count = facets['owner_org'].get(group.id, 0)
+            else:
+                package_count = facets['groups'].get(group.name, 0)
+
+        if dataset_count_instead_of_dicts:
+            result_dict['packages'] = package_count
+        elif include_datasets:
+            result_dict['packages'] = search_results['results']
+
+        result_dict['package_count'] = package_count
+
+    if include_tags:
+        result_dict['tags'] = tag_list_dictize(
+            _get_members(context, group, 'tags'),
+            context)
+
+    if include_groups:
+        group_list_context = dict(context.items()[:])
+        group_list_context.update((
+            ('include_tags', False),
+            ('include_users', False),
+            ))
+        result_dict['groups'] = group_list_dictize(
+            _get_members(group_list_context, group, 'groups'),
+            context)
+
+    import pdb; pdb.set_trace()
+    if include_users:
+        if 'is_group_member' not in dir():
+            # expensive call, so avoid if was done earlier in this method
+            is_group_member = (context.get('user') and
+                new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read'))
+        if is_group_member:
+            result_dict['users'] = user_list_dictize(
+                _get_members(context, group, 'users'),
+                context)
 
     context['with_capacity'] = False
 
diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py
index a5687ecc62d..35927492606 100644
--- a/ckan/logic/action/get.py
+++ b/ckan/logic/action/get.py
@@ -8,6 +8,7 @@
 
 from pylons import config
 import sqlalchemy
+from paste.deploy.converters import asbool
 
 import ckan.lib.dictization
 import ckan.logic as logic
@@ -387,8 +388,13 @@ def _group_or_org_list(context, data_dict, is_org=False):
                                total=1)
 
     all_fields = data_dict.get('all_fields', None)
+    include_extras = asbool(data_dict.get('include_extras', False))
 
     query = model.Session.query(model.Group).join(model.GroupRevision)
+    if all_fields and include_extras:
+        # this does an eager load of the extras, avoiding an sql query every
+        # time group_list_dictize accesses a group's extra.
+        query = query.options(sqlalchemy.orm.joinedload(model.Group._extras))
     query = query.filter(model.GroupRevision.state == 'active')
     query = query.filter(model.GroupRevision.current == True)
     if groups:
@@ -404,9 +410,23 @@ def _group_or_org_list(context, data_dict, is_org=False):
     query = query.filter(model.GroupRevision.is_organization == is_org)
 
     groups = query.all()
-    group_list = model_dictize.group_list_dictize(groups, context,
-                                                  lambda x: x[sort_info[0][0]],
-                                                  sort_info[0][1] == 'desc')
+    group_list_context = dict(context.items()[:])
+    if not all_fields:
+        group_list_context.update((
+            ('include_tags', False),
+            ('include_extras', False),
+            ))
+    else:
+        group_list_context.update((
+            ('include_tags', asbool(data_dict.get('include_tags', False))),
+            ('include_extras', include_extras),
+            ))
+    group_list = model_dictize.group_list_dictize(
+        groups, group_list_context,
+        sort_key=lambda x: x[sort_info[0][0]],
+        reverse=sort_info[0][1] == 'desc',
+        with_package_counts=all_fields or sort_info[0][0] == 'packages',
+        include_groups=asbool(data_dict.get('include_groups', False)))
 
     if not all_fields:
         group_list = [group[ref_group_by] for group in group_list]
@@ -427,9 +447,19 @@ def group_list(context, data_dict):
     :param groups: a list of names of the groups to return, if given only
         groups whose names are in this list will be returned (optional)
     :type groups: list of strings
-    :param all_fields: return full group dictionaries instead of  just names
+    :param all_fields: return group dictionaries instead of just names. Only
+        core fields are returned - get some more using the include_* options
         (optional, default: ``False``)
     :type all_fields: boolean
+    :param include_extras: if all_fields, include the group extra fields
+        (optional, default: ``False``)
+    :type include_extras: boolean
+    :param include_tags: if all_fields, include the group tags
+        (optional, default: ``False``)
+    :type include_tags: boolean
+    :param include_groups: if all_fields, include the groups the groups are in
+        (optional, default: ``False``)
+    :type include_groups: boolean
 
     :rtype: list of strings
 
@@ -453,7 +483,17 @@ def organization_list(context, data_dict):
         if given only groups whose names are in this list will be
         returned (optional)
     :type organizations: list of strings
-    :param all_fields: return full group dictionaries instead of  just names
+    :param all_fields: return group dictionaries instead of just names. Only
+        core fields are returned - get some more using the include_* options
+        (optional, default: ``False``)
+    :type all_fields: boolean
+    :param include_extras: if all_fields, include the group extra fields
+        (optional, default: ``False``)
+    :type include_extras: boolean
+    :param include_tags: if all_fields, include the group tags
+        (optional, default: ``False``)
+    :type include_tags: boolean
+    :param include_groups: if all_fields, include the groups the groups are in
         (optional, default: ``False``)
     :type all_fields: boolean
 

From 123f715be7dfd6c96a3e5e93aaa1f3c76f39682f Mon Sep 17 00:00:00 2001
From: David Read <david.read@hackneyworkshop.com>
Date: Thu, 10 Jul 2014 21:19:32 +0000
Subject: [PATCH 24/36] [#1768] Add tests. Take advantage of dictization params
 where convenient. Refactor packages_field option.

---
 ckan/lib/dictization/model_dictize.py         | 163 ++++++++------
 ckan/logic/action/get.py                      |  33 +--
 .../lib/dictization/test_model_dictize.py     | 206 ++++++++++++++++++
 ckan/new_tests/logic/action/test_get.py       |  66 +++++-
 4 files changed, 372 insertions(+), 96 deletions(-)
 create mode 100644 ckan/new_tests/lib/dictization/test_model_dictize.py

diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py
index 0ff85e3295f..8e346d11286 100644
--- a/ckan/lib/dictization/model_dictize.py
+++ b/ckan/lib/dictization/model_dictize.py
@@ -1,3 +1,14 @@
+'''
+These dictize functions generally take a domain object (such as Package) and
+convert it to a dictionary, including related objects (e.g. for Package it
+includes PackageTags, PackageExtras, PackageGroup etc).
+
+The basic recipe is to call:
+
+    dictized = ckan.lib.dictization.table_dictize(domain_object)
+
+which builds the dictionary by iterating over the table columns.
+'''
 import datetime
 import urlparse
 
@@ -17,26 +28,38 @@
 def group_list_dictize(obj_list, context,
                        sort_key=lambda x: x['display_name'], reverse=False,
                        with_package_counts=True,
-                       include_groups=False):
+                       include_groups=False,
+                       include_tags=False,
+                       include_extras=False):
 
     group_dictize_context = dict(context.items()[:])
     # Set options to avoid any SOLR queries for each group, which would
     # slow things further.
-    group_dictize_context.update((
-        ('include_dataset_count', with_package_counts),
-        ('dataset_count_instead_of_dicts', True),
-        ('include_groups', include_groups),
-        ('include_datasets', False),  # too slow - don't allow
-        ('include_users', False),  # too slow - don't allow
-        ))
+    group_dictize_options = {
+            'packages_field': 'dataset_count' if with_package_counts else None,
+            # don't allow packages_field='datasets' as it is too slow
+            'include_groups': include_groups,
+            'include_tags': include_tags,
+            'include_extras': include_extras,
+            'include_users': False,  # too slow - don't allow
+            }
+        #('include_dataset_count', with_package_counts),
+        #('dataset_count_instead_of_dicts', True),
+        #('include_groups', include_groups),
+        #('include_datasets', False),  # too slow - don't allow
+        #('include_users', False),  # too slow - don't allow
+        #))
     if with_package_counts and 'dataset_counts' not in group_dictize_context:
-        group_dictize_context['dataset_counts'] = _get_group_package_counts()
+        # 'dataset_counts' will already be in the context in the case that
+        # group_list_dictize recurses via group_dictize (groups in groups)
+        group_dictize_context['dataset_counts'] = get_group_dataset_counts()
     if context.get('with_capacity'):
         group_list = [group_dictize(group, group_dictize_context,
-                                    capacity=capacity)
+                                    capacity=capacity, **group_dictize_options)
                       for group, capacity in obj_list]
     else:
-        group_list = [group_dictize(group, group_dictize_context)
+        group_list = [group_dictize(group, group_dictize_context,
+                                    **group_dictize_options)
                       for group in obj_list]
 
     return sorted(group_list, key=sort_key, reverse=reverse)
@@ -226,10 +249,8 @@ def package_dictize(pkg, context):
     context['with_capacity'] = False
     ## no package counts as cannot fetch from search index at the same
     ## time as indexing to it.
-    group_list_dictize_context = dict(context.items()[:])
-    group_list_dictize_context['include_extras'] = False  # for speed
-    result_dict["groups"] = group_list_dictize(result,
-                                               group_list_dictize_context,
+    ## tags, extras and sub-groups are not included for speed
+    result_dict["groups"] = group_list_dictize(result, context,
                                                with_package_counts=False)
     #owning organization
     group_rev = model.group_revision_table
@@ -295,27 +316,41 @@ def _get_members(context, group, member_type):
     return q.all()
 
 
-def _get_group_package_counts():
-    '''For all public groups, return their package counts, as a SOLR facet'''
+def get_group_dataset_counts():
+    '''For all public groups, return their dataset counts, as a SOLR facet'''
     query = search.PackageSearchQuery()
     q = {'q': '+capacity:public',
-            'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
-            'facet.limit': -1, 'rows': 1}
+         'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
+         'facet.limit': -1, 'rows': 1}
     query.run(q)
     return query.facets
 
 
-def group_dictize(group, context, **kw):
+def group_dictize(group, context,
+                  include_groups=True,
+                  include_tags=True,
+                  include_users=True,
+                  include_extras=True,
+                  packages_field='datasets',
+                  **kw):
+    '''
+    Turns a Group object and related into a dictionary. The related objects
+    like tags are included unless you specify it in the params.
+
+    :param packages_field: determines the format of the `packages` field - can
+    be `datasets`, `dataset_count`, `none_but_include_package_count` or None.
+    If set to `dataset_count` or `none_but_include_package_count` then you
+    can precalculate dataset counts in advance by supplying:
+    context['dataset_counts'] = get_group_dataset_counts()
+    '''
+    assert packages_field in ('datasets', 'dataset_count',
+                              'none_but_include_package_count', None)
+    if packages_field in ('dataset_count', 'none_but_include_package_count'):
+        dataset_counts = context.get('dataset_counts', None)
+
     result_dict = d.table_dictize(group, context)
     result_dict.update(kw)
 
-    include_datasets = context.get('include_datasets', True)
-    include_dataset_count = context.get('include_dataset_count', True)
-    include_groups = context.get('include_groups', True)
-    include_tags = context.get('include_tags', True)
-    include_users = context.get('include_users', True)
-    include_extras = context.get('include_extras', True)
-
     result_dict['display_name'] = group.title or group.name
 
     if include_extras:
@@ -324,77 +359,67 @@ def group_dictize(group, context, **kw):
 
     context['with_capacity'] = True
 
-    if include_datasets or include_dataset_count:
-        # group_list traditionally returned count instead of the dicts. It is
-        # deprecated, but this behaviour is maintained for now by setting:
-        # dataset_count_instead_of_dicts = True
-        # dataset_counts = _get_group_package_counts()
-        dataset_count_instead_of_dicts = context.get('dataset_count_instead_of_dicts', False)
-        dataset_counts = context.get('dataset_counts')
-
-        if dataset_count_instead_of_dicts is False or dataset_counts is None:
-            # Ask SOLR for the packages for this org/group
+    if packages_field:
+        def get_packages_for_this_group(group_):
+            # Ask SOLR for the list of packages for this org/group
             q = {
                 'facet': 'false',
                 'rows': 0,
             }
 
-            if group.is_organization:
-                q['fq'] = 'owner_org:"{0}"'.format(group.id)
+            if group_.is_organization:
+                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
             else:
-                q['fq'] = 'groups:"{0}"'.format(group.name)
+                q['fq'] = 'groups:"{0}"'.format(group_.name)
 
             is_group_member = (context.get('user') and
-                new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read'))
+                new_authz.has_user_permission_for_group_or_org(group_.id, context.get('user'), 'read'))
             if is_group_member:
                 context['ignore_capacity_check'] = True
 
-            if include_datasets and not context.get('for_view'):
+            if not context.get('for_view'):
                 q['rows'] = 1000    # Only the first 1000 datasets are returned
 
             search_context = dict((k, v) for (k, v) in context.items() if k != 'schema')
             search_results = logic.get_action('package_search')(search_context, q)
-            package_count = search_results['count']
+            return search_results['count'], search_results['results']
+        if packages_field == 'datasets':
+            package_count, packages = get_packages_for_this_group(group)
+            result_dict['packages'] = packages
         else:
-            # Use the package_counts passed in
-            facets = dataset_counts
-            if group.is_organization:
-                package_count = facets['owner_org'].get(group.id, 0)
+            # i.e. packages_field is 'dataset_count' or
+            # 'none_but_include_package_count'
+            if dataset_counts is None:
+                package_count, packages = get_packages_for_this_group(group)
             else:
-                package_count = facets['groups'].get(group.name, 0)
-
-        if dataset_count_instead_of_dicts:
-            result_dict['packages'] = package_count
-        elif include_datasets:
-            result_dict['packages'] = search_results['results']
+                # Use the pre-calculated package_counts passed in.
+                facets = dataset_counts
+                if group.is_organization:
+                    package_count = facets['owner_org'].get(group.id, 0)
+                else:
+                    package_count = facets['groups'].get(group.name, 0)
+            if packages_field != 'none_but_include_package_count':
+                result_dict['packages'] = package_count
 
         result_dict['package_count'] = package_count
 
     if include_tags:
+        # group tags are not creatable via the API yet, but that was(/is) a
+        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
         result_dict['tags'] = tag_list_dictize(
             _get_members(context, group, 'tags'),
             context)
 
     if include_groups:
-        group_list_context = dict(context.items()[:])
-        group_list_context.update((
-            ('include_tags', False),
-            ('include_users', False),
-            ))
+        # these sub-groups won't have tags or extras for speed
         result_dict['groups'] = group_list_dictize(
-            _get_members(group_list_context, group, 'groups'),
-            context)
+            _get_members(context, group, 'groups'),
+            context, include_groups=True)
 
-    import pdb; pdb.set_trace()
     if include_users:
-        if 'is_group_member' not in dir():
-            # expensive call, so avoid if was done earlier in this method
-            is_group_member = (context.get('user') and
-                new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read'))
-        if is_group_member:
-            result_dict['users'] = user_list_dictize(
-                _get_members(context, group, 'users'),
-                context)
+        result_dict['users'] = user_list_dictize(
+            _get_members(context, group, 'users'),
+            context)
 
     context['with_capacity'] = False
 
diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py
index 35927492606..d6fa46d5a3f 100644
--- a/ckan/logic/action/get.py
+++ b/ckan/logic/action/get.py
@@ -388,10 +388,11 @@ def _group_or_org_list(context, data_dict, is_org=False):
                                total=1)
 
     all_fields = data_dict.get('all_fields', None)
-    include_extras = asbool(data_dict.get('include_extras', False))
+    include_extras = all_fields and \
+                     asbool(data_dict.get('include_extras', False))
 
     query = model.Session.query(model.Group).join(model.GroupRevision)
-    if all_fields and include_extras:
+    if include_extras:
         # this does an eager load of the extras, avoiding an sql query every
         # time group_list_dictize accesses a group's extra.
         query = query.options(sqlalchemy.orm.joinedload(model.Group._extras))
@@ -410,23 +411,21 @@ def _group_or_org_list(context, data_dict, is_org=False):
     query = query.filter(model.GroupRevision.is_organization == is_org)
 
     groups = query.all()
-    group_list_context = dict(context.items()[:])
-    if not all_fields:
-        group_list_context.update((
-            ('include_tags', False),
-            ('include_extras', False),
-            ))
+    if all_fields:
+        include_tags = asbool(data_dict.get('include_tags', False))
     else:
-        group_list_context.update((
-            ('include_tags', asbool(data_dict.get('include_tags', False))),
-            ('include_extras', include_extras),
-            ))
+        include_tags = False
+    # even if we are not going to return all_fields, we need to dictize all the
+    # groups so that we can sort by any field.
     group_list = model_dictize.group_list_dictize(
-        groups, group_list_context,
+        groups, context,
         sort_key=lambda x: x[sort_info[0][0]],
         reverse=sort_info[0][1] == 'desc',
         with_package_counts=all_fields or sort_info[0][0] == 'packages',
-        include_groups=asbool(data_dict.get('include_groups', False)))
+        include_groups=asbool(data_dict.get('include_groups', False)),
+        include_tags=include_tags,
+        include_extras=include_extras,
+        )
 
     if not all_fields:
         group_list = [group[ref_group_by] for group in group_list]
@@ -1059,7 +1058,8 @@ def _group_or_org_show(context, data_dict, is_org=False):
     include_datasets = data_dict.get('include_datasets', True)
     if isinstance(include_datasets, basestring):
         include_datasets = (include_datasets.lower() in ('true', '1'))
-    context['include_datasets'] = include_datasets
+    packages_field = 'datasets' if include_datasets \
+                     else 'none_but_include_package_count'
 
     if group is None:
         raise NotFound
@@ -1073,7 +1073,8 @@ def _group_or_org_show(context, data_dict, is_org=False):
     else:
         _check_access('group_show', context, data_dict)
 
-    group_dict = model_dictize.group_dictize(group, context)
+    group_dict = model_dictize.group_dictize(group, context,
+                                             packages_field=packages_field)
 
     if is_org:
         plugin_type = plugins.IOrganizationController
diff --git a/ckan/new_tests/lib/dictization/test_model_dictize.py b/ckan/new_tests/lib/dictization/test_model_dictize.py
new file mode 100644
index 00000000000..9f45a402d68
--- /dev/null
+++ b/ckan/new_tests/lib/dictization/test_model_dictize.py
@@ -0,0 +1,206 @@
+from nose.tools import assert_equal
+
+from ckan.lib.dictization import model_dictize
+from ckan import model
+from ckan.lib import search
+
+from ckan.new_tests import helpers, factories
+
+
+class TestDictize:
+
+    def setup(self):
+        helpers.reset_db()
+        search.clear()
+
+    def test_group_list_dictize(self):
+        group = factories.Group()
+        group_list = model.Session.query(model.Group).filter_by().all()
+        context = {'model': model, 'session': model.Session}
+
+        group_dicts = model_dictize.group_list_dictize(group_list, context)
+
+        assert_equal(len(group_dicts), 1)
+        assert_equal(group_dicts[0]['name'], group['name'])
+        assert_equal(group_dicts[0]['packages'], 0)
+        assert 'extras' not in group_dicts[0]
+        assert 'tags' not in group_dicts[0]
+        assert 'groups' not in group_dicts[0]
+
+    def test_group_list_dictize_sorted(self):
+        factories.Group(name='aa')
+        factories.Group(name='bb')
+        group_list = [model.Group.get(u'bb'),
+                      model.Group.get(u'aa')]
+        context = {'model': model, 'session': model.Session}
+
+        group_dicts = model_dictize.group_list_dictize(group_list, context)
+
+        # list is resorted by name
+        assert_equal(group_dicts[0]['name'], 'aa')
+        assert_equal(group_dicts[1]['name'], 'bb')
+
+    def test_group_list_dictize_reverse_sorted(self):
+        factories.Group(name='aa')
+        factories.Group(name='bb')
+        group_list = [model.Group.get(u'aa'),
+                      model.Group.get(u'bb')]
+        context = {'model': model, 'session': model.Session}
+
+        group_dicts = model_dictize.group_list_dictize(group_list, context,
+                                                       reverse=True)
+
+        assert_equal(group_dicts[0]['name'], 'bb')
+        assert_equal(group_dicts[1]['name'], 'aa')
+
+    def test_group_list_dictize_without_package_count(self):
+        group = factories.Group()
+        factories.Dataset(group=group['name'])
+        group_list = [model.Group.get(group['name'])]
+        context = {'model': model, 'session': model.Session}
+
+        group_dicts = model_dictize.group_list_dictize(
+            group_list, context, with_package_counts=False)
+
+        assert 'packages' not in group_dicts[0]
+
+    def test_group_list_dictize_including_extras(self):
+        factories.Group(extras=[{'key': 'k1', 'value': 'v1'}])
+        group_list = model.Session.query(model.Group).filter_by().all()
+        context = {'model': model, 'session': model.Session}
+
+        group_dicts = model_dictize.group_list_dictize(group_list, context,
+                                                       include_extras=True)
+
+        assert_equal(group_dicts[0]['extras'][0]['key'], 'k1')
+
+    def test_group_list_dictize_including_tags(self):
+        factories.Group()
+        # group tags aren't in the group_create schema, so its slightly more
+        # convoluted way to create them
+        group_obj = model.Session.query(model.Group).first()
+        tag = model.Tag(name='t1')
+        model.Session.add(tag)
+        model.Session.commit()
+        tag = model.Session.query(model.Tag).first()
+        group_obj = model.Session.query(model.Group).first()
+        member = model.Member(group=group_obj, table_id=tag.id,
+                              table_name='tag')
+        model.Session.add(member)
+        model.repo.new_revision()
+        model.Session.commit()
+        group_list = model.Session.query(model.Group).filter_by().all()
+        context = {'model': model, 'session': model.Session}
+
+        group_dicts = model_dictize.group_list_dictize(group_list, context,
+                                                       include_tags=True)
+
+        assert_equal(group_dicts[0]['tags'][0]['name'], 't1')
+
+    def test_group_list_dictize_including_groups(self):
+        factories.Group(name='parent')
+        factories.Group(name='child', groups=[{'name': 'parent'}])
+        group_list = [model.Group.get(u'parent'), model.Group.get(u'child')]
+        context = {'model': model, 'session': model.Session}
+
+        child_dict, parent_dict = model_dictize.group_list_dictize(
+                group_list, context, include_groups=True)
+
+        assert_equal(parent_dict['name'], 'parent')
+        assert_equal(child_dict['name'], 'child')
+        assert_equal(parent_dict['groups'], [])
+        assert_equal(child_dict['groups'][0]['name'], 'parent')
+
+    def test_group_dictize(self):
+        group = factories.Group()
+        factories.Dataset(group=group['name'])
+        group_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session}
+
+        group = model_dictize.group_dictize(group_obj, context)
+
+        assert_equal(group['name'], 'test_group_0')
+        assert_equal(group['packages'], [])
+        assert_equal(group['extras'], [])
+        assert_equal(group['tags'], [])
+        assert_equal(group['groups'], [])
+
+    def test_group_dictize_without_packages(self):
+        # group_list_dictize might not be interested in packages at all
+        # so sets these options. e.g. it is not all_fields nor are the results
+        # sorted by the number of packages.
+        factories.Group()
+        group_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session}
+
+        group = model_dictize.group_dictize(group_obj, context,
+                                            packages_field=None)
+
+        assert 'packages' not in group
+
+    def test_group_dictize_with_package_list(self):
+        group_ = factories.Group()
+        package = factories.Dataset(groups=[{'name': group_['name']}])
+        group_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session}
+
+        group = model_dictize.group_dictize(group_obj, context)
+
+        assert_equal(type(group['packages']), list)
+        assert_equal(len(group['packages']), 1)
+        assert_equal(group['packages'][0]['name'], package['name'])
+
+    def test_group_dictize_with_package_count(self):
+        # group_list_dictize calls it like this by default
+        group_ = factories.Group()
+        factories.Dataset(groups=[{'name': group_['name']}])
+        group_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session,
+                   'dataset_counts': model_dictize.get_group_dataset_counts()
+                   }
+
+        group = model_dictize.group_dictize(group_obj, context,
+                                            packages_field='dataset_count')
+
+        assert_equal(group['packages'], 1)
+        assert_equal(group['package_count'], 1)
+
+    def test_group_dictize_with_no_packages_field_but_still_package_count(self):
+        # logic.get.group_show calls it like this when not include_datasets
+        group_ = factories.Group()
+        factories.Dataset(groups=[{'name': group_['name']}])
+        group_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session}
+        # not supplying dataset_counts in this case either
+
+        group = model_dictize.group_dictize(group_obj, context,
+                           packages_field='none_but_include_package_count')
+
+        assert 'packages' not in group
+        assert_equal(group['package_count'], 1)
+
+    def test_group_dictize_for_org_with_package_list(self):
+        org_ = factories.Organization()
+        package = factories.Dataset(owner_org=org_['id'])
+        group_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session}
+
+        org = model_dictize.group_dictize(group_obj, context)
+
+        assert_equal(type(org['packages']), list)
+        assert_equal(len(org['packages']), 1)
+        assert_equal(org['packages'][0]['name'], package['name'])
+
+    def test_group_dictize_for_org_with_package_count(self):
+        # group_list_dictize calls it like this by default
+        org_ = factories.Organization()
+        factories.Dataset(owner_org=org_['id'])
+        org_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session,
+                   'dataset_counts': model_dictize.get_group_dataset_counts()
+                   }
+
+        org = model_dictize.group_dictize(org_obj, context,
+                                          packages_field='dataset_count')
+
+        assert_equal(org['packages'], 1)
diff --git a/ckan/new_tests/logic/action/test_get.py b/ckan/new_tests/logic/action/test_get.py
index 816004aab42..cf523586b23 100644
--- a/ckan/new_tests/logic/action/test_get.py
+++ b/ckan/new_tests/logic/action/test_get.py
@@ -11,30 +11,74 @@
 
 class TestGet(object):
 
-    @classmethod
-    def setup_class(cls):
-        helpers.reset_db()
-
     def setup(self):
-        import ckan.model as model
-
-        # Reset the db before each test method.
-        model.repo.rebuild_db()
+        helpers.reset_db()
 
         # Clear the search index
         search.clear()
 
     def test_group_list(self):
 
-        user = factories.User()
-        group1 = factories.Group(user=user)
-        group2 = factories.Group(user=user)
+        group1 = factories.Group()
+        group2 = factories.Group()
 
         group_list = helpers.call_action('group_list')
 
         assert (sorted(group_list) ==
                 sorted([g['name'] for g in [group1, group2]]))
 
+    def test_group_list_all_fields(self):
+
+        group = factories.Group()
+
+        group_list = helpers.call_action('group_list', all_fields=True)
+
+        expected_group = dict(group.items()[:])
+        for field in ('users', 'tags', 'extras', 'groups'):
+            del expected_group[field]
+        expected_group['packages'] = 0
+        assert group_list[0] == expected_group
+        assert 'extras' not in group_list[0]
+        assert 'tags' not in group_list[0]
+        assert 'groups' not in group_list[0]
+        assert 'users' not in group_list[0]
+        assert 'datasets' not in group_list[0]
+
+    def test_group_list_extras_returned(self):
+
+        group = factories.Group(extras=[{'key': 'key1', 'value': 'val1'}])
+
+        group_list = helpers.call_action('group_list', all_fields=True,
+                                         include_extras=True)
+
+        eq(group_list[0]['extras'], group['extras'])
+        eq(group_list[0]['extras'][0]['key'], 'key1')
+
+    # NB there is no test_group_list_tags_returned because tags are not in the
+    # group_create schema (yet)
+
+    def test_group_list_groups_returned(self):
+
+        parent_group = factories.Group(tags=[{'name': 'river'}])
+        child_group = factories.Group(groups=[{'name': parent_group['name']}],
+                                      tags=[{'name': 'river'}])
+
+        group_list = helpers.call_action('group_list', all_fields=True,
+                                         include_groups=True)
+
+        child_group_returned = group_list[0]
+        if group_list[0]['name'] == child_group['name']:
+            child_group_returned, parent_group_returned = group_list
+        else:
+            child_group_returned, parent_group_returned = group_list[::-1]
+        expected_parent_group = dict(parent_group.items()[:])
+        for field in ('users', 'tags', 'extras'):
+            del expected_parent_group[field]
+        expected_parent_group['capacity'] = u'public'
+        expected_parent_group['packages'] = 0
+        expected_parent_group['package_count'] = 0
+        eq(child_group_returned['groups'], [expected_parent_group])
+
     def test_group_show(self):
 
         group = factories.Group(user=factories.User())

From 275f4b9c695ae6384058bbffe2785633cdd89319 Mon Sep 17 00:00:00 2001
From: David Read <david.read@hackneyworkshop.com>
Date: Fri, 11 Jul 2014 15:38:28 +0000
Subject: [PATCH 25/36] [#1768] Tests fix / PEP8.

---
 ckan/logic/action/get.py                            | 12 +++++++++---
 .../new_tests/lib/dictization/test_model_dictize.py |  5 +++--
 ckan/tests/lib/test_dictization.py                  | 13 ++++++++++++-
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py
index d6fa46d5a3f..815cd5166ed 100644
--- a/ckan/logic/action/get.py
+++ b/ckan/logic/action/get.py
@@ -447,7 +447,10 @@ def group_list(context, data_dict):
         groups whose names are in this list will be returned (optional)
     :type groups: list of strings
     :param all_fields: return group dictionaries instead of just names. Only
-        core fields are returned - get some more using the include_* options
+        core fields are returned - get some more using the include_* options.
+        Returning a list of packages is too expensive, so the `packages`
+        property for each group is deprecated, but there is a count of the
+        packages in the `package_count` property.
         (optional, default: ``False``)
     :type all_fields: boolean
     :param include_extras: if all_fields, include the group extra fields
@@ -469,7 +472,7 @@ def group_list(context, data_dict):
 
 
 def organization_list(context, data_dict):
-    '''Return a list of the names of the site's organizations.
+    '''Return a list of the names of the site's organizations. 
 
     :param order_by: the field to sort the list by, must be ``'name'`` or
       ``'packages'`` (optional, default: ``'name'``) Deprecated use sort.
@@ -483,7 +486,10 @@ def organization_list(context, data_dict):
         returned (optional)
     :type organizations: list of strings
     :param all_fields: return group dictionaries instead of just names. Only
-        core fields are returned - get some more using the include_* options
+        core fields are returned - get some more using the include_* options.
+        Returning a list of packages is too expensive, so the `packages`
+        property for each group is deprecated, but there is a count of the
+        packages in the `package_count` property.
         (optional, default: ``False``)
     :type all_fields: boolean
     :param include_extras: if all_fields, include the group extra fields
diff --git a/ckan/new_tests/lib/dictization/test_model_dictize.py b/ckan/new_tests/lib/dictization/test_model_dictize.py
index 9f45a402d68..997bc6273a6 100644
--- a/ckan/new_tests/lib/dictization/test_model_dictize.py
+++ b/ckan/new_tests/lib/dictization/test_model_dictize.py
@@ -104,7 +104,7 @@ def test_group_list_dictize_including_groups(self):
         context = {'model': model, 'session': model.Session}
 
         child_dict, parent_dict = model_dictize.group_list_dictize(
-                group_list, context, include_groups=True)
+            group_list, context, include_groups=True)
 
         assert_equal(parent_dict['name'], 'parent')
         assert_equal(child_dict['name'], 'child')
@@ -174,7 +174,8 @@ def test_group_dictize_with_no_packages_field_but_still_package_count(self):
         # not supplying dataset_counts in this case either
 
         group = model_dictize.group_dictize(group_obj, context,
-                           packages_field='none_but_include_package_count')
+                                            packages_field=
+                                            'none_but_include_package_count')
 
         assert 'packages' not in group
         assert_equal(group['package_count'], 1)
diff --git a/ckan/tests/lib/test_dictization.py b/ckan/tests/lib/test_dictization.py
index 954e3d5d4aa..cbbfe7f0338 100644
--- a/ckan/tests/lib/test_dictization.py
+++ b/ckan/tests/lib/test_dictization.py
@@ -885,8 +885,17 @@ def test_16_group_dictized(self):
                              'title': 'simple',
                              'type': 'organization',
                             }
+        anna2_dictized = {
+            u'license_id': u'other-open',
+            u'name': u'annakarenina2',
+            u'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
+            u'title': u'A Novel By Tolstoy',
+            u'url': u'http://www.annakarenina.com',
+            u'version': u'0.7a',
+            }
         model.repo.new_revision()
         group_dict_save(simple_group_dict, context)
+        table_dict_save(anna2_dictized, model.Package, context)
         model.Session.commit()
         model.Session.remove()
 
@@ -922,12 +931,14 @@ def test_16_group_dictized(self):
                                {'key': u'media', 'state': u'active', 'value': u'"dvd"'}],
                     'tags': [{'capacity': u'public', 'display_name': u'russian', 'name': u'russian'}],
                     'groups': [{'description': u'',
-                               'capacity' : 'public',
+                               'capacity': u'public',
                                'display_name': u'simple',
                                'image_url': u'',
                                'image_display_url': u'',
                                'name': u'simple',
                                'packages': 0,
+                               'package_count': 0,
+                               'groups': [],
                                'state': u'active',
                                'is_organization': False,
                                'title': u'simple',

From aae1ac1da73dc2eea32fb0487221dff2cee6b910 Mon Sep 17 00:00:00 2001
From: David Read <david.read@hackneyworkshop.com>
Date: Fri, 11 Jul 2014 17:33:45 +0100
Subject: [PATCH 26/36] [#1768] Use package_count for the sort as well.
 Transition from legacy tests.

---
 ckan/logic/action/get.py                      |  16 ++-
 .../lib/dictization/test_model_dictize.py     |  57 +++++++-
 ckan/new_tests/logic/action/test_get.py       |  52 +++++++
 ckan/tests/lib/test_dictization.py            | 136 ------------------
 ckan/tests/logic/test_action.py               | 130 -----------------
 5 files changed, 114 insertions(+), 277 deletions(-)

diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py
index 815cd5166ed..f9386777cb1 100644
--- a/ckan/logic/action/get.py
+++ b/ckan/logic/action/get.py
@@ -380,11 +380,12 @@ def _group_or_org_list(context, data_dict, is_org=False):
             sort = order_by
     # if the sort is packages and no sort direction is supplied we want to do a
     # reverse sort to maintain compatibility.
-    if sort.strip() == 'packages':
-        sort = 'packages desc'
+    if sort.strip() in ('packages', 'package_count'):
+        sort = 'package_count desc'
 
     sort_info = _unpick_search(sort,
-                               allowed_fields=['name', 'packages'],
+                               allowed_fields=['name', 'packages',
+                                               'package_count'],
                                total=1)
 
     all_fields = data_dict.get('all_fields', None)
@@ -421,7 +422,8 @@ def _group_or_org_list(context, data_dict, is_org=False):
         groups, context,
         sort_key=lambda x: x[sort_info[0][0]],
         reverse=sort_info[0][1] == 'desc',
-        with_package_counts=all_fields or sort_info[0][0] == 'packages',
+        with_package_counts=all_fields or
+        sort_info[0][0] in ('packages', 'package_count'),
         include_groups=asbool(data_dict.get('include_groups', False)),
         include_tags=include_tags,
         include_extras=include_extras,
@@ -441,7 +443,7 @@ def group_list(context, data_dict):
     :type order_by: string
     :param sort: sorting of the search results.  Optional.  Default:
         "name asc" string of field name and sort-order. The allowed fields are
-        'name' and 'packages'
+        'name' and 'package_count'
     :type sort: string
     :param groups: a list of names of the groups to return, if given only
         groups whose names are in this list will be returned (optional)
@@ -472,14 +474,14 @@ def group_list(context, data_dict):
 
 
 def organization_list(context, data_dict):
-    '''Return a list of the names of the site's organizations. 
+    '''Return a list of the names of the site's organizations.
 
     :param order_by: the field to sort the list by, must be ``'name'`` or
       ``'packages'`` (optional, default: ``'name'``) Deprecated use sort.
     :type order_by: string
     :param sort: sorting of the search results.  Optional.  Default:
         "name asc" string of field name and sort-order. The allowed fields are
-        'name' and 'packages'
+        'name' and 'package_count'
     :type sort: string
     :param organizations: a list of names of the groups to return,
         if given only groups whose names are in this list will be
diff --git a/ckan/new_tests/lib/dictization/test_model_dictize.py b/ckan/new_tests/lib/dictization/test_model_dictize.py
index 997bc6273a6..197aa8b4721 100644
--- a/ckan/new_tests/lib/dictization/test_model_dictize.py
+++ b/ckan/new_tests/lib/dictization/test_model_dictize.py
@@ -53,10 +53,27 @@ def test_group_list_dictize_reverse_sorted(self):
         assert_equal(group_dicts[0]['name'], 'bb')
         assert_equal(group_dicts[1]['name'], 'aa')
 
+    def test_group_list_dictize_sort_by_package_count(self):
+        factories.Group(name='aa')
+        factories.Group(name='bb')
+        factories.Dataset(groups=[{'name': 'aa'}, {'name': 'bb'}])
+        factories.Dataset(groups=[{'name': 'bb'}])
+        group_list = [model.Group.get(u'bb'),
+                      model.Group.get(u'aa')]
+        context = {'model': model, 'session': model.Session}
+
+        group_dicts = model_dictize.group_list_dictize(
+            group_list, context, sort_key=lambda x: x['package_count'],
+            with_package_counts=True)
+
+        # list is resorted by package counts
+        assert_equal(group_dicts[0]['name'], 'aa')
+        assert_equal(group_dicts[1]['name'], 'bb')
+
     def test_group_list_dictize_without_package_count(self):
-        group = factories.Group()
-        factories.Dataset(group=group['name'])
-        group_list = [model.Group.get(group['name'])]
+        group_ = factories.Group()
+        factories.Dataset(groups=[{'name': group_['name']}])
+        group_list = [model.Group.get(group_['name'])]
         context = {'model': model, 'session': model.Session}
 
         group_dicts = model_dictize.group_list_dictize(
@@ -113,7 +130,6 @@ def test_group_list_dictize_including_groups(self):
 
     def test_group_dictize(self):
         group = factories.Group()
-        factories.Dataset(group=group['name'])
         group_obj = model.Session.query(model.Group).filter_by().first()
         context = {'model': model, 'session': model.Session}
 
@@ -125,6 +141,39 @@ def test_group_dictize(self):
         assert_equal(group['tags'], [])
         assert_equal(group['groups'], [])
 
+    def test_group_dictize_group_with_dataset(self):
+        group_ = factories.Group()
+        package = factories.Dataset(groups=[{'name': group_['name']}])
+        group_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session}
+
+        group = model_dictize.group_dictize(group_obj, context)
+
+        assert_equal(group['packages'][0]['name'], package['name'])
+        assert_equal(group['packages'][0]['groups'][0]['name'], group_['name'])
+
+    def test_group_dictize_group_with_extra(self):
+        factories.Group(extras=[{'key': 'k1', 'value': 'v1'}])
+        group_obj = model.Session.query(model.Group).filter_by().first()
+        context = {'model': model, 'session': model.Session}
+
+        group = model_dictize.group_dictize(group_obj, context)
+
+        assert_equal(group['extras'][0]['key'], 'k1')
+
+    def test_group_dictize_group_with_parent_group(self):
+        factories.Group(name='parent')
+        factories.Group(name='child', groups=[{'name': 'parent'}])
+        group_obj = model.Group.get('child')
+        context = {'model': model, 'session': model.Session}
+
+        group = model_dictize.group_dictize(group_obj, context)
+
+        assert_equal(len(group['groups']), 1)
+        assert_equal(group['groups'][0]['name'], 'parent')
+        assert_equal(group['groups'][0]['packages'], 0)  # deprecated
+        assert_equal(group['groups'][0]['package_count'], 0)
+
     def test_group_dictize_without_packages(self):
         # group_list_dictize might not be interested in packages at all
         # so sets these options. e.g. it is not all_fields nor are the results
diff --git a/ckan/new_tests/logic/action/test_get.py b/ckan/new_tests/logic/action/test_get.py
index cf523586b23..227446cf185 100644
--- a/ckan/new_tests/logic/action/test_get.py
+++ b/ckan/new_tests/logic/action/test_get.py
@@ -27,6 +27,30 @@ def test_group_list(self):
         assert (sorted(group_list) ==
                 sorted([g['name'] for g in [group1, group2]]))
 
+    def test_group_list_sort_by_package_count(self):
+
+        factories.Group(name='aa')
+        factories.Group(name='bb')
+        factories.Dataset(groups=[{'name': 'aa'}, {'name': 'bb'}])
+        factories.Dataset(groups=[{'name': 'bb'}])
+
+        group_list = helpers.call_action('group_list', sort='package_count')
+        # default is descending order
+
+        eq(group_list, ['bb', 'aa'])
+
+    def test_group_list_sort_by_package_count_ascending(self):
+
+        factories.Group(name='aa')
+        factories.Group(name='bb')
+        factories.Dataset(groups=[{'name': 'aa'}, {'name': 'bb'}])
+        factories.Dataset(groups=[{'name': 'aa'}])
+
+        group_list = helpers.call_action('group_list',
+                                         sort='package_count asc')
+
+        eq(group_list, ['bb', 'aa'])
+
     def test_group_list_all_fields(self):
 
         group = factories.Group()
@@ -89,6 +113,20 @@ def test_group_show(self):
         group_dict.pop('num_followers', None)
         assert group_dict == group
 
+    def test_group_show_error_not_found(self):
+
+        nose.tools.assert_raises(
+            logic.NotFound,
+            helpers.call_action, 'group_show', id='does_not_exist')
+
+    def test_group_show_error_for_organization(self):
+
+        org = factories.Organization()
+
+        nose.tools.assert_raises(
+            logic.NotFound,
+            helpers.call_action, 'group_show', id=org['id'])
+
     def test_group_show_packages_returned(self):
 
         user_name = helpers.call_action('get_site_user')['name']
@@ -152,6 +190,20 @@ def test_organization_show(self):
         org_dict.pop('num_followers', None)
         assert org_dict == org
 
+    def test_organization_show_error_not_found(self):
+
+        nose.tools.assert_raises(
+            logic.NotFound,
+            helpers.call_action, 'organization_show', id='does_not_exist')
+
+    def test_organization_show_error_for_group(self):
+
+        group = factories.Group()
+
+        nose.tools.assert_raises(
+            logic.NotFound,
+            helpers.call_action, 'organization_show', id=group['id'])
+
     def test_organization_show_packages_returned(self):
 
         user_name = helpers.call_action('get_site_user')['name']
diff --git a/ckan/tests/lib/test_dictization.py b/ckan/tests/lib/test_dictization.py
index cbbfe7f0338..9e3956c320e 100644
--- a/ckan/tests/lib/test_dictization.py
+++ b/ckan/tests/lib/test_dictization.py
@@ -10,7 +10,6 @@
 
 from ckan.lib.dictization.model_dictize import (package_dictize,
                                                 resource_dictize,
-                                                group_dictize,
                                                 activity_dictize,
                                                 package_to_api1,
                                                 package_to_api2,
@@ -876,141 +875,6 @@ def test_15_api_to_dictize(self):
 
         package_dictized = self.remove_changable_columns(package_dictize(pkg, context))
 
-    def test_16_group_dictized(self):
-
-        context = {"model": model,
-                  "session": model.Session}
-
-        simple_group_dict = {'name': 'simple',
-                             'title': 'simple',
-                             'type': 'organization',
-                            }
-        anna2_dictized = {
-            u'license_id': u'other-open',
-            u'name': u'annakarenina2',
-            u'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
-            u'title': u'A Novel By Tolstoy',
-            u'url': u'http://www.annakarenina.com',
-            u'version': u'0.7a',
-            }
-        model.repo.new_revision()
-        group_dict_save(simple_group_dict, context)
-        table_dict_save(anna2_dictized, model.Package, context)
-        model.Session.commit()
-        model.Session.remove()
-
-        context = {"model": model,
-                  "session": model.Session}
-
-        group_dict = {'name': 'help',
-                      'title': 'help',
-                      'approval_status': 'approved',
-                      'extras': [{'key': 'genre', 'value': u'"horror"'},
-                                 {'key': 'media', 'value': u'"dvd"'}],
-                      'packages':[{'name': 'annakarenina2'}],
-                      'users':[{'name': 'annafan'}],
-                      'groups':[{'name': 'simple'}],
-                      'tags':[{'name': 'russian'}]
-                      }
-
-        model.repo.new_revision()
-        group_dict_save(group_dict, context)
-        model.Session.commit()
-        model.Session.remove()
-
-        group = model.Session.query(model.Group).filter_by(name=u'help').one()
-
-        context = {"model": model,
-                  "session": model.Session,
-                  "user": None}
-
-        group_dictized = group_dictize(group, context)
-
-        expected = {'description': u'',
-                    'extras': [{'key': u'genre', 'state': u'active', 'value': u'"horror"'},
-                               {'key': u'media', 'state': u'active', 'value': u'"dvd"'}],
-                    'tags': [{'capacity': u'public', 'display_name': u'russian', 'name': u'russian'}],
-                    'groups': [{'description': u'',
-                               'capacity': u'public',
-                               'display_name': u'simple',
-                               'image_url': u'',
-                               'image_display_url': u'',
-                               'name': u'simple',
-                               'packages': 0,
-                               'package_count': 0,
-                               'groups': [],
-                               'state': u'active',
-                               'is_organization': False,
-                               'title': u'simple',
-                               'type': u'organization',
-                               'approval_status': u'approved'}],
-                    'users': [{'about': u'I love reading Annakarenina. My site: http://anna.com',
-                              'display_name': u'annafan',
-                              'capacity' : 'public',
-                              'state': 'active',
-                              'sysadmin': False,
-                              'email_hash': 'd41d8cd98f00b204e9800998ecf8427e',
-                              'fullname': None,
-                              'name': u'annafan',
-                              'number_administered_packages': 1L,
-                              'number_of_edits': 0L,
-                              'activity_streams_email_notifications': False,
-                              }],
-                    'name': u'help',
-                    'display_name': u'help',
-                    'image_url': u'',
-                    'image_display_url': u'',
-                    'package_count': 1,
-                    'is_organization': False,
-                    'packages': [{u'author': None,
-                              u'author_email': None,
-                              u'creator_user_id': None,
-                              u'extras': [],
-                              u'groups':[
-                                 {u'title': u'help',
-                                  u'display_name': u'help',
-                                  u'description': u'',
-                                  u'name': u'help',
-                                  u'image_display_url': u''}
-                              ],
-                              u'isopen': True,
-                              u'license_id': u'other-open',
-                              u'license_title': u'Other (Open)',
-                              u'maintainer': None,
-                              u'maintainer_email': None,
-                              u'name': u'annakarenina2',
-                              u'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n<http://ckan.net/>\n\n',
-                              u'num_resources': 0,
-                              u'num_tags': 0,
-                              u'organization': None,
-                              u'owner_org': None,
-                              u'private': False,
-                              u'relationships_as_object': [],
-                              u'relationships_as_subject': [],
-                              u'resources': [],
-                              u'state': u'active',
-                              u'tags': [],
-                              u'title': u'A Novel By Tolstoy',
-                              u'tracking_summary': {u'recent': 0, u'total': 0},
-                              u'type': u'dataset',
-                              u'url': u'http://www.annakarenina.com',
-                              u'version': u'0.7a'},
-                    ],
-                    'state': u'active',
-                    'approval_status': u'approved',
-                    'title': u'help',
-                    'type': u'group'}
-        expected['packages'] = sorted(expected['packages'], key=lambda x: x['name'])
-        result = self.remove_changable_columns(group_dictized)
-        result['packages'] = sorted(result['packages'], key=lambda x: x['name'])
-
-        assert_equal(sorted(result.keys()), sorted(expected.keys()))
-        
-        for key in result:
-            if key in ('is_organization', 'package_count'):
-                continue
-            assert_equal(sorted(result[key]), sorted(expected[key]))
-        assert_equal(result['package_count'], expected['package_count'])
 
     def test_17_group_apis_to_dict(self):
 
diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py
index e41271b179a..45ab4d3cf23 100644
--- a/ckan/tests/logic/test_action.py
+++ b/ckan/tests/logic/test_action.py
@@ -579,83 +579,6 @@ def test_user_delete_requires_data_dict_with_key_id(self):
         assert res_obj['success'] is False
         assert res_obj['error']['id'] == ['Missing value']
 
-    def test_13_group_list(self):
-        postparams = '%s=1' % json.dumps({})
-        res = self.app.post('/api/action/group_list', params=postparams)
-        res_obj = json.loads(res.body)
-        assert res_obj['result'] == ['david', 'roger']
-        assert res_obj['success'] is True
-        assert res_obj['help'].startswith(
-                "Return a list of the names of the site's groups.")
-
-        # Test GET request
-        res = self.app.get('/api/action/group_list')
-        res_obj = json.loads(res.body)
-        assert res_obj['result'] == ['david', 'roger']
-
-        #Get all fields
-        postparams = '%s=1' % json.dumps({'all_fields':True})
-        res = self.app.post('/api/action/group_list', params=postparams)
-        res_obj = json.loads(res.body)
-
-        assert res_obj['success'] == True
-        assert res_obj['result'][0]['name'] == 'david'
-        assert res_obj['result'][0]['display_name'] == 'Dave\'s books'
-        assert res_obj['result'][0]['packages'] == 2
-        assert res_obj['result'][1]['name'] == 'roger', res_obj['result'][1]
-        assert res_obj['result'][1]['packages'] == 1
-        assert 'id' in res_obj['result'][0]
-        assert 'revision_id' in res_obj['result'][0]
-        assert 'state' in res_obj['result'][0]
-
-    def test_13_group_list_by_size(self):
-        postparams = '%s=1' % json.dumps({'order_by': 'packages'})
-        res = self.app.post('/api/action/group_list',
-                            params=postparams)
-        res_obj = json.loads(res.body)
-        assert_equal(sorted(res_obj['result']), ['david','roger'])
-
-    def test_13_group_list_by_size_all_fields(self):
-        postparams = '%s=1' % json.dumps({'order_by': 'packages',
-                                          'all_fields': 1})
-        res = self.app.post('/api/action/group_list',
-                            params=postparams)
-        res_obj = json.loads(res.body)
-        result = res_obj['result']
-        assert_equal(len(result), 2)
-        assert_equal(result[0]['name'], 'david')
-        assert_equal(result[0]['packages'], 2)
-        assert_equal(result[1]['name'], 'roger')
-        assert_equal(result[1]['packages'], 1)
-
-    def test_14_group_show(self):
-        postparams = '%s=1' % json.dumps({'id':'david'})
-        res = self.app.post('/api/action/group_show', params=postparams)
-        res_obj = json.loads(res.body)
-        assert res_obj['help'].startswith("Return the details of a group.")
-        assert res_obj['success'] == True
-        result = res_obj['result']
-        assert result['name'] == 'david'
-        assert result['title'] == result['display_name'] == 'Dave\'s books'
-        assert result['state'] == 'active'
-        assert 'id' in result
-        assert 'revision_id' in result
-        assert len(result['packages']) == 2
-
-        #Group not found
-        postparams = '%s=1' % json.dumps({'id':'not_present_in_the_db'})
-        res = self.app.post('/api/action/group_show', params=postparams,
-                            status=StatusCodes.STATUS_404_NOT_FOUND)
-
-        res_obj = json.loads(res.body)
-        pprint(res_obj)
-        assert res_obj['error'] == {
-                '__type': 'Not Found Error',
-                'message': 'Not found'
-            }
-        assert res_obj['help'].startswith('Return the details of a group.')
-        assert res_obj['success'] is False
-
     def test_16_user_autocomplete(self):
         # Create deleted user to make sure he won't appear in the user_list
         deleted_user = CreateTestData.create_user('joe')
@@ -1740,59 +1663,6 @@ def test_02_bulk_delete(self):
         assert json.loads(res.body)['result']['count'] == 0
 
 
-class TestGroupOrgView(WsgiAppCase):
-
-    @classmethod
-    def setup_class(cls):
-        model.Session.add_all([
-            model.User(name=u'sysadmin', apikey=u'sysadmin',
-                       password=u'sysadmin', sysadmin=True),
-        ])
-        model.Session.commit()
-
-        org_dict = '%s=1' % json.dumps({
-            'name': 'org',
-        })
-        res = cls.app.post('/api/action/organization_create',
-                            extra_environ={'Authorization': 'sysadmin'},
-                            params=org_dict)
-        cls.org_id = json.loads(res.body)['result']['id']
-
-        group_dict = '%s=1' % json.dumps({
-            'name': 'group',
-        })
-        res = cls.app.post('/api/action/group_create',
-                            extra_environ={'Authorization': 'sysadmin'},
-                            params=group_dict)
-        cls.group_id = json.loads(res.body)['result']['id']
-
-    @classmethod
-    def teardown_class(self):
-        model.repo.rebuild_db()
-
-    def test_1_view_org(self):
-        res = self.app.get('/api/action/organization_show',
-                params={'id': self.org_id})
-        res_json = json.loads(res.body)
-        assert res_json['success'] is True
-
-        res = self.app.get('/api/action/group_show',
-                params={'id': self.org_id}, expect_errors=True)
-        res_json = json.loads(res.body)
-        assert res_json['success'] is False
-
-    def test_2_view_group(self):
-        res = self.app.get('/api/action/group_show',
-                params={'id': self.group_id})
-        res_json = json.loads(res.body)
-        assert res_json['success'] is True
-
-        res = self.app.get('/api/action/organization_show',
-                params={'id': self.group_id}, expect_errors=True)
-        res_json = json.loads(res.body)
-        assert res_json['success'] is False
-
-
 class TestResourceAction(WsgiAppCase):
 
     sysadmin_user = None

From 0c597eee9307226df588e900130ca6ebe3e5f623 Mon Sep 17 00:00:00 2001
From: Tom Mortimer-Jones <tom@morty.co.uk>
Date: Tue, 15 Jul 2014 20:55:05 +0100
Subject: [PATCH 27/36] Update set-permissions in testing docs

---
 doc/contributing/test.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/contributing/test.rst b/doc/contributing/test.rst
index e0316e60968..d77a4a884e6 100644
--- a/doc/contributing/test.rst
+++ b/doc/contributing/test.rst
@@ -45,7 +45,7 @@ Create test databases:
 
     sudo -u postgres createdb -O |database_user| |test_database| -E utf-8
     sudo -u postgres createdb -O |database_user| |test_datastore| -E utf-8
-    paster datastore set-permissions postgres -c test-core.ini
+    paster datastore set-permissions -c test-core.ini | sudo -u postgres psql
 
 This database connection is specified in the ``test-core.ini`` file by the
 ``sqlalchemy.url`` parameter.

From ca516c03de58c982df67b27710400702849b1b33 Mon Sep 17 00:00:00 2001
From: Tom Mortimer-Jones <tom@morty.co.uk>
Date: Tue, 15 Jul 2014 21:10:21 +0100
Subject: [PATCH 28/36] Update set-permissions in paster docs

---
 doc/maintaining/paster.rst | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/doc/maintaining/paster.rst b/doc/maintaining/paster.rst
index 833ea812fd9..e10688c156e 100644
--- a/doc/maintaining/paster.rst
+++ b/doc/maintaining/paster.rst
@@ -240,13 +240,7 @@ Make sure that the datastore URLs are set properly before you run these commands
 
 Usage::
 
-    datastore set-permissions SQL_SUPER_USER
-
-    Where:
-        SQL_SUPER_USER is the name of a postgres user with sufficient
-                       permissions to create new tables, users, and grant
-                       and revoke new permissions.  Typically, this would
-                       be the "postgres" user.
+    datastore set-permissions  - shows a SQL script to execute
 
 
 .. _paster db:

From 4e2ab892be6556bd904cf4082727e6c9252c8da4 Mon Sep 17 00:00:00 2001
From: Mikko Koho <mikko.koho@csc.fi>
Date: Mon, 21 Jul 2014 11:46:17 +0300
Subject: [PATCH 29/36] Add quotes to package ID and name in Solr queries to
 prevent Solr errors with custom dataset identifiers.

---
 ckan/lib/search/query.py    | 2 +-
 ckan/logic/action/update.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py
index 881ddc96938..9d0cb70f2fc 100644
--- a/ckan/lib/search/query.py
+++ b/ckan/lib/search/query.py
@@ -264,7 +264,7 @@ def get_all_entity_ids(self, max_results=1000):
     def get_index(self,reference):
         query = {
             'rows': 1,
-            'q': 'name:%s OR id:%s' % (reference,reference),
+            'q': 'name:"%s" OR id:"%s"' % (reference,reference),
             'wt': 'json',
             'fq': 'site_id:"%s"' % config.get('ckan.site_id')}
 
diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py
index 3cbd2abbe8d..770869e44e9 100644
--- a/ckan/logic/action/update.py
+++ b/ckan/logic/action/update.py
@@ -1189,7 +1189,7 @@ def process_solr(q):
     count = 0
     q = []
     for id in datasets:
-        q.append('id:%s' % (id))
+        q.append('id:"%s"' % (id))
         count += 1
         if count % BATCH_SIZE == 0:
             process_solr(' OR '.join(q))

From a2c979b81d9bf19dc8f9e4446a82b84154a24b1b Mon Sep 17 00:00:00 2001
From: amercader <amercadero@gmail.com>
Date: Tue, 22 Jul 2014 11:48:28 +0100
Subject: [PATCH 30/36] [#1436] Fix encoding error on user form when using
 email notifications

If the site title had special characters an encoding error was raised,
as the site title was an unicode string but the helper text wasn't. We
used the Jinja2 "string" filter to fix this. Translations should not be
affected so this can be backported.
---
 ckan/templates/user/edit_user_form.html | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ckan/templates/user/edit_user_form.html b/ckan/templates/user/edit_user_form.html
index 691a51317c6..7bd64d99a84 100644
--- a/ckan/templates/user/edit_user_form.html
+++ b/ckan/templates/user/edit_user_form.html
@@ -16,7 +16,8 @@
 
     {% if c.show_email_notifications %}
       {% call form.checkbox('activity_streams_email_notifications', label=_('Subscribe to notification emails'), id='field-activity-streams-email-notifications', value=True, checked=c.userobj.activity_streams_email_notifications) %}
-      {{ form.info(_("You will receive notification emails from {site_title}, e.g. when you have new activities on your dashboard.".format(site_title=g.site_title)), classes=['info-help-tight']) }}
+      {% set helper_text = _("You will receive notification emails from {site_title}, e.g. when you have new activities on your dashboard."|string) %}
+      {{ form.info(helper_text.format(site_title=g.site_title), classes=['info-help-tight']) }}
       {% endcall %}
     {% endif %}
 

From 76a00a44eade030ca4efddece85e7e8604c0617c Mon Sep 17 00:00:00 2001
From: nigelb <nigel.babu@okfn.org>
Date: Mon, 28 Jul 2014 11:18:23 +0530
Subject: [PATCH 31/36] Fix broken docstring, r=bustage

---
 ckan/plugins/interfaces.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py
index 593be9abddb..e3455bfeb24 100644
--- a/ckan/plugins/interfaces.py
+++ b/ckan/plugins/interfaces.py
@@ -206,7 +206,9 @@ class IResourceView(Interface):
 
     '''
     def info(self):
-        '''Return configuration for the view. Info can return
+        '''
+        Return configuration for the view. Info can return the following.
+
         :param name: name of view type
         :param title: title of view type (Optional)
         :param schema: schema to validate extra view config (Optional)

From 73909560b1d91f4c0605e56471a2e6787b0bdb54 Mon Sep 17 00:00:00 2001
From: David Read <david.read@hackneyworkshop.com>
Date: Mon, 28 Jul 2014 10:30:07 +0000
Subject: [PATCH 32/36] [#1768] Remove stray commented code.

---
 ckan/lib/dictization/model_dictize.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py
index 592ce8d3612..e735106350d 100644
--- a/ckan/lib/dictization/model_dictize.py
+++ b/ckan/lib/dictization/model_dictize.py
@@ -43,12 +43,6 @@ def group_list_dictize(obj_list, context,
             'include_extras': include_extras,
             'include_users': False,  # too slow - don't allow
             }
-        #('include_dataset_count', with_package_counts),
-        #('dataset_count_instead_of_dicts', True),
-        #('include_groups', include_groups),
-        #('include_datasets', False),  # too slow - don't allow
-        #('include_users', False),  # too slow - don't allow
-        #))
     if with_package_counts and 'dataset_counts' not in group_dictize_context:
         # 'dataset_counts' will already be in the context in the case that
         # group_list_dictize recurses via group_dictize (groups in groups)

From 26fb24c055efe99aaf122599f8c619eb4dfc1d31 Mon Sep 17 00:00:00 2001
From: Nick Stenning <nick@whiteink.com>
Date: Mon, 28 Jul 2014 16:33:59 +0200
Subject: [PATCH 33/36] docker: Don't do configuration in the runfile

This commit extracts the config file generation and database
initialisation parts of the CKAN service into separate scripts which are
run at container "boot" rather than in the service file.

This is just a bit cleaner, and means that the scripts are guaranteed to
only run once on boot, rather than every time CKAN is started (e.g. if
CKAN crashes, runit will rerun the `run` file, but not the init scripts)

This also makes it easier to replace the configuration step with, for
example, a standalone Python script if this is deemed easier to
maintain.
---
 Dockerfile                            |  1 +
 contrib/docker/my_init.d/50_configure | 64 ++++++++++++++++++++++++++
 contrib/docker/my_init.d/70_initdb    |  4 ++
 contrib/docker/svc/ckan/run           | 66 +--------------------------
 4 files changed, 70 insertions(+), 65 deletions(-)
 create mode 100755 contrib/docker/my_init.d/50_configure
 create mode 100755 contrib/docker/my_init.d/70_initdb

diff --git a/Dockerfile b/Dockerfile
index 110802d6ac9..84c831b066b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -49,6 +49,7 @@ RUN mkdir /var/cache/nginx
 ADD ./contrib/docker/main.cf /etc/postfix/main.cf
 
 # Configure runit
+ADD ./contrib/docker/my_init.d /etc/my_init.d
 ADD ./contrib/docker/svc /etc/service
 CMD ["/sbin/my_init"]
 
diff --git a/contrib/docker/my_init.d/50_configure b/contrib/docker/my_init.d/50_configure
new file mode 100755
index 00000000000..d8479bed1ac
--- /dev/null
+++ b/contrib/docker/my_init.d/50_configure
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+# URL for the primary database, in the format expected by sqlalchemy (required
+# unless linked to a container called 'db')
+: ${DATABASE_URL:=}
+# URL for solr (required unless linked to a container called 'solr')
+: ${SOLR_URL:=}
+# Email to which errors should be sent (optional, default: none)
+: ${ERROR_EMAIL:=}
+
+set -eu
+
+CONFIG="${CKAN_CONFIG}/ckan.ini"
+
+abort () {
+  echo "$@" >&2
+  exit 1
+}
+
+write_config () {
+  "$CKAN_HOME"/bin/paster make-config ckan "$CONFIG"
+
+  sed -i \
+      -e "s&^sqlalchemy\.url =.*&sqlalchemy.url = ${DATABASE_URL}&" \
+      -e "s&^#solr_url.*&solr_url = ${SOLR_URL}&" \
+      -e "s&^#ckan.storage_path.*&ckan.storage_path = /var/lib/ckan&" \
+      -e "s&^email_to.*&#email_to = disabled@example.com&" \
+      -e "s&^error_email_from.*&error_email_from = ckan@$(hostname -f)&" \
+      "${CONFIG}"
+
+  if [ -n "$ERROR_EMAIL" ]; then
+    sed -i -e "s&^#email_to.*&email_to = ${ERROR_EMAIL}&" "$CONFIG"
+  fi
+}
+
+link_postgres_url () {
+  local user=$DB_ENV_POSTGRESQL_USER
+  local pass=$DB_ENV_POSTGRESQL_USER
+  local db=$DB_ENV_POSTGRESQL_DB
+  local host=$DB_PORT_5432_TCP_ADDR
+  local port=$DB_PORT_5432_TCP_PORT
+  echo "postgresql://${user}:${pass}@${host}:${port}/${db}"
+}
+
+link_solr_url () {
+  local host=$SOLR_PORT_8983_TCP_ADDR
+  local port=$SOLR_PORT_8983_TCP_PORT
+  echo "http://${host}:${port}/solr/ckan"
+}
+
+# If we don't already have a config file, bootstrap
+if [ ! -e "$CONFIG" ]; then
+  if [ -z "$DATABASE_URL" ]; then
+    if ! DATABASE_URL=$(link_postgres_url); then
+      abort "no DATABASE_URL specified and linked container called 'db' was not found"
+    fi
+  fi
+  if [ -z "$SOLR_URL" ]; then
+    if ! SOLR_URL=$(link_solr_url); then
+      abort "no SOLR_URL specified and linked container called 'solr' was not found"
+    fi
+  fi
+  write_config
+fi
diff --git a/contrib/docker/my_init.d/70_initdb b/contrib/docker/my_init.d/70_initdb
new file mode 100755
index 00000000000..54981b612de
--- /dev/null
+++ b/contrib/docker/my_init.d/70_initdb
@@ -0,0 +1,4 @@
+#!/bin/sh
+set -eu
+
+"$CKAN_HOME"/bin/paster --plugin=ckan db init -c "${CKAN_CONFIG}/ckan.ini"
diff --git a/contrib/docker/svc/ckan/run b/contrib/docker/svc/ckan/run
index 612ee193387..c8b0f2fe5ed 100755
--- a/contrib/docker/svc/ckan/run
+++ b/contrib/docker/svc/ckan/run
@@ -1,71 +1,7 @@
 #!/bin/sh
 exec 2>&1
 
-# URL for the primary database, in the format expected by sqlalchemy (required
-# unless linked to a container called 'db')
-: ${DATABASE_URL:=}
-# URL for solr (required unless linked to a container called 'solr')
-: ${SOLR_URL:=}
-# Email to which errors should be sent (optional, default: none)
-: ${ERROR_EMAIL:=}
+set -e
 
-set -eu
-
-CONFIG="${CKAN_CONFIG}/ckan.ini"
-
-abort () {
-  echo "$@" >&2
-  exit 1
-}
-
-bootstrap () {
-  "$CKAN_HOME"/bin/paster make-config ckan "$CONFIG"
-
-  sed -i \
-      -e "s&^sqlalchemy\.url =.*&sqlalchemy.url = ${DATABASE_URL}&" \
-      -e "s&^#solr_url.*&solr_url = ${SOLR_URL}&" \
-      -e "s&^#ckan.storage_path.*&ckan.storage_path = /var/lib/ckan&" \
-      -e "s&^email_to.*&#email_to = disabled@example.com&" \
-      -e "s&^error_email_from.*&error_email_from = ckan@$(hostname -f)&" \
-      "${CONFIG}"
-
-  if [ -n "$ERROR_EMAIL" ]; then
-    sed -i -e "s&^#email_to.*&email_to = ${ERROR_EMAIL}&" "$CONFIG"
-  fi
-
-  cd "$CKAN_HOME"/src/ckan && "$CKAN_HOME"/bin/paster db init -c "$CONFIG"
-}
-
-link_postgres_url () {
-  local user=$DB_ENV_POSTGRESQL_USER
-  local pass=$DB_ENV_POSTGRESQL_USER
-  local db=$DB_ENV_POSTGRESQL_DB
-  local host=$DB_PORT_5432_TCP_ADDR
-  local port=$DB_PORT_5432_TCP_PORT
-  echo "postgresql://${user}:${pass}@${host}:${port}/${db}"
-}
-
-link_solr_url () {
-  local host=$SOLR_PORT_8983_TCP_ADDR
-  local port=$SOLR_PORT_8983_TCP_PORT
-  echo "http://${host}:${port}/solr/ckan"
-}
-
-# If we don't already have a config file, bootstrap
-if [ ! -e "$CONFIG" ]; then
-  if [ -z "$DATABASE_URL" ]; then
-    if ! DATABASE_URL=$(link_postgres_url); then
-      abort "no DATABASE_URL specified and linked container called 'db' was not found"
-    fi
-  fi
-  if [ -z "$SOLR_URL" ]; then
-    if ! SOLR_URL=$(link_solr_url); then
-      abort "no SOLR_URL specified and linked container called 'solr' was not found"
-    fi
-  fi
-  bootstrap
-fi
-
-set +u
 . /etc/apache2/envvars
 exec /usr/sbin/apache2 -D FOREGROUND

From 21732d2c729bc48933487b54e1b5bcbe3f39e23d Mon Sep 17 00:00:00 2001
From: joetsoi <joe.yeung.tsoi@gmail.com>
Date: Tue, 29 Jul 2014 14:31:42 +0100
Subject: [PATCH 34/36] [#1701] PEP8

---
 ckan/new_tests/lib/search/test_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ckan/new_tests/lib/search/test_index.py b/ckan/new_tests/lib/search/test_index.py
index 5daeafdbc17..29791824a1f 100644
--- a/ckan/new_tests/lib/search/test_index.py
+++ b/ckan/new_tests/lib/search/test_index.py
@@ -120,7 +120,7 @@ def test_index_date_field(self):
         assert_equal(
             response.results[0]['test_tim_date'].strftime('%Y-%m-%d %H:%M:%S'),
             '2014-03-22 05:42:14'
-            )
+        )
 
     def test_index_date_field_wrong_value(self):
 

From d00d276742cbe8b1a71fa52c191702127c6e6cea Mon Sep 17 00:00:00 2001
From: Simon Avril <simon.m.r.avril@gmail.com>
Date: Mon, 4 Aug 2014 15:04:06 +1000
Subject: [PATCH 35/36] Update user-guide.rst

---
 doc/user-guide.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/user-guide.rst b/doc/user-guide.rst
index 997b28e6e62..46734e8bb2d 100644
--- a/doc/user-guide.rst
+++ b/doc/user-guide.rst
@@ -63,7 +63,7 @@ government departments, each of which publishes data. Each organization can
 have its own workflow and authorizations, allowing it to manage its own
 publishing process.
 
-An organization's administrators can add add individual users to it, with
+An organization's administrators can add individual users to it, with
 different roles depending on the level of authorization needed. A user in an
 organization can create a dataset owned by that organization. In the default
 setup, this dataset is initially private, and visible only to other users in

From d9e624087d725501212ac75e9daf3cf46817e9fd Mon Sep 17 00:00:00 2001
From: Vitor Baptista <vitor@vitorbaptista.com>
Date: Mon, 4 Aug 2014 11:48:26 -0300
Subject: [PATCH 36/36] Fix typo in ofs.storage_dir deprecation message

---
 ckan/lib/uploader.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py
index a9c843a0d0d..8456e24f420 100644
--- a/ckan/lib/uploader.py
+++ b/ckan/lib/uploader.py
@@ -27,8 +27,8 @@ def get_storage_path():
         if storage_path:
             _storage_path = storage_path
         elif ofs_impl == 'pairtree' and ofs_storage_dir:
-            log.warn('''Please use config option ckan.storage_path instaed of
-                     ofs.storage_path''')
+            log.warn('''Please use config option ckan.storage_path instead of
+                     ofs.storage_dir''')
             _storage_path = ofs_storage_dir
             return _storage_path
         elif ofs_impl: