Skip to content

Commit

Permalink
Merge branch '3484_revision_ui_removal2' into remove-vdm
Browse files Browse the repository at this point in the history
  • Loading branch information
David Read committed Mar 18, 2019
2 parents d81a83a + d66d7de commit d3203f6
Show file tree
Hide file tree
Showing 21 changed files with 345 additions and 118 deletions.
41 changes: 15 additions & 26 deletions .travis.yml
@@ -1,10 +1,22 @@
group: travis_latest
language: python

flake8-steps: &flake8-steps
env: FLAKE8=true
cache: pip
install: pip install flake8
before_script:
- flake8 --version
# stop the build if there are Python syntax errors or undefined names
- flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ./ckan/include/rjsmin.py
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
script:
- true

matrix:
include:
- python: "2.7"
sudo: required

services:
- docker
Expand All @@ -29,31 +41,8 @@ matrix:
- docker ps -a

- python: "2.7"
env: FLAKE8=true
cache: pip
install:
- pip install flake8
before_script:
- flake8 --version
# stop the build if there are Python syntax errors or undefined names
- flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ./ckan/include/rjsmin.py
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
script:
- true
<<: *flake8-steps

- python: "3.7"
env: FLAKE8=true
dist: xenial # required for Python 3.7
sudo: required # required for Python 3.7
cache: pip
install:
- pip install flake8
before_script:
- flake8 --version
# stop the build if there are Python syntax errors or undefined names
- flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
script:
- true
<<: *flake8-steps
9 changes: 5 additions & 4 deletions CHANGELOG.rst
Expand Up @@ -11,10 +11,11 @@ v.2.9.0 TBA
==================

* This version requires script 'migrate_package_activity.py' to be run
*before* CKAN is upgraded to this version (or higher). This is because this
script takes a while to run, adding in the Activity Stream detail, visible
only to admins by default. You will not be able to run ``paster db upgrade``
until 'migrate_package_activity.py' is done.
*before* CKAN is upgraded to this version (or higher). The idea is you do
this special migration while CKAN is running, because the script takes a
while to run. It adds in the Activity Stream detail, visible only to admins
by default. You will not be able to run ``paster db upgrade`` until
'migrate_package_activity.py' is done.
Download and run migrate_package_activity.py like this:

cd /usr/lib/ckan/default/src/ckan/
Expand Down
12 changes: 5 additions & 7 deletions ckan/controllers/package.py
Expand Up @@ -393,14 +393,13 @@ def read(self, id):

# can the resources be previewed?
for resource in c.pkg_dict['resources']:
# Backwards compatibility with preview interface
resource['can_be_previewed'] = self._resource_preview(
{'resource': resource, 'package': c.pkg_dict})

resource_views = get_action('resource_view_list')(
context, {'id': resource['id']})
resource['has_views'] = len(resource_views) > 0

# Backwards compatibility with preview interface
resource['can_be_previewed'] = bool(len(resource_views))

package_type = c.pkg_dict['type'] or 'dataset'
self._setup_template_variables(context, {'id': id},
package_type=package_type)
Expand Down Expand Up @@ -1116,13 +1115,12 @@ def resource_read(self, id, resource_id):
c.datastore_api = '%s/api/action' % \
config.get('ckan.site_url', '').rstrip('/')

c.resource['can_be_previewed'] = self._resource_preview(
{'resource': c.resource, 'package': c.package})

resource_views = get_action('resource_view_list')(
context, {'id': resource_id})
c.resource['has_views'] = len(resource_views) > 0

c.resource['can_be_previewed'] = bool(len(resource_views))

current_resource_view = None
view_id = request.GET.get('view_id')
if c.resource['can_be_previewed'] and not view_id:
Expand Down
4 changes: 2 additions & 2 deletions ckan/controllers/user.py
Expand Up @@ -497,7 +497,7 @@ def request_reset(self):
mailer.send_reset_link(user_obj)
h.flash_success(_('Please check your inbox for '
'a reset code.'))
h.redirect_to('/')
h.redirect_to(u'home.index')
except mailer.MailerException as e:
h.flash_error(_('Could not send reset link: %s') %
text_type(e))
Expand Down Expand Up @@ -542,7 +542,7 @@ def perform_reset(self, id):
mailer.create_reset_key(user_obj)

h.flash_success(_("Your password has been reset."))
h.redirect_to('/')
h.redirect_to(u'home.index')
except NotAuthorized:
h.flash_error(_('Unauthorized to edit user %s') % id)
except NotFound as e:
Expand Down
21 changes: 7 additions & 14 deletions ckan/lib/dictization/model_dictize.py
Expand Up @@ -13,7 +13,6 @@
'''
import datetime
import urlparse
import copy

from ckan.common import config
from sqlalchemy.sql import select
Expand Down Expand Up @@ -633,19 +632,13 @@ def vocabulary_list_dictize(vocabulary_list, context):
def activity_dictize(activity, context, include_data=False):
activity_dict = d.table_dictize(activity, context)
if not include_data:
# take a copy of the activity data, since the original may be used
# elsewhere during the same render and we don't want to affect that
activity_dict['data'] = copy.deepcopy(activity_dict['data'])
# delete all the data apart from the title field on each data object,
# because that is needed to display it in the activity stream
for obj_key in activity_dict['data'].keys():
obj_data = activity_dict['data'][obj_key]
if isinstance(obj_data, dict):
for key in obj_data.keys():
if key != 'title':
del obj_data[key]
else:
del activity_dict['data'][obj_key]
# replace the data with just a {'title': title} and not the rest of
# the dataset/group/org/custom obj. we need the title to display it
# in the activity stream.
activity_dict['data'] = {
key: {'title': val['title']}
for (key, val) in activity_dict['data'].items()
if isinstance(val, dict) and 'title' in val}
return activity_dict


Expand Down
51 changes: 32 additions & 19 deletions ckan/lib/search/__init__.py
Expand Up @@ -55,7 +55,8 @@ def text_traceback():
'package': PackageSearchQuery
}

SOLR_SCHEMA_FILE_OFFSET = '/admin/file/?file=schema.xml'
SOLR_SCHEMA_FILE_OFFSET_MANAGED = '/schema?wt=schema.xml'
SOLR_SCHEMA_FILE_OFFSET_CLASSIC = '/admin/file/?file=schema.xml'


def _normalize_type(_type):
Expand Down Expand Up @@ -246,17 +247,37 @@ def clear_all():
log.debug("Clearing search index...")
package_index.clear()

def _get_schema_from_solr(file_offset):
    '''Fetch a schema file from the configured SOLR server.

    :param file_offset: URL path (relative to the SOLR base URL) of the
        schema resource to retrieve, e.g. '/schema?wt=schema.xml' or
        '/admin/file/?file=schema.xml'
    :returns: the open file-like HTTP response from urllib2.urlopen
    :raises urllib2.HTTPError/urllib2.URLError: if the request fails
        (callers use this to fall back to the classic schema offset)
    '''
    # SOLR connection settings come from CKAN config via SolrSettings
    solr_url, solr_user, solr_password = SolrSettings.get()

    # Build an HTTP Basic auth header only when both credentials are set
    http_auth = None
    if solr_user is not None and solr_password is not None:
        http_auth = solr_user + ':' + solr_password
        # NOTE: the 'base64' codec on str is Python 2 only
        http_auth = 'Basic ' + http_auth.encode('base64').strip()

    # Normalise away any trailing slash before appending the offset
    url = solr_url.strip('/') + file_offset

    req = urllib2.Request(url=url)
    if http_auth:
        req.add_header('Authorization', http_auth)

    return urllib2.urlopen(req)

def check_solr_schema_version(schema_file=None):
'''
Checks if the schema version of the SOLR server is compatible
with this CKAN version.
The schema will be retrieved from the SOLR server, using the
offset defined in SOLR_SCHEMA_FILE_OFFSET
('/admin/file/?file=schema.xml'). The schema_file parameter
allows to override this pointing to different schema file, but
it should only be used for testing purposes.
offset defined in SOLR_SCHEMA_FILE_OFFSET_MANAGED
('/schema?wt=schema.xml'). If SOLR is set to use the manually
edited `schema.xml`, the schema will be retrieved from the SOLR
server using the offset defined in
SOLR_SCHEMA_FILE_OFFSET_CLASSIC ('/admin/file/?file=schema.xml').
The schema_file parameter allows overriding this by pointing to a
different schema file, but it should only be used for testing
purposes.
If the CKAN instance is configured to not use SOLR or the SOLR
server is not available, the function will return False, as the
Expand All @@ -275,20 +296,12 @@ def check_solr_schema_version(schema_file=None):

# Try to get the schema XML file to extract the version
if not schema_file:
solr_url, solr_user, solr_password = SolrSettings.get()

http_auth = None
if solr_user is not None and solr_password is not None:
http_auth = solr_user + ':' + solr_password
http_auth = 'Basic ' + http_auth.encode('base64').strip()

url = solr_url.strip('/') + SOLR_SCHEMA_FILE_OFFSET

req = urllib2.Request(url=url)
if http_auth:
req.add_header('Authorization', http_auth)

res = urllib2.urlopen(req)
try:
# Try Managed Schema
res = _get_schema_from_solr(SOLR_SCHEMA_FILE_OFFSET_MANAGED)
except urllib2.HTTPError:
# Fallback to Manually Edited schema.xml
res = _get_schema_from_solr(SOLR_SCHEMA_FILE_OFFSET_CLASSIC)
else:
url = 'file://%s' % schema_file
res = urllib2.urlopen(url)
Expand Down
2 changes: 1 addition & 1 deletion ckan/logic/action/get.py
Expand Up @@ -2450,7 +2450,7 @@ def group_activity_list(context, data_dict):
'''
# FIXME: Filter out activities whose subject or object the user is not
# authorized to read.
data_dict['include_data'] = False
data_dict = dict(data_dict, include_data=False)
include_hidden_activity = asbool(context.get('include_hidden_activity'))
_check_access('group_activity_list', context, data_dict)

Expand Down
48 changes: 31 additions & 17 deletions ckan/migration/migrate_package_activity.py
Expand Up @@ -32,6 +32,7 @@
import sys
from collections import defaultdict
from six.moves import input
from six import text_type

# not importing anything from ckan until after the arg parsing, to fail on bad
# args quickly.
Expand Down Expand Up @@ -102,24 +103,22 @@ def migrate_dataset(dataset_name, errors):
import ckan.logic as logic
from ckan import model

context = get_context()
# 'hidden' activity is that by site_user, such as harvests, which are
# not shown in the activity stream because they can be too numerous.
# However these do have Activity objects, and if a hidden Activity is
# followed be a non-hidden one and you look at the changes of that
# non-hidden Activity, then it does a diff with the hidden one (rather than
# the most recent non-hidden one), so it is important to store the
# package_dict in hidden Activity objects.
context[u'include_hidden_activity'] = True
context = dict(get_context(), include_hidden_activity=True)
package_activity_stream = logic.get_action(u'package_activity_list')(
context, {u'id': dataset_name})
num_activities = len(package_activity_stream)
if not num_activities:
print(u' No activities')

context[u'for_view'] = False
# Iterate over this package's existing activity stream objects
for i, activity in enumerate(package_activity_stream):
for i, activity in enumerate(reversed(package_activity_stream)):
# e.g. activity =
# {'activity_type': u'changed package',
# 'id': u'62107f87-7de0-4d17-9c30-90cbffc1b296',
Expand All @@ -137,18 +136,29 @@ def migrate_dataset(dataset_name, errors):
' - no action')
continue

# get the dataset as it was at this revision
context[u'revision_id'] = activity[u'revision_id']
# get the dataset as it was at this revision:
# call package_show just as we do in package.py:activity_stream_item(),
# only with a revision_id
# only with a revision_id (to get it as it was then)
context = dict(
get_context(),
for_view=False,
revision_id=activity[u'revision_id'],
use_cache=False, # avoid the cache (which would give us the
# latest revision)
)
try:
dataset = logic.get_action(u'package_show')(
context,
{u'id': activity[u'object_id'], u'include_tracking': False})
except logic.NotFound as exc:
print(u' Revision missing! Skipping this version '
'(revision_id={})'.format(activity[u'revision_id']))
errors['Revision missing'] += 1
except Exception as exc:
if isinstance(exc, logic.NotFound):
error_msg = u'Revision missing'
else:
error_msg = text_type(exc)
print(u' Error: {}! Skipping this version '
'(revision_id={})'
.format(error_msg, activity[u'revision_id']))
errors[error_msg] += 1
# We shouldn't leave the activity.data['package'] with missing
# resources, extras & tags, which could cause the package_read
# template to raise an exception, when user clicks "View this
Expand All @@ -157,7 +167,9 @@ def migrate_dataset(dataset_name, errors):
try:
dataset = {u'title': activity_obj.data['package']['title']}
except KeyError:
dataset = None
# unlikely the package is not recorded in the activity, but
# not impossible
dataset = {u'title': u'unknown'}

# get rid of revision_timestamp, which wouldn't be there if saved by
# during activity_stream_item() - something to do with not specifying
Expand Down Expand Up @@ -187,10 +199,12 @@ def migrate_dataset(dataset_name, errors):

def wipe_activity_detail(delete_activity_detail):
from ckan import model
num_activity_detail_rows = \
model.Session.execute(u'SELECT count(*) FROM "activity_detail";') \
.fetchall()[0][0]
if num_activity_detail_rows == 0:
activity_detail_has_rows = \
bool(model.Session.execute(
u'SELECT count(*) '
'FROM (SELECT * FROM "activity_detail" LIMIT 1) as t;')
.fetchall()[0][0])
if not activity_detail_has_rows:
print(u'\nactivity_detail table is already emptied')
return
print(
Expand All @@ -204,7 +218,7 @@ def wipe_activity_detail(delete_activity_detail):
delete_activity_detail = \
input(u'Delete activity_detail table content? (y/n):')
if delete_activity_detail.lower()[:1] != u'y':
sys.exit(0)
return
from ckan import model
model.Session.execute(u'DELETE FROM "activity_detail";')
model.Session.commit()
Expand Down
11 changes: 7 additions & 4 deletions ckan/migration/revision_legacy_code.py
Expand Up @@ -107,7 +107,7 @@ def package_dictize_with_revisions(pkg, context):
else:
group = model.group_revision_table
q = select([group]
).where(group.c.id == pkg.owner_org) \
).where(group.c.id == result_dict['owner_org']) \
.where(group.c.state == u'active')
result = execute(q, group, context)
organizations = d.obj_list_dictize(result, context)
Expand Down Expand Up @@ -153,9 +153,12 @@ def package_dictize_with_revisions(pkg, context):
result_dict['license_title'] = pkg.license_id

# creation and modification date
result_dict['metadata_modified'] = pkg.metadata_modified.isoformat()
result_dict['metadata_created'] = pkg.metadata_created.isoformat() \
if pkg.metadata_created else None
if is_latest_revision:
result_dict['metadata_modified'] = pkg.metadata_modified.isoformat()
# (If not is_latest_revision, don't use pkg which is the latest version.
# Instead, use the dates already in result_dict that came from the dictized
# PackageRevision)
result_dict['metadata_created'] = pkg.metadata_created.isoformat()

return result_dict

Expand Down
Expand Up @@ -7,6 +7,6 @@ def upgrade(migrate_engine):
metadata = MetaData()
metadata.bind = migrate_engine
migrate_engine.execute('''
ALTER TABLE public.user
ALTER TABLE "user"
ADD COLUMN activity_streams_email_notifications BOOLEAN DEFAULT FALSE;
''')

0 comments on commit d3203f6

Please sign in to comment.