Merge pull request #23 from GSA/revert-20-fix-pkg-extras

Revert "Fix pkg extras"
ckan · Sep 30, 2021 · eca0ae0
2 parents b653113 + b3b519a
commit eca0ae0
Show file tree

Hide file tree

Showing 2 changed files with 59 additions and 51 deletions.
diff --git a/ckanext/harvest/plugin/__init__.py b/ckanext/harvest/plugin/__init__.py
@@ -79,6 +79,26 @@ def after_delete(self, context, data_dict):
             # Delete the actual HarvestSource object
             _delete_harvest_source_object(context, package_dict)
 
+    def before_view(self, data_dict):
+
+        # check_ckan_version should be more clever than this
+        if p.toolkit.check_ckan_version(max_version='2.1.99') and (
+           'type' not in data_dict or data_dict['type'] != DATASET_TYPE_NAME):
+            # This is a normal dataset: check whether it was harvested and,
+            # if so, add info about the HarvestObject and HarvestSource.
+            harvest_object = model.Session.query(HarvestObject) \
+                    .filter(HarvestObject.package_id == data_dict['id']) \
+                    .filter(HarvestObject.current==True).first() # noqa
+
+            if harvest_object:
+                for key, value in [
+                    ('harvest_object_id', harvest_object.id),
+                    ('harvest_source_id', harvest_object.source.id),
+                    ('harvest_source_title', harvest_object.source.title),
+                        ]:
+                    _add_extra(data_dict, key, value)
+        return data_dict
+
     def before_search(self, search_params):
         '''Prevents the harvesters being shown in dataset search results.'''
 
@@ -89,44 +109,6 @@ def before_search(self, search_params):
 
         return search_params
 
-    def before_index(self, pkg_dict):
-
-        harvest_object = model.Session.query(HarvestObject) \
-                .filter(HarvestObject.package_id == pkg_dict['id']) \
-                .filter(HarvestObject.current == True).first() # noqa
-
-        if harvest_object:
-
-            data_dict = json.loads(pkg_dict['data_dict'])
-
-            validated_data_dict = json.loads(pkg_dict['validated_data_dict'])
-
-            harvest_extras = [
-                ('harvest_object_id', harvest_object.id),
-                ('harvest_source_id', harvest_object.source.id),
-                ('harvest_source_title', harvest_object.source.title),
-            ]
-
-            for key, value in harvest_extras:
-
-                # If the harvest extras are there, remove them. This can happen eg
-                # when calling package_update or resource_update, which call
-                # package_show
-                if data_dict.get('extras'):
-                    data_dict['extras'][:] = [e for e in data_dict.get('extras', [])
-                                              if not e['key'] in harvest_extras]
-
-                data_dict['extras'].append({'key': key, 'value': value})
-
-                validated_data_dict['extras'].append({'key': key, 'value': value})
-
-                pkg_dict['extras_{}'.format('key')] = value
-
-            pkg_dict['data_dict'] = json.dumps(data_dict)
-            pkg_dict['validated_data_dict'] = json.dumps(validated_data_dict)
-
-        return pkg_dict
-
     def after_show(self, context, data_dict):
 
         if 'type' in data_dict and data_dict['type'] == DATASET_TYPE_NAME:
@@ -146,6 +128,36 @@ def after_show(self, context, data_dict):
 
             data_dict['status'] = status_action(context, {'id': source.id})
 
+        elif 'type' not in data_dict or data_dict['type'] != DATASET_TYPE_NAME:
+            # This is a normal dataset: check whether it was harvested and,
+            # if so, add info about the HarvestObject and HarvestSource.
+
+            harvest_object = model.Session.query(HarvestObject) \
+                    .filter(HarvestObject.package_id == data_dict['id']) \
+                    .filter(HarvestObject.current == True).first() # noqa
+
+            # If the harvest extras are already there, remove them. This can
+            # happen e.g. when calling package_update or resource_update, which
+            # call package_show.
+            if data_dict.get('extras'):
+                data_dict['extras'][:] = [e for e in data_dict.get('extras', [])
+                                          if not e['key']
+                                          in ('harvest_object_id', 'harvest_source_id', 'harvest_source_title',)]
+
+            # We only want to add these extras at index time, so that they are
+            # part of the cached data_dict used to display search results etc.
+            # We don't want them added when editing the dataset, otherwise we
+            # get duplicate-key errors.
+            # The only way to detect indexing right now is to check that
+            # 'validate' is set to False in the context.
+            if harvest_object and not context.get('validate', True):
+                for key, value in [
+                    ('harvest_object_id', harvest_object.id),
+                    ('harvest_source_id', harvest_object.source.id),
+                    ('harvest_source_title', harvest_object.source.title),
+                        ]:
+                    _add_extra(data_dict, key, value)
+
         return data_dict
 
     # IDatasetForm
@@ -305,6 +317,15 @@ def organization_facets(self, facets_dict, organization_type, package_type):
                             ])
 
 
+def _add_extra(data_dict, key, value):
+    if 'extras' not in data_dict:
+        data_dict['extras'] = []
+
+    data_dict['extras'].append({
+        'key': key, 'value': value, 'state': u'active'
+    })
+
+
 def _get_logic_functions(module_root, logic_functions={}):
 
     for module_name in ['get', 'create', 'update', 'patch', 'delete']:

diff --git a/ckanext/harvest/tests/harvesters/test_ckanharvester.py b/ckanext/harvest/tests/harvesters/test_ckanharvester.py
@@ -171,19 +171,6 @@ def test_remote_groups_create(self):
         # Check that the remote group was created locally
         call_action('group_show', {}, id=mock_ckan.GROUPS[0]['id'])
 
-    def test_harvest_info_in_package_show(self):
-        results_by_guid = run_harvest(
-            url='http://localhost:%s' % mock_ckan.PORT,
-            harvester=CKANHarvester())
-        assert 'dataset1-id' in results_by_guid
-
-        # Check that the dataset extras has the harvest_object_id, harvest_source_id, and harvest_source_title
-        dataset = call_action('package_show', {"for_view": True}, id=mock_ckan.DATASETS[0]['id'])
-        extras_dict = dict((e['key'], e['value']) for e in dataset['extras'])
-        assert 'harvest_object_id' in extras_dict
-        assert 'harvest_source_id' in extras_dict
-        assert 'harvest_source_title' in extras_dict
-
     def test_remote_groups_only_local(self):
         # Create an existing group
         Group(id='group1-id', name='group1')