From 89b41052c596d60eb733d7202ad5d6dd7bb15c78 Mon Sep 17 00:00:00 2001
From: hayley-leblanc
Date: Wed, 24 Jul 2019 13:08:08 +0200
Subject: [PATCH 01/47] Add more detailed change summary for metadata fields
Adds a more readable summary of the changes between two versions of a dataset to the Changes page. The summary covers only dataset metadata fields (title, description, license, etc.), and only those fields that have actually changed. The comprehensive but less readable metadata diff is still available, but it is hidden by default and opened with a button. Custom fields and fields added by extensions are not summarized yet; only changes to the default fields are shown.
---
ckan/lib/helpers.py | 166 ++++++++++++++++++++++++++++
ckan/templates/package/changes.html | 54 ++++++++-
2 files changed, 218 insertions(+), 2 deletions(-)
diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index 577d62232ac..5b0f2db1e18 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -2688,3 +2688,169 @@ def sanitize_id(id_):
ValueError.
'''
return str(uuid.UUID(id_))
+
+@core_helper
+def compare_pkg_dicts(original, new):
+ # TODO: clean this up or make it shorter somehow
+
+ change_list = []
+
+ for key in original:
+ log.info("original[" + str(key) + "]: " + str(original[key]))
+ if key in new:
+ log.info("new[" + str(key) + "]: " + str(new[key]))
+
+
+ s = ""
+ seq1 = ("",
+ new['title'], "")
+
+ # if the title has changed
+ if original['title'] != new['title']:
+ seq2 = ("",
+ new['title'], "")
+ change_list.append(["Changed title to", s.join(seq2)])
+
+ # if the owner organization changed
+ if original['owner_org'] != new['owner_org']:
+ seq2 = ("",
+ original['organization']['title'], "")
+ seq3 = ("",
+ new['organization']['title'], "")
+ change_list.append(["Moved", s.join(seq1),
+ "from organization",
+ s.join(seq2),
+ "to organization",
+ s.join(seq3)])
+
+ # if the maintainer of the dataset changed
+ if original['maintainer'] != new['maintainer']:
+ # if the original dataset had a maintainer
+ if original['maintainer']:
+ change_list.append(["Set maintainer of", s.join(seq1), "to", new['maintainer'], "(previously", original['maintainer'] + ")"])
+ else:
+ change_list.append(["Set maintainer of", s.join(seq1), "to", new['maintainer']])
+
+
+ # if the maintainer email of the dataset changed
+ if original['maintainer_email'] != new['maintainer_email']:
+ seq2 = ("", new['maintainer_email'], "")
+ # if the original dataset had a maintainer email
+ if original['maintainer_email']:
+ seq3 = ("", original['maintainer_email'], "")
+ change_list.append(["Set maintainer e-mail of", s.join(seq1), "to", s.join(seq2), "(previously", s.join(seq3) + ")"])
+ else:
+ change_list.append(["Set maintainer e-mail of", s.join(seq1), "to", s.join(seq2)])
+
+ # if the author of the dataset changed
+ if original['author'] != new['author']:
+ # if the original dataset had an author
+ if original['author']:
+ change_list.append(["Set author of", s.join(seq1), "to", new['author'], "(previously", original['author'] + ")"])
+ else:
+ change_list.append(["Set author of", s.join(seq1), "to", new['author']])
+
+ # if the author email of the dataset changed
+ if original['author_email'] != new['author_email']:
+ seq2 = ("", new['author_email'], "")
+ # if the original dataset had a author email
+ if original['author_email']:
+ seq3 = ("", original['author_email'], "")
+ change_list.append(["Set author e-mail of", s.join(seq1), "to", s.join(seq2), "(previously", s.join(seq3) + ")"])
+ else:
+ change_list.append(["Set author e-mail of", s.join(seq1), "to", s.join(seq2)])
+
+ # if the visibility of the dataset changed
+ if original['private'] != new['private']:
+ change_list.append(["Set visibility of", s.join(seq1), "to", "Private" if new['private'] else "Public"])
+
+ # if the description of the dataset changed
+ if original['notes'] != new['notes']:
+ # displays the two descriptions (like how they are displayed on resource views)
+ # TODO: figure out the best way to format this stuff
+
+ # if the original dataset had a description
+ if original['notes']:
+ change_list.append(["Updated description of", s.join(seq1),
+ "from ", "
"])
+
+ # make sets out of the tags for each dataset
+ original_tags = set([tag['name'] for tag in original['tags']])
+ new_tags = set([tag['name'] for tag in new['tags']])
+ # if the tags have changed
+ if original_tags != new_tags:
+ deleted_tags = original_tags - new_tags
+ deleted_tags_list = list(deleted_tags)
+ if len(deleted_tags) == 1:
+ seq2 = ("",
+ deleted_tags_list[0], "")
+ change_list.append(["Removed tag", s.join(seq2), "from", s.join(seq1)])
+ elif len(deleted_tags) > 1:
+ seq2 = ["
"
+ for i in range(0, len(added_tags))]
+ change_list.append(["Added the following tags to", s.join(seq1), "
", s.join(seq2), "
"])
+
+ # if the license has changed
+ if original['license_title'] != new['license_title']:
+ seq2 = ()
+ seq3 = ()
+ # if the license has a URL, use it
+ if 'license_url' in original and original['license_url']:
+ seq2 = ("", original['license_title'], "")
+ else:
+ seq2 = (original['license_title'])
+ if 'license_url' in new and new['license_url']:
+ seq3 = ("", new['license_title'], "")
+ else:
+ seq3 = (new['license_title'])
+ change_list.append(["Changed the license of", s.join(seq1), "to", s.join(seq3), "(previously", s.join(seq2) + ")"])
+
+ # if the name of the dataset has changed
+ # this is only visible to the user via the dataset's URL, so display the change using that
+ if original['name'] != new['name']:
+ old_url = url_for(qualified=True, controller="dataset",
+ action="read", id=original['name'])
+ new_url = url_for(qualified=True, controller="dataset",
+ action="read", id=new['name'])
+ seq2 = ("", old_url, "")
+ seq3 = ("", new_url, "")
+ change_list.append(["Moved the dataset from", s.join(seq2), "to", s.join(seq3)])
+
+ # if the source URL (metadata value, not the actual URL of the dataset) has changed
+ if original['url'] != new['url']:
+ seq2 = ("", original['url'], "")
+ seq3 = ("", new['url'], "")
+ if original['url']:
+ change_list.append(["Changed the source URL of", s.join(seq1), "from", s.join(seq2), "to", s.join(seq3)])
+ else:
+ change_list.append(["Changed the source URL of", s.join(seq1), "to", s.join(seq3)])
+
+ # if the user-provided version has changed
+ if original['version'] != new['version']:
+ if original['version']:
+ change_list.append(["Changed the version of", s.join(seq1), "from", original['version'], "to", new['version']])
+ else:
+ change_list.append(["Changed the version of", s.join(seq1), "to", new['version']])
+
+
+
+ return change_list
diff --git a/ckan/templates/package/changes.html b/ckan/templates/package/changes.html
index eb1fd899063..45e8caf8e83 100644
--- a/ckan/templates/package/changes.html
+++ b/ckan/templates/package/changes.html
@@ -15,20 +15,70 @@
{% block package_changes_header %}
{{ _('Changes') }}
- Dataset: {% link_for pkg_dict.title, controller='package', action='read', id=pkg_dict.name %}
+ {##}
{% endblock %}
+ {# iterate through the activities that are NOT the original one -
+ iterate, rather than just look at the second one, so that we could
+ potentially add a way to look at the diff between larger sets of
+ datasets in the future #}
+
+ {% for i in range(1, activity_diff.activities|length) %}
+ On {{ h.render_datetime(activity_diff.activities[1].timestamp, with_hours=True, with_seconds=True) }}, {{ h.linked_user(activity_diff.activities[1].user_id) }}:
+
+
+ {% set changes = h.compare_pkg_dicts(activity_diff.activities[0].data.package, activity_diff.activities[1].data.package) %}
+
+ {% for change in changes %}
+
+ {% for element in change %}
+ {{ element|safe }}
+ {% endfor %}
+
+
+ {% endfor %}
+
+
+
+ {% endfor %}
+
+ {# button to show JSON metadata diff - not shown by default #}
+
+
+
{% block package_changes_diff %}
{{ activity_diff['diff']|safe }}
{% endblock %}
+
+
+
+
+
{% endblock %}
From 680ede035eab5bca1f1779f8bcb838e6a8d119f5 Mon Sep 17 00:00:00 2001
From: hayley-leblanc
Date: Fri, 19 Jul 2019 10:58:54 +0200
Subject: [PATCH 02/47] Save activity data for private datasets
---
ckan/lib/activity_streams_session_extension.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/ckan/lib/activity_streams_session_extension.py b/ckan/lib/activity_streams_session_extension.py
index 3dec8a95173..decdeeb6cde 100644
--- a/ckan/lib/activity_streams_session_extension.py
+++ b/ckan/lib/activity_streams_session_extension.py
@@ -70,8 +70,8 @@ def before_commit(self, session):
# object is a package.
# Don't create activities for private datasets.
- if obj.private:
- continue
+ #if obj.private:
+ # continue
activities[obj.id] = activity
@@ -106,8 +106,8 @@ def before_commit(self, session):
continue
# Don't create activities for private datasets.
- if package.private:
- continue
+ #if package.private:
+ # continue
if package.id in activities:
continue
From 3fc42cedd079f469eb053451de3125c1c37cdc0c Mon Sep 17 00:00:00 2001
From: hayley-leblanc
Date: Wed, 24 Jul 2019 15:19:09 +0200
Subject: [PATCH 03/47] Display changes to metadata fields added by extensions
---
ckan/lib/helpers.py | 34 +++++++++++++++++++++++++++++
ckan/templates/package/changes.html | 3 +--
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index 5b0f2db1e18..5af4e986a0e 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -2692,6 +2692,7 @@ def sanitize_id(id_):
@core_helper
def compare_pkg_dicts(original, new):
# TODO: clean this up or make it shorter somehow
+ # TODO: what happens if someone REMOVES a value from a field that previously had a value?
change_list = []
@@ -2851,6 +2852,39 @@ def compare_pkg_dicts(original, new):
else:
change_list.append(["Changed the version of", s.join(seq1), "to", new['version']])
+ # list of the default metadata fields for a dataset
+ # any fields that are not part of this list are custom fields added by a user or extension
+ fields = ['owner_org', 'maintainer', 'maintainer_email', 'relationships_as_object', 'private', 'num_tags',
+ 'id', 'metadata_created', 'metadata_modified', 'author', 'author_email', 'state', 'version',
+ 'license_id', 'type', 'resources', 'num_resources', 'tags', 'title', 'groups', 'creator_user_id',
+ 'relationships_as_subject', 'name', 'isopen', 'url', 'notes', 'license_title', 'extras',
+ 'license_url', 'organization', 'revision_id']
+ fields_set = set(fields)
+
+ # if there are any fields from extensions that are in the new dataset and
+ # have been updated, print a generic message stating that
+ original_set = set(original.keys())
+ new_set = set(new.keys())
+
+ addl_fields_new = new_set - fields_set # set of additional fields in the new dictionary
+ addl_fields_original = original_set - fields_set # set of additional fields in the original dictionary
+ addl_fields = addl_fields_new.intersection(addl_fields_original) # set of additional fields in both
+
+ # do NOT display a change if any additional fields have been added or deleted,
+ # since that is not a change made by the user from the web interface
+
+ # if additional fields have been changed
+ addl_fields_list = list(addl_fields)
+ for field in addl_fields_list:
+ if original[field] != new[field]:
+ if original[field]:
+ change_list.append(["Changed field", field.capitalize(), "to", new[field], "(previously", original[field] + ")", "in", s.join(seq1)])
+
+ # check the extras field to see if anything has been added, deleted, or changed
+ # that is where custom fields added via the web interface go - they do not become
+ # actual fields in a package dict
+ # TODO: add this ^^
+
return change_list
diff --git a/ckan/templates/package/changes.html b/ckan/templates/package/changes.html
index 45e8caf8e83..996494955ef 100644
--- a/ckan/templates/package/changes.html
+++ b/ckan/templates/package/changes.html
@@ -31,10 +31,9 @@
{{ _('Changes') }}
{% for i in range(1, activity_diff.activities|length) %}
On {{ h.render_datetime(activity_diff.activities[1].timestamp, with_hours=True, with_seconds=True) }}, {{ h.linked_user(activity_diff.activities[1].user_id) }}:
-
{% set changes = h.compare_pkg_dicts(activity_diff.activities[0].data.package, activity_diff.activities[1].data.package) %}
-
+
{% for change in changes %}
{% for element in change %}
From 9732e969ff160b7be2eb3b6ce210834f9b2e4e59 Mon Sep 17 00:00:00 2001
From: hayley-leblanc
Date: Thu, 25 Jul 2019 10:36:15 +0200
Subject: [PATCH 04/47] Check changes to custom fields and add function for
each check
---
ckan/lib/helpers.py | 355 ++++++++++++++++++++++++++++++--------------
1 file changed, 241 insertions(+), 114 deletions(-)
diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index 5af4e986a0e..8ee2057671b 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -2691,167 +2691,221 @@ def sanitize_id(id_):
@core_helper
def compare_pkg_dicts(original, new):
- # TODO: clean this up or make it shorter somehow
# TODO: what happens if someone REMOVES a value from a field that previously had a value?
change_list = []
- for key in original:
- log.info("original[" + str(key) + "]: " + str(original[key]))
- if key in new:
- log.info("new[" + str(key) + "]: " + str(new[key]))
-
-
s = ""
seq1 = ("",
new['title'], "")
+ new_pkg = s.join(seq1)
# if the title has changed
if original['title'] != new['title']:
- seq2 = ("",
- new['title'], "")
- change_list.append(["Changed title to", s.join(seq2)])
+ _title_change(change_list, original, new)
# if the owner organization changed
if original['owner_org'] != new['owner_org']:
- seq2 = ("",
- original['organization']['title'], "")
- seq3 = ("",
- new['organization']['title'], "")
- change_list.append(["Moved", s.join(seq1),
- "from organization",
- s.join(seq2),
- "to organization",
- s.join(seq3)])
+ _org_change(change_list, original, new, new_pkg)
# if the maintainer of the dataset changed
if original['maintainer'] != new['maintainer']:
- # if the original dataset had a maintainer
- if original['maintainer']:
- change_list.append(["Set maintainer of", s.join(seq1), "to", new['maintainer'], "(previously", original['maintainer'] + ")"])
- else:
- change_list.append(["Set maintainer of", s.join(seq1), "to", new['maintainer']])
-
+ _maintainer_change(change_list, original, new, new_pkg)
# if the maintainer email of the dataset changed
if original['maintainer_email'] != new['maintainer_email']:
- seq2 = ("", new['maintainer_email'], "")
- # if the original dataset had a maintainer email
- if original['maintainer_email']:
- seq3 = ("", original['maintainer_email'], "")
- change_list.append(["Set maintainer e-mail of", s.join(seq1), "to", s.join(seq2), "(previously", s.join(seq3) + ")"])
- else:
- change_list.append(["Set maintainer e-mail of", s.join(seq1), "to", s.join(seq2)])
+ _maintainer_email_change(change_list, original, new, new_pkg)
# if the author of the dataset changed
if original['author'] != new['author']:
- # if the original dataset had an author
- if original['author']:
- change_list.append(["Set author of", s.join(seq1), "to", new['author'], "(previously", original['author'] + ")"])
- else:
- change_list.append(["Set author of", s.join(seq1), "to", new['author']])
+ _author_change(change_list, original, new, new_pkg)
# if the author email of the dataset changed
if original['author_email'] != new['author_email']:
- seq2 = ("", new['author_email'], "")
- # if the original dataset had a author email
- if original['author_email']:
- seq3 = ("", original['author_email'], "")
- change_list.append(["Set author e-mail of", s.join(seq1), "to", s.join(seq2), "(previously", s.join(seq3) + ")"])
- else:
- change_list.append(["Set author e-mail of", s.join(seq1), "to", s.join(seq2)])
+ _author_email_change(change_list, original, new, new_pkg)
# if the visibility of the dataset changed
if original['private'] != new['private']:
- change_list.append(["Set visibility of", s.join(seq1), "to", "Private" if new['private'] else "Public"])
+ change_list.append(["Set visibility of", new_pkg, "to", "Private" if new['private'] else "Public"])
# if the description of the dataset changed
if original['notes'] != new['notes']:
- # displays the two descriptions (like how they are displayed on resource views)
- # TODO: figure out the best way to format this stuff
-
- # if the original dataset had a description
- if original['notes']:
- change_list.append(["Updated description of", s.join(seq1),
- "from ", "
"])
+ _description_change(change_list, original, new, new_pkg)
# make sets out of the tags for each dataset
original_tags = set([tag['name'] for tag in original['tags']])
new_tags = set([tag['name'] for tag in new['tags']])
# if the tags have changed
if original_tags != new_tags:
- deleted_tags = original_tags - new_tags
- deleted_tags_list = list(deleted_tags)
- if len(deleted_tags) == 1:
- seq2 = ("",
- deleted_tags_list[0], "")
- change_list.append(["Removed tag", s.join(seq2), "from", s.join(seq1)])
- elif len(deleted_tags) > 1:
- seq2 = ["
"
+ for i in range(0, len(added_tags))]
+ change_list.append(["Added the following tags to", new_pkg, "
", s.join(seq2), "
"])
+
+def _license_change(change_list, original, new, new_pkg):
+ s = ""
+ seq2 = ()
+ seq3 = ()
+ # if the license has a URL, use it
+ if 'license_url' in original and original['license_url']:
+ seq2 = ("", original['license_title'], "")
+ else:
+ seq2 = (original['license_title'])
+ if 'license_url' in new and new['license_url']:
+ seq3 = ("", new['license_title'], "")
+ else:
+ seq3 = (new['license_title'])
+ change_list.append(["Changed the license of", new_pkg, "to", s.join(seq3), "(previously", s.join(seq2) + ")"])
+
+def _name_change(change_list, original, new):
+ s = ""
+ old_url = url_for(qualified=True, controller="dataset",
+ action="read", id=original['name'])
+ new_url = url_for(qualified=True, controller="dataset",
+ action="read", id=new['name'])
+ seq2 = ("", old_url, "")
+ seq3 = ("", new_url, "")
+ change_list.append(["Moved the dataset from", s.join(seq2), "to", s.join(seq3)])
+
+def _source_url_change(change_list, original, new, new_pkg):
+ s = ""
+ seq2 = ("", original['url'], "")
+ seq3 = ("", new['url'], "")
+ if original['url']:
+ change_list.append(["Changed the source URL of", new_pkg, "from", s.join(seq2), "to", s.join(seq3)])
+ else:
+ change_list.append(["Changed the source URL of", new_pkg, "to", s.join(seq3)])
+
+def _version_change(change_list, original, new, new_pkg):
+ if original['version']:
+ change_list.append(["Changed the version of", new_pkg, "from", original['version'], "to", new['version']])
+ else:
+ change_list.append(["Changed the version of", new_pkg, "to", new['version']])
+def _extension_fields(change_list, original, new, new_pkg):
# list of the default metadata fields for a dataset
# any fields that are not part of this list are custom fields added by a user or extension
fields = ['owner_org', 'maintainer', 'maintainer_email', 'relationships_as_object', 'private', 'num_tags',
@@ -2878,13 +2932,86 @@ def compare_pkg_dicts(original, new):
for field in addl_fields_list:
if original[field] != new[field]:
if original[field]:
- change_list.append(["Changed field", field.capitalize(), "to", new[field], "(previously", original[field] + ")", "in", s.join(seq1)])
+ change_list.append(["Changed value of field", field.capitalize(), "to", new[field], "(previously", original[field] + ")", "in", new_pkg])
+ else:
+ change_list.append(["Changed value of field", field.capitalize(), "to", new[field], "in", new_pkg])
+def _extra_fields(change_list, original, new, new_pkg):
# check the extras field to see if anything has been added, deleted, or changed
# that is where custom fields added via the web interface go - they do not become
# actual fields in a package dict
- # TODO: add this ^^
-
-
- return change_list
+ # what if they added the field but didn't add a value?
+ s = ""
+ if 'extras' in new:
+ extra_fields_new = _extras_to_dict(new['extras'])
+ extra_new_set = set(extra_fields_new.keys())
+
+ # if the original version has an extra fields, we need to compare the new version's
+ # extras to the original ones
+ if 'extras' in original:
+ extra_fields_original = _extras_to_dict(original['extras'])
+ extra_original_set = set(extra_fields_original.keys())
+
+ # if some fields were added
+ new_fields = list(extra_new_set - extra_original_set)
+ if len(new_fields) == 1:
+ if extra_fields_new[new_fields[0]]:
+ change_list.append(["Added field", s.join(("", new_fields[0], "")),
+ "with value", s.join(("", extra_fields_new[new_fields[0]], "")),
+ "to", new_pkg])
+ else:
+ change_list.append(["Added field", s.join(("", new_fields[0], "")),
+ "to", new_pkg])
+ elif len(new_fields) > 1:
+ seq2 = ["
" + new_fields[i] + " with value " + extra_fields_new[new_fields[i]] + "
" if extra_fields_new[new_fields[i]]
+ else "
" + new_fields[i] + "
"
+ for i in range(0, len(new_fields))]
+ change_list.append(["Added the following fields to", new_pkg, "
", s.join(seq2), "
"])
+
+ # if some fields were deleted
+ deleted_fields = list(extra_original_set - extra_new_set)
+ if len(deleted_fields) == 1:
+ change_list.append(["Removed field", s.join(("", deleted_fields[0], "")), "from", new_pkg])
+ elif len(deleted_fields) > 1:
+ seq2 = ["
" + deleted_fields[i] + "
" for i in range(0, len(deleted_fields))]
+ change_list.append(["Removed the following fields from", new_pkg, "
", s.join(seq2), "
"])
+
+ # if some existing fields were changed
+ extra_fields = list(extra_new_set.intersection(extra_original_set)) # list of extra fields in both the original and new versions
+ for field in extra_fields:
+ if extra_fields_original[field] != extra_fields_new[field]:
+ if extra_fields_original[field]:
+ change_list.append(["Changed value of field", s.join(("", field, "")),
+ "to", s.join(("", extra_fields_new[field], "")),
+ "(previously", s.join(("", extra_fields_original[field], "")) + ")",
+ "in", new_pkg])
+ else:
+ change_list.append(["Changed value of field", s.join(("", field, "")),
+ "to", s.join(("", extra_fields_new[field], "")),
+ "in", new_pkg])
+
+ # if the original version didn't have an extras field, the user could only have added a field (not changed or deleted)
+ else:
+ new_fields = list(extra_new_set)
+ if len(new_fields) == 1:
+ if extra_fields_new[new_fields[0]]:
+ change_list.append(["Added field", s.join(("", new_fields[0], "")),
+ "with value", s.join(("", extra_fields_new[new_fields[0]], "")),
+ "to", new_pkg])
+ else:
+ change_list.append(["Added field", s.join(("", new_fields[0], "")),
+ "to", new_pkg])
+ elif len(new_fields) > 1:
+ seq2 = ["
" + new_fields[i] + " with value " + extra_fields_new[new_fields[i]] + "
" if extra_fields_new[new_fields[i]]
+ else "
" + new_fields[i] + "
"
+ for i in range(0, len(new_fields))]
+ change_list.append(["Added the following fields to", new_pkg, "
"])
@@ -2894,14 +2902,18 @@ def _source_url_change(change_list, original, new, new_pkg):
s = ""
seq2 = ("", original['url'], "")
seq3 = ("", new['url'], "")
- if original['url']:
+ if original['url'] and new['url']:
change_list.append(["Changed the source URL of", new_pkg, "from", s.join(seq2), "to", s.join(seq3)])
+ elif not new['url']:
+ change_list.append(["Removed source URL from", new_pkg])
else:
change_list.append(["Changed the source URL of", new_pkg, "to", s.join(seq3)])
def _version_change(change_list, original, new, new_pkg):
- if original['version']:
+ if original['version'] and new['url']:
change_list.append(["Changed the version of", new_pkg, "from", original['version'], "to", new['version']])
+ elif not new['url']:
+ change_list.append(["Removed version number from", new_pkg])
else:
change_list.append(["Changed the version of", new_pkg, "to", new['version']])
From fb0c55dd7e0e77813951386e2918430cde6a43e7 Mon Sep 17 00:00:00 2001
From: hayley-leblanc
Date: Thu, 25 Jul 2019 11:26:40 +0200
Subject: [PATCH 06/47] Add function documentation
---
ckan/lib/helpers.py | 97 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 95 insertions(+), 2 deletions(-)
diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index 182da154160..db59bd9ac2b 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -2691,6 +2691,15 @@ def sanitize_id(id_):
@core_helper
def compare_pkg_dicts(original, new):
+ '''
+ Takes two package dictionaries that represent consecutive versions of
+ the same dataset and returns a list of detailed & formatted summaries of
+ the changes between the two versions. original and new are the two package
+ dictionaries. The function assumes that both dictionaries will have
+ all of the default package dictionary keys, and also checks for additional
+ keys added by extensions and custom fields added by the user in the web
+ interface.
+ '''
change_list = []
s = ""
@@ -2764,6 +2773,28 @@ def compare_pkg_dicts(original, new):
return change_list
def _extras_to_dict(extras_list):
+ '''
+ Takes a list of dictionaries with the following format:
+ [
+ {
+ "key": ,
+ "value":
+ },
+ ...,
+ {
+ "key": ,
+ "value":
+ }
+ ]
+ and converts it into a single dictionary with the following
+ format:
+ {
+ key_0: value_0,
+ ...,
+ key_n: value_n
+
+ }
+ '''
ret_dict = {}
# the extras_list is a list of dictionaries
for dict in extras_list:
@@ -2772,6 +2803,10 @@ def _extras_to_dict(extras_list):
return ret_dict
def _title_change(change_list, original, new):
+ '''
+ Appends a summary of a change to a dataset's title between two versions
+ (original and new) to change_list.
+ '''
s = ""
seq2 = ("",
@@ -2779,6 +2814,10 @@ def _title_change(change_list, original, new):
change_list.append(["Changed title to", s.join(seq2), "(previously", original['title'] + ")"])
def _org_change(change_list, original, new, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's organization between two versions
+ (original and new) to change_list.
+ '''
s = ""
seq2 = ("",
@@ -2793,6 +2832,10 @@ def _org_change(change_list, original, new, new_pkg):
s.join(seq3)])
def _maintainer_change(change_list, original, new, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's maintainer field between two
+ versions (original and new) to change_list.
+ '''
# if the original dataset had a maintainer
if original['maintainer'] and new['maintainer']:
change_list.append(["Set maintainer of", new_pkg, "to", new['maintainer'], "(previously", original['maintainer'] + ")"])
@@ -2802,6 +2845,10 @@ def _maintainer_change(change_list, original, new, new_pkg):
change_list.append(["Set maintainer of", new_pkg, "to", new['maintainer']])
def _maintainer_email_change(change_list, original, new, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's maintainer e-mail address field
+ between two versions (original and new) to change_list.
+ '''
s = ""
seq2 = ("", new['maintainer_email'], "")
# if the original dataset had a maintainer email
@@ -2814,6 +2861,10 @@ def _maintainer_email_change(change_list, original, new, new_pkg):
change_list.append(["Set maintainer e-mail of", new_pkg, "to", s.join(seq2)])
def _author_change(change_list, original, new, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's author field between two versions
+ (original and new) to change_list.
+ '''
# if the original dataset had an author
if original['author'] and new['author']:
change_list.append(["Set author of", new_pkg, "to", new['author'], "(previously", original['author'] + ")"])
@@ -2823,6 +2874,10 @@ def _author_change(change_list, original, new, new_pkg):
change_list.append(["Set author of", new_pkg, "to", new['author']])
def _author_email_change(change_list, original, new, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's author e-mail address field
+ between two versions (original and new) to change_list.
+ '''
s = ""
seq2 = ("", new['author_email'], "")
# if the original dataset had a author email
@@ -2835,8 +2890,12 @@ def _author_email_change(change_list, original, new, new_pkg):
change_list.append(["Set author e-mail of", new_pkg, "to", s.join(seq2)])
def _description_change(change_list, original, new, new_pkg):
- # displays the two descriptions (like how they are displayed on resource views)
- # TODO: figure out the best way to format this stuff
+ '''
+ Appends a summary of a change to a dataset's description between two versions
+ (original and new) to change_list.
+ '''
+
+ # TODO: find a better way to format the descriptions along with the change summary
# if the original dataset had a description
if original['notes'] and new['notes']:
@@ -2850,6 +2909,10 @@ def _description_change(change_list, original, new, new_pkg):
"to ", "
" + new['notes'] + "
"])
def _tag_change(change_list, new_tags, original_tags, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's tag list between two versions
+ (original and new) to change_list.
+ '''
s = ""
deleted_tags = original_tags - new_tags
deleted_tags_list = list(deleted_tags)
@@ -2874,6 +2937,10 @@ def _tag_change(change_list, new_tags, original_tags, new_pkg):
change_list.append(["Added the following tags to", new_pkg, "
", s.join(seq2), "
"])
def _license_change(change_list, original, new, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's license between two versions
+ (original and new) to change_list.
+ '''
s = ""
seq2 = ()
seq3 = ()
@@ -2889,6 +2956,10 @@ def _license_change(change_list, original, new, new_pkg):
change_list.append(["Changed the license of", new_pkg, "to", s.join(seq3), "(previously", s.join(seq2) + ")"])
def _name_change(change_list, original, new):
+ '''
+ Appends a summary of a change to a dataset's name (and thus the URL it can
+ be accessed at) between two versions (original and new) to change_list.
+ '''
s = ""
old_url = url_for(qualified=True, controller="dataset",
action="read", id=original['name'])
@@ -2899,6 +2970,11 @@ def _name_change(change_list, original, new):
change_list.append(["Moved the dataset from", s.join(seq2), "to", s.join(seq3)])
def _source_url_change(change_list, original, new, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's source URL (metadata field, not
+ its actual URL in the datahub) between two versions (original and new) to
+ change_list.
+ '''
s = ""
seq2 = ("", original['url'], "")
seq3 = ("", new['url'], "")
@@ -2910,6 +2986,10 @@ def _source_url_change(change_list, original, new, new_pkg):
change_list.append(["Changed the source URL of", new_pkg, "to", s.join(seq3)])
def _version_change(change_list, original, new, new_pkg):
+ '''
+ Appends a summary of a change to a dataset's version field (inputted by the user,
+ not from version control) between two versions (original and new) to change_list.
+ '''
if original['version'] and new['url']:
change_list.append(["Changed the version of", new_pkg, "from", original['version'], "to", new['version']])
elif not new['url']:
@@ -2918,6 +2998,14 @@ def _version_change(change_list, original, new, new_pkg):
change_list.append(["Changed the version of", new_pkg, "to", new['version']])
def _extension_fields(change_list, original, new, new_pkg):
+ '''
+ Checks whether any fields that have been added to the package dictionaries
+ by CKAN extensions have been changed between versions. If there have been
+ any changes between the two versions (original and new), a general summary
+ of the change is appended to change_list. This function does not produce
+ summaries for fields added or deleted by extensions, since these changes are
+ not triggered by the user in the web interface or API.
+ '''
# list of the default metadata fields for a dataset
# any fields that are not part of this list are custom fields added by a user or extension
fields = ['owner_org', 'maintainer', 'maintainer_email', 'relationships_as_object', 'private', 'num_tags',
@@ -2952,6 +3040,11 @@ def _extra_fields(change_list, original, new, new_pkg):
# check the extras field to see if anything has been added, deleted, or changed
# that is where custom fields added via the web interface go - they do not become
# actual fields in a package dict
+ '''
+ Checks whether a user has added, removed, or changed any custom fields from
+ the web interface (or API?) and appends a summary of each change to
+ change_list.
+ '''
# what if they added the field but didn't add a value?
s = ""
From 6921941df981ce8af5bf37669cdbaece2f5011a1 Mon Sep 17 00:00:00 2001
From: hayley-leblanc
Date: Thu, 25 Jul 2019 14:24:25 +0200
Subject: [PATCH 07/47] Add activity summaries for changes to resources
---
ckan/lib/helpers.py | 200 +++++++++++++++++++++++-----
ckan/templates/package/changes.html | 2 +-
2 files changed, 168 insertions(+), 34 deletions(-)
diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index db59bd9ac2b..48abe98c618 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -2690,7 +2690,7 @@ def sanitize_id(id_):
return str(uuid.UUID(id_))
@core_helper
-def compare_pkg_dicts(original, new):
+def compare_pkg_dicts(original, new, old_activity_id):
'''
Takes two package dictionaries that represent consecutive versions of
the same dataset and returns a list of detailed & formatted summaries of
@@ -2708,6 +2708,172 @@ def compare_pkg_dicts(original, new):
new['title'], "")
new_pkg = s.join(seq1)
+ _check_metadata_changes(change_list, original, new, new_pkg)
+
+ _check_resource_changes(change_list, original, new, new_pkg, old_activity_id)
+
+ return change_list
+
+def _extras_to_dict(extras_list):
+ '''
+ Takes a list of dictionaries with the following format:
+ [
+ {
+ "key": <key_0>,
+ "value": <value_0>
+ },
+ ...,
+ {
+ "key": <key_n>,
+ "value": <value_n>
+ }
+ ]
+ and converts it into a single dictionary with the following
+ format:
+ {
+ key_0: value_0,
+ ...,
+ key_n: value_n
+
+ }
+ '''
+ ret_dict = {}
+ # each entry is a dict with 'key' and 'value' items; a repeated key overwrites the earlier value
+ for dict in extras_list:  # NOTE(review): the loop variable shadows the builtin `dict`
+ ret_dict[dict['key']] = dict['value']
+
+ return ret_dict
+
+def _check_resource_changes(change_list, original, new, new_pkg, old_activity_id):
+ '''
+ Compares the 'resources' lists of two versions of a dataset (original and new) and appends
+ a summary to change_list for each resource added, deleted, or renamed, and for each change
+ to an existing resource's format, description, or URL (reported as a new file upload).
+ Resources are matched by 'id'. new_pkg is used as-is in each summary; returns nothing.
+ '''
+
+ # TODO: clean this up (old_activity_id is not referenced anywhere in this function)
+
+ # index both versions by resource ID: a set of IDs plus a dict of the fields compared below
+ original_resource_set = set()
+ original_resource_dict = {}
+ new_resource_set = set()
+ new_resource_dict = {}
+ s = ""  # joiner for the seqN tuples; NOTE(review): their empty items look like stripped markup - confirm
+
+ for resource in original['resources']:
+ original_resource_set.add(resource['id'])
+ original_resource_dict[resource['id']] = {'name': resource['name'],
+ 'url': resource['url'],
+ 'description': resource['description'],
+ 'format': resource['format']}
+
+
+ for resource in new['resources']:
+ new_resource_set.add(resource['id'])
+ new_resource_dict[resource['id']] = {'name': resource['name'],
+ 'url': resource['url'],
+ 'description': resource['description'],
+ 'format': resource['format']}
+
+ # get the IDs of the resources that have been added between the versions
+ new_resources = list(new_resource_set - original_resource_set)
+ for resource_id in new_resources:
+ seq2 = ("",
+ new_resource_dict[resource_id]['name'], "")
+ change_list.append(["Added resource", s.join(seq2), "to", new_pkg])
+
+ # get the IDs of resources that have been deleted between versions
+ deleted_resources = list(original_resource_set - new_resource_set)
+ for resource_id in deleted_resources:
+ seq2 = ("",
+ original_resource_dict[resource_id]['name'], "")
+ change_list.append(["Deleted resource", s.join(seq2), "from", new_pkg])
+
+ # now check the resources that are in both and see if any have been changed
+
+ # TODO: only one resource can be edited at a time like this right?
+ # so we could stop once we find the one that is edited
+ resources = new_resource_set.intersection(original_resource_set)
+ for resource_id in resources:
+ original_metadata = original_resource_dict[resource_id]
+ new_metadata = new_resource_dict[resource_id]
+
+ if original_metadata['name'] != new_metadata['name']:
+ seq2 = ("",
+ original_resource_dict[resource_id]['name'], "")
+ seq3 = ("",
+ new_resource_dict[resource_id]['name'], "")
+ change_list.append(["Renamed resource", s.join(seq2), "to", s.join(seq3), "in", new_pkg])
+
+ # you can't remove a format, but if a resource's format isn't recognized, it won't have one set
+ # if a format was not originally set and the user set one
+ if not original_metadata['format'] and new_metadata['format']:
+ seq2 = ("",
+ new_resource_dict[resource_id]['name'], "")
+ seq3 = ("",
+ new_metadata['format'], "")
+ change_list.append(["Set format of resource", s.join(seq2), "to", s.join(seq3), "in", new_pkg])
+ # if the format differs (also taken when the original had a format and the new version has none)
+ elif original_metadata['format'] != new_metadata['format']:
+ seq2 = ("",
+ original_resource_dict[resource_id]['name'], "")
+ seq3 = ("",
+ new_metadata['format'], "")
+ seq4 = ("",
+ original_metadata['format'], "")
+ change_list.append(["Set format of resource", s.join(seq2), "to", s.join(seq3), "(previously", s.join(seq4) + ")", "in", new_pkg])
+
+ # if there was no description originally and the new version has one
+ if not original_metadata['description'] and new_metadata['description']:
+ seq2 = ("",
+ new_resource_dict[resource_id]['name'], "")
+ change_list.append(["Updated description of resource", s.join(seq2), "in",
+ new_pkg, "to ",
+ "
" + new_metadata['description'] + "
"])
+
+ # if there was a description but the user removed it
+ elif original_metadata['description'] and not new_metadata['description']:
+ seq2 = ("",
+ new_resource_dict[resource_id]['name'], "")
+ change_list.append(["Removed description from resource", s.join(seq2), "in", new_pkg])
+
+ # if both have descriptions but they are different
+ elif original_metadata['description'] != new_metadata['description']:
+ seq2 = ("",
+ new_resource_dict[resource_id]['name'], "")
+ change_list.append(["Updated description of resource", s.join(seq2), "in",
+ new_pkg, "from ",
+ "
" + original_metadata['description'] + "
",
+ "to ",
+ "
" + new_metadata['description'] + "
"])
+
+ # any change to the resource's 'url' is reported as a newly uploaded file
+ # TODO: use regular expressions to determine the actual name of the new and old files
+ if original_metadata['url'] != new_metadata['url']:
+ seq2 = ("",
+ new_resource_dict[resource_id]['name'], "")
+ change_list.append(["Uploaded a new file to resource", s.join(seq2), "in", new_pkg])
+
+def _check_metadata_changes(change_list, original, new, new_pkg):
+ '''
+ Checks whether a dataset's metadata fields (fields in its package dictionary
+ not including resources) have changed between two consecutive versions and
+ puts a list of formatted summaries of these changes in change_list.
+ '''
# if the title has changed
if original['title'] != new['title']:
_title_change(change_list, original, new)
@@ -2770,38 +2936,6 @@ def compare_pkg_dicts(original, new):
_extension_fields(change_list, original, new, new_pkg)
_extra_fields(change_list, original, new, new_pkg)
- return change_list
-
-def _extras_to_dict(extras_list):
- '''
- Takes a list of dictionaries with the following format:
- [
- {
- "key": ,
- "value":
- },
- ...,
- {
- "key": ,
- "value":
- }
- ]
- and converts it into a single dictionary with the following
- format:
- {
- key_0: value_0,
- ...,
- key_n: value_n
-
- }
- '''
- ret_dict = {}
- # the extras_list is a list of dictionaries
- for dict in extras_list:
- ret_dict[dict['key']] = dict['value']
-
- return ret_dict
-
def _title_change(change_list, original, new):
'''
Appends a summary of a change to a dataset's title between two versions
diff --git a/ckan/templates/package/changes.html b/ckan/templates/package/changes.html
index 996494955ef..27a84f417fe 100644
--- a/ckan/templates/package/changes.html
+++ b/ckan/templates/package/changes.html
@@ -32,7 +32,7 @@
{{ _('Changes') }}
{% for i in range(1, activity_diff.activities|length) %}
On {{ h.render_datetime(activity_diff.activities[1].timestamp, with_hours=True, with_seconds=True) }}, {{ h.linked_user(activity_diff.activities[1].user_id) }}:
- {% set changes = h.compare_pkg_dicts(activity_diff.activities[0].data.package, activity_diff.activities[1].data.package) %}
+ {% set changes = h.compare_pkg_dicts(activity_diff.activities[0].data.package, activity_diff.activities[1].data.package, activity_diff.activities[0].id) %}
{% for change in changes %}
From 43ffc3e99a48523347af612a06f2247a0b8b6579 Mon Sep 17 00:00:00 2001
From: hayley-leblanc
Date: Fri, 26 Jul 2019 11:04:27 +0200
Subject: [PATCH 08/47] Add ability to view multiple consecutive change
summaries
---
ckan/lib/helpers.py | 20 +++++
ckan/templates/package/changes.html | 73 ++++++-------------
.../package/snippets/change_item.html | 42 +++++++++++
.../snippets/activities/changed_package.html | 2 +
ckan/views/dataset.py | 52 ++++++++++++-
5 files changed, 138 insertions(+), 51 deletions(-)
create mode 100644 ckan/templates/package/snippets/change_item.html
diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py
index 48abe98c618..8b92e6fdf76 100644
--- a/ckan/lib/helpers.py
+++ b/ckan/lib/helpers.py
@@ -2712,6 +2712,11 @@ def compare_pkg_dicts(original, new, old_activity_id):
_check_resource_changes(change_list, original, new, new_pkg, old_activity_id)
+ # if the dataset was updated but none of the fields we check were changed,
+ # display a message stating that
+ if len(change_list) == 0:
+ change_list.append(["No fields were updated. See metadata diff for more details."])
+
return change_list
def _extras_to_dict(extras_list):
@@ -3254,3 +3259,18 @@ def _extra_fields(change_list, original, new, new_pkg):
elif len(deleted_fields) > 1:
seq2 = ["
" + deleted_fields[i] + "
" for i in range(0, len(deleted_fields))]
change_list.append(["Removed the following fields from", new_pkg, "
{##}
{% endblock %}
- {# iterate through the activities that are NOT the original one -
- iterate, rather than just look at the second one, so that we could
- potentially add a way to look at the diff between larger sets of
- datasets in the future #}
-
- {% for i in range(1, activity_diff.activities|length) %}
- On {{ h.render_datetime(activity_diff.activities[1].timestamp, with_hours=True, with_seconds=True) }}, {{ h.linked_user(activity_diff.activities[1].user_id) }}:
+ {% set select_list1 = h.activity_list_select(pkg_activity_list, activity_diffs[0].activities[0].id) %}
+ {% set select_list2 = h.activity_list_select(pkg_activity_list, activity_diffs[0].activities[1].id) %}
+
- {% set changes = h.compare_pkg_dicts(activity_diff.activities[0].data.package, activity_diff.activities[1].data.package, activity_diff.activities[0].id) %}
-
- {% for change in changes %}
-
- {% for element in change %}
- {{ element|safe }}
- {% endfor %}
-
-
- {% endfor %}
-
+ {% for i in range(activity_diffs|length) %}
+ {% snippet "package/snippets/change_item.html", activity_diff=activity_diffs[i], pkg_dict=pkg_dict %}
+
+
{% endfor %}
- {# button to show JSON metadata diff - not shown by default #}
-
-
-