diff --git a/ckan/tests/migration/__init__.py b/ckan/tests/migration/__init__.py deleted file mode 100644 index d94dac35161..00000000000 --- a/ckan/tests/migration/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# encoding: utf-8 - -'''**All migration scripts should have tests.** - -.. todo:: - - Write some tests for a migration script, and then use them as an example to - fill out this guidelines section. - -''' diff --git a/ckan/tests/migration/test_migrate_package_activity.py b/ckan/tests/migration/test_migrate_package_activity.py deleted file mode 100644 index b42081ee3f4..00000000000 --- a/ckan/tests/migration/test_migrate_package_activity.py +++ /dev/null @@ -1,251 +0,0 @@ -# encoding: utf-8 - -import copy -from nose.tools import assert_equal as eq_ -from collections import defaultdict -import uuid -import datetime - -import mock - -import ckan.tests.factories as factories -import ckan.tests.helpers as helpers -from ckan.migration.migrate_package_activity import (migrate_dataset, - wipe_activity_detail, - PackageDictizeMonkeyPatch) -from ckan.migration.revision_legacy_code import ( - RevisionTableMappings, make_package_revision) -from ckan.model.activity import package_activity_list -from ckan import model -import ckan.logic - - -class TestMigrateDataset(object): - def setup(self): - helpers.reset_db() - - @classmethod - def teardown_class(cls): - helpers.reset_db() - - def test_migration(self): - # Test that the migration correctly takes the package_revision (etc) - # tables and populates the Activity.data, i.e. it does it the same as - # if you made a change to the dataset with the current version of CKAN - # and the Activity was created by activity_stream_item(). - dataset = factories.Dataset(resources=[ - {u'url': u'http://example.com/a.csv', u'format': u'csv'} - ]) - activity = package_activity_list(dataset['id'], 0, 0)[0] - activity_data_as_it_should_be = copy.deepcopy(activity.data) - - # Remove 'activity.data.package' to provoke the migration to regenerate - # it from package_revision (etc) - activity = model.Activity.get(activity.id) - del activity.data['package'] - model.repo.commit_and_remove() - with PackageDictizeMonkeyPatch(): - migrate_dataset(dataset['name'], {}) - - activity_data_migrated = \ - package_activity_list(dataset['id'], 0, 0)[0].data - eq_(activity_data_as_it_should_be, activity_data_migrated) - - def test_migration_with_multiple_revisions(self): - dataset = factories.Dataset(resources=[ - {u'url': u'http://example.com/a.csv', u'format': u'csv'} - ]) - make_package_revision(model.Package.get(dataset['id'])) - dataset['title'] = u'Title 2' - helpers.call_action(u'package_update', **dataset) - make_package_revision(model.Package.get(dataset['id'])) - dataset['title'] = u'Title 3' - helpers.call_action(u'package_update', **dataset) - make_package_revision(model.Package.get(dataset['id'])) - - activity = package_activity_list(dataset['id'], 0, 0)[1] - activity_data_as_it_should_be = copy.deepcopy(activity.data['package']) - - # Remove 'activity.data.package.resources' to provoke the migration to - # regenerate it from package_revision (etc) - activity = model.Activity.get(activity.id) - activity.data = {u'actor': None, u'package': {u'title': u'Title 2'}} - model.Session.commit() - model.Session.remove() - # double check that worked... - assert not \ - model.Activity.get(activity.id).data['package'].get(u'resources') - - with PackageDictizeMonkeyPatch(): - migrate_dataset(dataset['name'], {}) - - eq_.__self__.maxDiff = None - activity_data_migrated = \ - package_activity_list(dataset['id'], 0, 0)[1].data['package'] - eq_(activity_data_as_it_should_be, activity_data_migrated) - eq_(activity_data_migrated['title'], u'Title 2') - - def test_a_contemporary_activity_needs_no_migration(self): - # An Activity created by a change under the current CKAN should not - # need a migration - check it does a nop - dataset = factories.Dataset(resources=[ - {u'url': u'http://example.com/a.csv', u'format': u'csv'} - ]) - activity = package_activity_list(dataset['id'], 0, 0)[0] - activity_data_before = copy.deepcopy(activity.data) - - with PackageDictizeMonkeyPatch(): - migrate_dataset(dataset['name'], {}) - - activity_data_after = package_activity_list(dataset['id'], 0, 0)[0].data - eq_(activity_data_before, activity_data_after) - - def test_revision_missing(self): - dataset = factories.Dataset(resources=[ - {u'url': u'http://example.com/a.csv', u'format': u'csv'} - ]) - dataset['title'] = u'Title 2' - helpers.call_action(u'package_update', **dataset) - # delete a part of the revision, so package_show for the revision will - # return NotFound - PackageRevision = RevisionTableMappings.instance().PackageRevision - model.Session.query(PackageRevision).delete() - model.Session.commit() - # Adjust the Activity as if CKAN <=2.8 saved it, so that it will be - # migrated - activity = package_activity_list(dataset['id'], 0, 0)[1] - activity = model.Activity.get(activity.id) - activity.data = {u'actor': None, - u'package': {u'title': u'Test Dataset'}} - activity.revision_id = u'not real one' # because Revisioner wrote that, - # and that is no longer active - - model.Session.commit() - model.Session.remove() - # double check that worked... - assert not \ - model.Activity.get(activity.id).data['package'].get(u'resources') - - errors = defaultdict(int) - with PackageDictizeMonkeyPatch(): - migrate_dataset(dataset['name'], errors) - - eq_(dict(errors), {u'Revision missing': 1}) - activity_data_migrated = \ - package_activity_list(dataset['id'], 0, 0)[1].data - # the title is there so the activity stream can display it - eq_(activity_data_migrated['package']['title'], u'Test Dataset') - # the rest of the dataset is missing - better that than just the - # dictized package without resources, extras etc - assert u'resources' not in activity_data_migrated['package'] - - def test_revision_and_data_missing(self): - dataset = factories.Dataset(resources=[ - {u'url': u'http://example.com/a.csv', u'format': u'csv'} - ]) - dataset['title'] = u'Title 2' - helpers.call_action(u'package_update', **dataset) - # delete a part of the revision, so package_show for the revision will - # return NotFound - PackageRevision = RevisionTableMappings.instance().PackageRevision - model.Session.query(PackageRevision).delete() - model.Session.commit() - # delete 'activity.data.package' so it needs migrating AND the package - # title won't be available, so we test how the migration deals with - # that - activity = package_activity_list(dataset['id'], 0, 0)[1] - activity = model.Activity.get(activity.id) - del activity.data['package'] - activity.revision_id = u'dummy-value' - model.Session.commit() - - errors = defaultdict(int) - with PackageDictizeMonkeyPatch(): - migrate_dataset(dataset['name'], errors) - - eq_(dict(errors), {u'Revision missing': 1}) - activity_data_migrated = \ - package_activity_list(dataset['id'], 0, 0)[1].data - # the title is there so the activity stream can display it - eq_(activity_data_migrated['package']['title'], u'unknown') - assert u'resources' not in activity_data_migrated['package'] - - def test_package_show_error(self): - dataset = factories.Dataset(resources=[ - {u'url': u'http://example.com/a.csv', u'format': u'csv'} - ]) - make_package_revision(model.Package.get(dataset['id'])) - # delete 'activity.data.package.resources' so it needs migrating - activity = package_activity_list(dataset['id'], 0, 0)[0] - activity = model.Activity.get(activity.id) - activity.data = {u'actor': None, - u'package': {u'title': u'Test Dataset'}} - model.Session.commit() - model.Session.remove() - # double check that worked... - assert not \ - model.Activity.get(activity.id).data['package'].get(u'resources') - - errors = defaultdict(int) - # package_show raises an exception - could be because data doesn't - # conform to the latest dataset schema or is incompatible with - # currently installed plugins. Those errors shouldn't prevent the - # migration from going ahead. - ckan.logic._actions['package_show'] = \ - mock.MagicMock(side_effect=Exception(u'Schema error')) - - try: - with PackageDictizeMonkeyPatch(): - migrate_dataset(dataset['name'], errors) - finally: - # restore package_show - ckan.logic.clear_actions_cache() - - eq_(dict(errors), {u'Schema error': 1}) - - -class TestWipeActivityDetail(object): - def setup(self): - helpers.reset_db() - - @classmethod - def teardown_class(cls): - helpers.reset_db() - - def test_wipe_activity_detail(self): - dataset = factories.Dataset() - user = factories.User() - activity = factories.Activity( - user_id=user['id'], object_id=dataset['id'], revision_id=None, - activity_type=u'new package', - data={ - u'package': copy.deepcopy(dataset), - u'actor': u'Mr Someone', - }) - ad = model.ActivityDetail( - activity_id=activity['id'], object_id=dataset['id'], - object_type=u'package', activity_type=u'new package') - model.Session.add(ad) - model.Session.commit() - eq_(model.Session.query(model.ActivityDetail).count(), 1) - wipe_activity_detail(delete_activity_detail=u'y') - eq_(model.Session.query(model.ActivityDetail).count(), 0) - - def test_dont_wipe_activity_detail(self): - dataset = factories.Dataset() - user = factories.User() - activity = factories.Activity( - user_id=user['id'], object_id=dataset['id'], revision_id=None, - activity_type=u'new package', - data={ - u'package': copy.deepcopy(dataset), - u'actor': u'Mr Someone', - }) - ad = model.ActivityDetail( - activity_id=activity['id'], object_id=dataset['id'], - object_type=u'package', activity_type=u'new package') - model.Session.add(ad) - model.Session.commit() - eq_(model.Session.query(model.ActivityDetail).count(), 1) - wipe_activity_detail(delete_activity_detail=u'n') # i.e. don't do it! - eq_(model.Session.query(model.ActivityDetail).count(), 1) diff --git a/ckan/tests/migration/test_revision_legacy_code.py b/ckan/tests/migration/test_revision_legacy_code.py deleted file mode 100644 index 5150d41a4e4..00000000000 --- a/ckan/tests/migration/test_revision_legacy_code.py +++ /dev/null @@ -1,303 +0,0 @@ -# encoding: utf-8 - -from difflib import unified_diff -from pprint import pprint, pformat - -from ckan import model - -import ckan.lib.search as search -from ckan.lib.dictization.model_save import package_dict_save -from ckan.lib.create_test_data import CreateTestData - -from ckan.migration.revision_legacy_code import package_dictize_with_revisions as package_dictize -from ckan.migration.revision_legacy_code import RevisionTableMappings, make_package_revision -from ckan.migration.migrate_package_activity import PackageDictizeMonkeyPatch - - -# tests here have been moved from ckan/tests/legacy/lib/test_dictization.py -class TestPackageDictizeWithRevisions(object): - @classmethod - def setup_class(cls): - # clean the db so we can run these tests on their own - model.repo.rebuild_db() - search.clear_all() - CreateTestData.create() - make_package_revision(model.Package.by_name('annakarenina')) - - cls.package_expected = { - u'author': None, - u'author_email': None, - u'creator_user_id': None, - 'extras': [ - # extra_revision_table is no longer being populated because - # PackageExtra no longer has - # vdm.sqlalchemy.Revisioner(extra_revision_table) (removed in - # #4691) so don't test extras for the moment - # {'key': u'david', 'state': u'active', 'value': u'new_value'}, - # {'key': u'genre', 'state': u'active', 'value': u'new_value'}, - # {'key': u'original media', 'state': u'active', - # 'value': u'book'} - ], - 'groups': [{ - u'name': u'david', - u'capacity': u'public', - u'image_url': u'', - u'image_display_url': u'', - u'description': u'These are books that David likes.', - u'display_name': u"Dave's books", - u'type': u'group', - u'state': u'active', - u'is_organization': False, - u'title': u"Dave's books", - u"approval_status": u"approved"}, - { - u'name': u'roger', - u'capacity': u'public', - u'description': u'Roger likes these books.', - u'image_url': u'', - 'image_display_url': u'', - 'display_name': u"Roger's books", - u'type': u'group', - u'state': u'active', - u'is_organization': False, - u'title': u"Roger's books", - u"approval_status": u"approved"}], - 'isopen': True, - u'license_id': u'other-open', - 'license_title': u'Other (Open)', - 'organization': None, - u'owner_org': None, - u'maintainer': None, - u'maintainer_email': None, - u'name': u'annakarenina', - u'notes': u'Some test notes\n\n### A 3rd level heading\n\n**Some bolded text.**\n\n*Some italicized text.*\n\nForeign characters:\nu with umlaut \xfc\n66-style quote \u201c\nforeign word: th\xfcmb\n\nNeeds escaping:\nleft arrow <\n\n\n\n', - 'num_resources': 2, - 'num_tags': 3, - u'private': False, - 'relationships_as_object': [], - 'relationships_as_subject': [], - 'resources': [{u'alt_url': u'alt123', - u'cache_last_updated': None, - u'cache_url': None, - u'description': u'Full text. Needs escaping: " Umlaut: \xfc', - u'format': u'plain text', - u'hash': u'abc123', - u'last_modified': None, - u'mimetype': None, - u'mimetype_inner': None, - u'name': None, - u'position': 0, - u'resource_type': None, - u'size': None, - u'size_extra': u'123', - u'url_type': None, - u'state': u'active', - u'url': u'http://datahub.io/download/x=1&y=2',}, - {u'alt_url': u'alt345', - u'cache_last_updated': None, - u'cache_url': None, - u'description': u'Index of the novel', - u'format': u'JSON', - u'hash': u'def456', - u'last_modified': None, - u'mimetype': None, - u'mimetype_inner': None, - u'name': None, - u'position': 1, - u'resource_type': None, - u'url_type': None, - u'size': None, - u'size_extra': u'345', - u'state': u'active', - u'url': u'http://datahub.io/index.json'}], - u'state': u'active', - 'tags': [{u'name': u'Flexible \u30a1', - 'display_name': u'Flexible \u30a1', - u'state': u'active'}, - {'display_name': u'russian', - u'name': u'russian', - u'state': u'active'}, - {'display_name': u'tolstoy', - u'name': u'tolstoy', - u'state': u'active'}], - u'title': u'A Novel By Tolstoy', - u'type': u'dataset', - u'url': u'http://datahub.io', - u'version': u'0.7a', - } - - @classmethod - def teardown_class(cls): - model.repo.rebuild_db() - model.Session.remove() - - def test_09_package_alter(self): - - context = {"model": model, - "session": model.Session, - "user": 'testsysadmin' - } - - anna1 = model.Session.query(model.Package).filter_by(name='annakarenina').one() - - anna_dictized = package_dictize(anna1, context) - - anna_dictized["name"] = u'annakarenina_changed' - anna_dictized["resources"][0]["url"] = u'http://new_url' - - package_dict_save(anna_dictized, context) - model.Session.commit() - model.Session.remove() - make_package_revision(model.Package.by_name('annakarenina_changed')) - - pkg = model.Session.query(model.Package).filter_by(name='annakarenina_changed').one() - - package_dictized = package_dictize(pkg, context) - - resources_revisions = model.Session.query(RevisionTableMappings.instance().ResourceRevision).filter_by(package_id=anna1.id).all() - - sorted_resource_revisions = sorted(resources_revisions, key=lambda x: (x.revision_timestamp, x.url))[::-1] - for res in sorted_resource_revisions: - print(res.id, res.revision_timestamp, res.state) - assert len(sorted_resource_revisions) == 4 # 2 resources originally, then make_package_revision saves them both again - - # Make sure we remove changeable fields BEFORE we store the pretty-printed version - # for comparison - clean_package_dictized = self.remove_changable_columns(package_dictized) - - anna_original = pformat(anna_dictized) - anna_after_save = pformat(clean_package_dictized) - - assert self.remove_changable_columns(anna_dictized) == clean_package_dictized, \ - "\n".join(unified_diff(anna_original.split("\n"), anna_after_save.split("\n"))) - - # changes to the package, relied upon by later tests - anna1 = model.Session.query(model.Package).filter_by(name='annakarenina_changed').one() - anna_dictized = package_dictize(anna1, context) - anna_dictized['name'] = u'annakarenina_changed2' - anna_dictized['resources'][0]['url'] = u'http://new_url2' - anna_dictized['tags'][0]['name'] = u'new_tag' - anna_dictized['tags'][0].pop('id') # test if - anna_dictized['extras'][0]['value'] = u'new_value' - - package_dict_save(anna_dictized, context) - model.Session.commit() - model.Session.remove() - make_package_revision(model.Package.by_name('annakarenina_changed2')) - - anna1 = model.Session.query(model.Package).filter_by(name='annakarenina_changed2').one() - anna_dictized = package_dictize(anna1, context) - anna_dictized['notes'] = 'wee' - anna_dictized['resources'].append({ - 'format': u'plain text', - 'url': u'http://newurl'} - ) - anna_dictized['tags'].append({'name': u'newnew_tag'}) - anna_dictized['extras'].append({'key': 'david', - 'value': u'new_value'}) - - package_dict_save(anna_dictized, context) - model.Session.commit() - model.Session.remove() - make_package_revision(model.Package.by_name('annakarenina_changed2')) - - def test_13_get_package_in_past(self): - - context = {'model': model, - 'session': model.Session} - - anna1 = model.Session.query(model.Package).filter_by(name='annakarenina_changed2').one() # this depends on the previous test running :( - - pkgrevisions = model.Session.query(RevisionTableMappings.instance().PackageRevision).filter_by(id=anna1.id).all() - sorted_packages = sorted(pkgrevisions, key=lambda x: x.revision_timestamp) - - context['revision_id'] = sorted_packages[0].revision_id # original state - - with PackageDictizeMonkeyPatch(): - first_dictized = self.remove_changable_columns(package_dictize(anna1, context)) - assert self.remove_changable_columns(self.package_expected) == first_dictized - - context['revision_id'] = sorted_packages[1].revision_id - - second_dictized = self.remove_changable_columns(package_dictize(anna1, context)) - - first_dictized["name"] = u'annakarenina_changed' - first_dictized["resources"][0]["url"] = u'http://new_url' - - assert second_dictized == first_dictized - - context['revision_id'] = sorted_packages[2].revision_id - third_dictized = self.remove_changable_columns(package_dictize(anna1, context)) - - second_dictized['name'] = u'annakarenina_changed2' - second_dictized['resources'][0]['url'] = u'http://new_url2' - second_dictized['tags'][0]['name'] = u'new_tag' - second_dictized['tags'][0]['display_name'] = u'new_tag' - second_dictized['state'] = 'active' - - print('\n'.join(unified_diff(pformat(second_dictized).split('\n'), pformat(third_dictized).split('\n')))) - assert second_dictized == third_dictized - - context['revision_id'] = sorted_packages[3].revision_id # original state - forth_dictized = self.remove_changable_columns(package_dictize(anna1, context)) - - third_dictized['notes'] = 'wee' - third_dictized['resources'].insert(2, { - u'cache_last_updated': None, - u'cache_url': None, - u'description': u'', - u'format': u'plain text', - u'hash': u'', - u'last_modified': None, - u'mimetype': None, - u'mimetype_inner': None, - u'name': None, - u'position': 2, - u'resource_type': None, - u'url_type': None, - u'size': None, - u'state': u'active', - u'url': u'http://newurl'}) - third_dictized['num_resources'] = third_dictized['num_resources'] + 1 - - third_dictized['tags'].insert(1, {'name': u'newnew_tag', 'display_name': u'newnew_tag', 'state': 'active'}) - third_dictized['num_tags'] = third_dictized['num_tags'] + 1 - third_dictized['state'] = 'active' - third_dictized['state'] = 'active' - - pprint(third_dictized) - pprint(forth_dictized) - - assert third_dictized == forth_dictized - - def remove_changable_columns(self, dict, remove_package_id=False): - ids_to_keep = ['license_id', 'creator_user_id'] - if not remove_package_id: - ids_to_keep.append('package_id') - - for key, value in dict.items(): - if key.endswith('id') and key not in ids_to_keep: - dict.pop(key) - if key == 'created': - dict.pop(key) - if 'timestamp' in key: - dict.pop(key) - if key in ['metadata_created','metadata_modified']: - dict.pop(key) - if isinstance(value, list): - for new_dict in value: - self.remove_changable_columns(new_dict, - key in ['resources', 'extras'] or remove_package_id) - - # TEMPORARY HACK - we remove 'extras' so they aren't tested. This - # is due to package_extra_revisions being migrated from ckan/model - # in #4691 but not the rest of the model revisions just yet. Until - # we finish this work (#4664) it is hard to get this working - - # extra_revision_table is no longer being populated because - # PackageExtra no longer has - # vdm.sqlalchemy.Revisioner(extra_revision_table). However #4664 - # will allow use to manually create revisions and test this again. - if key == 'extras': - dict.pop(key) - # END OF HACK - return dict