Skip to content

Commit

Permalink
Improve tests
Browse files Browse the repository at this point in the history
* Use PackageDictizeMonkeyPatch to patch package_dictize, because a context manager like this cleans up after itself effectively, so any tests that run after test_migrate_package_activity.py and test_revision_legacy_code.py will use the standard package_show now.
* Fix test_revision_legacy_code.py now package_extra_revision table is not being populated at the moment.
* Copied in create_object_version() from vdm, so we can use PackageExtraRevision without its .continuity being mapped to the PackageExtra.
  • Loading branch information
David Read committed Apr 12, 2019
1 parent 6e94632 commit 15507df
Show file tree
Hide file tree
Showing 6 changed files with 392 additions and 52 deletions.
47 changes: 33 additions & 14 deletions ckan/migration/migrate_package_activity.py
Expand Up @@ -77,28 +77,47 @@ def migrate_all_datasets():
dataset_names = logic.get_action(u'package_list')(get_context(), {})
num_datasets = len(dataset_names)
errors = defaultdict(int)
for i, dataset_name in enumerate(dataset_names):
print(u'\n{}/{} dataset: {}'.format(i + 1, num_datasets, dataset_name))
migrate_dataset(dataset_name, errors)
with PackageDictizeMonkeyPatch():
for i, dataset_name in enumerate(dataset_names):
print(u'\n{}/{} dataset: {}'
.format(i + 1, num_datasets, dataset_name))
migrate_dataset(dataset_name, errors)
print(u'Migrated:')
print(u' {} datasets'.format(len(dataset_names)))
num_activities = num_activities_migratable()
print(u' with {} activities'.format(num_activities))
print_errors(errors)


class PackageDictizeMonkeyPatch(object):
'''Patches package_dictize to add back in the revision functionality. This
allows you to specify context['revision_id'] and see the old revisions of
a package.
This works as a context object. We could have used mock.patch and saved a
couple of lines here, but we'd have had to add mock to requirements.txt.
'''
def __enter__(self):
import ckan.lib.dictization.model_dictize as model_dictize
try:
import ckan.migration.revision_legacy_code as revision_legacy_code
except ImportError:
# convenient to look for it in the current directory if you just
# download these files because you are upgrading an older ckan
import revision_legacy_code
self.existing_function = model_dictize.package_dictize
model_dictize.package_dictize = \
revision_legacy_code.package_dictize_with_revisions

def __exit__(self, exc_type, exc_val, exc_tb):
import ckan.lib.dictization.model_dictize as model_dictize
model_dictize.package_dictize = self.existing_function


def migrate_dataset(dataset_name, errors):
# monkey patch the legacy versions of code back into CKAN - so it has the
# revision functionality needed for this migration
import ckan.lib.dictization.model_dictize as model_dictize
try:
import ckan.migration.revision_legacy_code as revision_legacy_code
except ImportError:
# convenient to look for it in the current directory if you just
# download these files because you are upgrading an older ckan
import revision_legacy_code
model_dictize.package_dictize = \
revision_legacy_code.package_dictize_with_revisions
'''
NB this function should be run in a `with PackageDictizeMonkeyPatch():`
'''

import ckan.logic as logic
from ckan import model
Expand Down
298 changes: 294 additions & 4 deletions ckan/migration/revision_legacy_code.py
@@ -1,6 +1,14 @@
# encoding: utf-8

import uuid
import datetime

from sqlalchemy.sql import select
from sqlalchemy import and_, inspect
import sqlalchemy.orm.properties
from sqlalchemy.orm import class_mapper
from sqlalchemy.orm import relation
from vdm.sqlalchemy import add_fake_relation

import ckan.logic as logic
import ckan.lib.dictization as d
Expand Down Expand Up @@ -195,9 +203,291 @@ def _execute_with_revision(q, rev_table, context):
return session.execute(q)


# Copied from vdm BUT without '.continuity' mapped to the base object
def create_object_version(mapper_fn, base_object, rev_table):
'''Create the Version Domain Object corresponding to base_object.
E.g. if Package is our original object we should do::
# name of Version Domain Object class
PackageVersion = create_object_version(..., Package, ...)
NB: This must obviously be called after mapping has happened to
base_object.
'''
# TODO: can we always assume all versioned objects are stateful?
# If not need to do an explicit check
class MyClass(object):
def __init__(self, **kw):
for k, v in kw.iteritems():
setattr(self, k, v)

name = base_object.__name__ + u'Revision'
MyClass.__name__ = str(name)
MyClass.__continuity_class__ = base_object

# Must add this so base object can retrieve revisions ...
base_object.__revision_class__ = MyClass

ourmapper = mapper_fn(
MyClass, rev_table,
# NB: call it all_revisions_... rather than just revisions_... as it
# will yield all revisions not just those less than the current
# revision

# ---------------------
# Deviate from VDM here
#
# properties={
# 'continuity':relation(base_object,
# backref=backref('all_revisions_unordered',
# cascade='all, delete, delete-orphan'),
# order_by=rev_table.c.revision_id.desc()
# ),
# },
# order_by=[rev_table.c.continuity_id, rev_table.c.revision_id.desc()]
# ---------------------
)
base_mapper = class_mapper(base_object)
# add in 'relationship' stuff from continuity onto revisioned obj
# 3 types of relationship
# 1. scalar (i.e. simple fk)
# 2. list (has many) (simple fk the other way)
# 3. list (m2m) (join table)
#
# Also need to check whether related object is revisioned
#
# If related object is revisioned then can do all of these
# If not revisioned can only support simple relation (first case -- why?)
for prop in base_mapper.iterate_properties:
try:
is_relation = prop.__class__ == \
sqlalchemy.orm.properties.PropertyLoader
except AttributeError:
# SQLAlchemy 0.9
is_relation = prop.__class__ == \
sqlalchemy.orm.properties.RelationshipProperty

if is_relation:
# in sqlachemy 0.4.2
# prop_remote_obj = prop.select_mapper.class_
# in 0.4.5
prop_remote_obj = prop.argument
remote_obj_is_revisioned = \
getattr(prop_remote_obj, u'__revisioned__', False)
# this is crude, probably need something better
is_many = (prop.secondary is not None or prop.uselist)
if remote_obj_is_revisioned:
propname = prop.key
add_fake_relation(MyClass, propname, is_many=is_many)
elif not is_many:
ourmapper.add_property(prop.key, relation(prop_remote_obj))
else:
# TODO: actually deal with this
# raise a warning of some kind
msg = \
u'Skipping adding property %s to revisioned object' % prop

return MyClass


# Tests use this to manually create revisions, that look just like how
# CKAN<=2.8 used to create automatically.
def make_package_revision(package):
'''Manually create a revision for a package and its related objects
'''
# So far only PackageExtra needs manually creating - the rest still happen
# automatically
instances = []
extras = model.Session.query(model.PackageExtra) \
.filter_by(package_id=package.id) \
.all()
instances.extend(extras)
make_revision(instances)


# Tests use this to manually create revisions, that look just like how
# CKAN<=2.8 used to create automatically.
def make_revision(instances):
'''Manually create a revision.
Copies a new/changed row from a table (e.g. Package) into its
corresponding revision table (e.g. PackageRevision) and makes an entry
in the Revision table.
'''
# model.repo.new_revision() was called in the model code, which is:
# vdm.sqlalchemy.tools.Repository.new_revision() which did this:
Revision = RevisionTableMappings.instance().Revision
revision = Revision()
model.Session.add(revision)
# new_revision then calls:
# SQLAlchemySession.set_revision(self.session, rev), which is:
# vdm.sqlalchemy.base.SQLAlchemySession.set_revision() which did this:
revision.id = str(uuid.uuid4())
model.Session.add(revision)
model.Session.flush()

# In CKAN<=2.8 the revisioned tables (e.g. Package) had a mapper
# extension: vdm.sqlalchemy.Revisioner(package_revision_table)
# that triggered on table changes and records a copy in the
# corresponding revision table (e.g. PackageRevision).

# In Revisioner.before_insert() it does this:
for instance in instances:
is_changed = True # fake: check_real_change(instance)
if is_changed:
# set_revision(instance)
# which does this:
instance.revision = revision
instance.revision_id = revision.id
# Unfortunately modifying the Package (or whatever the instances are)
# will create another Activity object when we save the session, so
# delete that
existing_latest_activity = model.Session.query(model.Activity) \
.order_by(model.Activity.timestamp.desc()).first()
model.Session.commit()
new_latest_activity = model.Session.query(model.Activity) \
.order_by(model.Activity.timestamp.desc()).first()
if new_latest_activity != existing_latest_activity:
new_latest_activity.delete()
model.Session.commit()

# In Revision.after_update() or after_insert() it does this:
# self.make_revision(instance, mapper, connection)
# which is vdm.sqlalchemy.base.Revisioner.make_revision()
# which copies the Package to make a new PackageRevision
for instance in instances:
colvalues = {}
mapper = inspect(type(instance))
table = mapper.tables[0]
for key in table.c.keys():
val = getattr(instance, key)
colvalues[key] = val
assert instance.revision.id
colvalues['revision_id'] = instance.revision.id
colvalues['continuity_id'] = instance.id

# Allow for multiple SQLAlchemy flushes/commits per VDM revision
revision_table = \
RevisionTableMappings.instance() \
.revision_table_mapping[type(instance)]
ins = revision_table.insert().values(colvalues)
model.Session.execute(ins)

# the related Activity would get the revision_id added to it too.
# Here we simply assume it's the latest activity.
activity = model.Session.query(model.Activity) \
.order_by(model.Activity.timestamp.desc()) \
.first()
activity.revision_id = revision.id
model.Session.flush()

# In CKAN<=2.8 the session extension CkanSessionExtension had some
# extra code in before_commit() which wrote `revision_timestamp` and
# `expired_timestamp` values in the revision tables
# (e.g. package_revision) so that is added here:
for instance in instances:
if not hasattr(instance, u'__revision_class__'):
continue
revision_cls = instance.__revision_class__
revision_table = \
RevisionTableMappings.instance() \
.revision_table_mapping[type(instance)]
# when a normal active transaction happens

# this is an sql statement as we do not want it in object cache
model.Session.execute(
revision_table.update().where(
and_(revision_table.c.id == instance.id,
revision_table.c.current == u'1')
).values(current=u'0')
)

q = model.Session.query(revision_cls)
q = q.filter_by(expired_timestamp=datetime.datetime(9999, 12, 31),
id=instance.id)
results = q.all()
for rev_obj in results:
values = {}
if rev_obj.revision_id == revision.id:
values['revision_timestamp'] = revision.timestamp
else:
values['expired_timestamp'] = revision.timestamp
model.Session.execute(
revision_table.update().where(
and_(revision_table.c.id == rev_obj.id,
revision_table.c.revision_id == rev_obj.revision_id)
).values(**values)
)


# Revision tables (singleton)
class RevisionTableMappings(object):
_instance = None

@classmethod
def instance(cls):
if not cls._instance:
cls._instance = cls()
return cls._instance

def __init__(self):
# This uses the strangler pattern to gradually move the revision model
# out of ckan/model and into this file.
# We start with references to revision model in ckan/model/ and then
# gradually move the definitions here
self.revision_table = model.revision_table

self.Revision = model.Revision

self.package_revision_table = model.package_revision_table
self.PackageRevision = model.PackageRevision

self.resource_revision_table = model.resource_revision_table
self.ResourceRevision = model.ResourceRevision

self.extra_revision_table = model.extra_revision_table
self.PackageExtraRevision = create_object_version(
model.meta.mapper, model.PackageExtra,
self.extra_revision_table)

self.package_tag_revision_table = model.package_tag_revision_table
self.PackageTagRevision = model.PackageTagRevision

self.member_revision_table = model.member_revision_table
self.MemberRevision = model.MemberRevision

self.group_revision_table = model.group_revision_table
self.GroupRevision = model.GroupRevision

self.group_extra_revision_table = model.group_extra_revision_table
self.GroupExtraRevision = create_object_version(
model.meta.mapper, model.GroupExtra,
self.group_extra_revision_table)

self.package_relationship_revision_table = \
model.package_relationship_revision_table
self.PackageRelationshipRevision = model.PackageRelationshipRevision

self.system_info_revision_table = model.system_info_revision_table
self.SystemInfoRevision = model.SystemInfoRevision

self.revision_table_mapping = {
model.Package: self.package_revision_table,
model.Resource: self.resource_revision_table,
model.PackageExtra: self.extra_revision_table,
model.PackageTag: self.package_tag_revision_table,
model.Member: self.member_revision_table,
model.Group: self.group_revision_table,
}


# It's easiest if this code works on all versions of CKAN. After CKAN 2.8 the
# revision model is separate from the main model.
# (RevisionTableMappings will be defined above)

# CKAN<=2.8
revision_model = model
try:
model.PackageExtraRevision
# CKAN<=2.8
revision_model = model
except AttributeError:
# CKAN>2.8
revision_model = RevisionTableMappings.instance()
3 changes: 3 additions & 0 deletions ckan/model/__init__.py
Expand Up @@ -60,10 +60,12 @@
from group_extra import (
GroupExtra,
group_extra_table,
group_extra_revision_table,
)
from package_extra import (
PackageExtra,
package_extra_table,
extra_revision_table,
)
from resource import (
Resource,
Expand All @@ -88,6 +90,7 @@
)
from package_relationship import (
PackageRelationship,
PackageRelationshipRevision,
package_relationship_table,
package_relationship_revision_table,
)
Expand Down

0 comments on commit 15507df

Please sign in to comment.