Skip to content

Commit

Permalink
Merge 3cd8b58 into 26df48e
Browse files Browse the repository at this point in the history
  • Loading branch information
mauritsvanrees committed Apr 26, 2018
2 parents 26df48e + 3cd8b58 commit b2fa56d
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 32 deletions.
4 changes: 3 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ Changelog
1.7.3 (unreleased)
------------------

- Nothing changed yet.
- Catch KeyError for ``getPath`` in more cases.
Fixes `issue #14 <https://github.com/collective/collective.catalogcleanup/issues/14>`_.
[maurits]


1.7.2 (2017-09-18)
Expand Down
3 changes: 2 additions & 1 deletion buildout.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ pre-commit-hook = True
return-status-codes = False
flake8 = True
flake8-ignore = P001

# We are doing crazy stuff, so it is bound to be complex:
flake8-max-complexity = 20

[versions]
collective.noindexing = 1.2
Expand Down
84 changes: 55 additions & 29 deletions collective/catalogcleanup/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,21 @@
logger = logging.getLogger('collective.catalogcleanup')


def safe_path(item, default='notfound'):
    """Return the catalog path of a brain, tolerating broken entries.

    ``brain.getPath()`` raises a ``KeyError`` when the record id is no
    longer present in the catalog's paths mapping (see issue #14 noted
    in the changelog).  In that case return *default* instead of
    letting the error propagate, so logging of problem brains keeps
    working.

    :param item: catalog brain (any object with a ``getPath`` method).
    :param default: value returned when the path lookup fails.
        Defaults to ``'notfound'`` for backward compatibility.
    """
    try:
        return item.getPath()
    except KeyError:
        return default


def path_len(item, fallback=9999):
    """Sort key: the length of a catalog brain's path.

    Used when sorting duplicate-uid brains so that the item with the
    shortest path is kept.  ``getPath()`` can raise a ``KeyError`` for
    broken catalog entries; in that case return *fallback* — a
    deliberately high number — so broken brains sort last and are
    never chosen as the item to keep.

    :param item: catalog brain (any object with a ``getPath`` method).
    :param fallback: sort value used when the path lookup fails.
        Defaults to ``9999`` for backward compatibility.
    """
    try:
        return len(item.getPath())
    except KeyError:
        return fallback


class Cleanup(BrowserView):
Expand Down Expand Up @@ -54,9 +67,10 @@ def __call__(self, dry_run=None):
problems = 0
self.newline()
if getToolByName(context, catalog_id, None) is None:
self.msg('Ignored non existing catalog %s.', catalog_id)
self.msg(
'Ignored non existing catalog {0}.'.format(catalog_id))
continue
self.msg('Handling catalog %s.', catalog_id)
self.msg('Handling catalog {0}.'.format(catalog_id))
problems += self.report(catalog_id)
problems += self.remove_without_uids(catalog_id)
problems += self.remove_without_object(catalog_id)
Expand All @@ -66,14 +80,13 @@ def __call__(self, dry_run=None):
# Non unique ids seem persistent in the reference catalog.
# Running the code several times keeps creating new uids.
problems += self.non_unique_uids(catalog_id)
self.msg('%s: total problems: %d', catalog_id, problems)
self.msg('{0}: total problems: {1:d}'.format(catalog_id, problems))

self.newline()
self.msg('Done with catalog cleanup.')
return '\n'.join(self.messages)

def msg(self, msg, *args, **kwargs):
msg = msg % args
def msg(self, msg, **kwargs):
level = kwargs.get('level', logging.INFO)
logger.log(level, msg)
self.messages.append(msg)
Expand All @@ -88,7 +101,7 @@ def report(self, catalog_id):
context = aq_inner(self.context)
catalog = getToolByName(context, catalog_id)
size = len(catalog)
self.msg('Brains in %s: %d', catalog_id, size)
self.msg('Brains in {0}: {1:d}'.format(catalog_id, size))
# Getting all brains from the catalog may give a different
# result for some reason. Using an empty filter to query the
# catalog will give a DeprecationWarning and may not work on
Expand All @@ -99,8 +112,9 @@ def report(self, catalog_id):
# we get the DeprecationWarning anyway. So be it.
alternative_size = len(catalog(**standard_filter))
if alternative_size != size:
self.msg('Brains in %s using standard filter is different: %d',
catalog_id, alternative_size, level=logging.WARN)
self.msg(
'Brains in {0} using standard filter is different: {1:d}'
.format(catalog_id, alternative_size), level=logging.WARN)
return 1
return 0

Expand All @@ -125,7 +139,8 @@ def remove_without_uids(self, catalog_id):
catalog.uncatalog_object(path)
uncatalog += 1
self.msg(
'%s: removed %d brains without UID.', catalog_id, uncatalog)
'{0}: removed {1:d} brains without UID.'
.format(catalog_id, uncatalog))
return uncatalog

def remove_without_object(self, catalog_id):
Expand All @@ -152,10 +167,12 @@ def remove_without_object(self, catalog_id):
status[obj] = count + 1

for error, value in status.items():
self.msg('%s: removed %d brains with status %s.', catalog_id,
value, error)
self.msg(
'{0}: removed {1:d} brains with status {2}.'
.format(catalog_id, value, error))
if not status:
self.msg('%s: removed no brains in object check.', catalog_id)
self.msg(
'{0}: removed no brains in object check.'.format(catalog_id))
return sum(status.values())

def check_references(self, catalog_id='reference_catalog'):
Expand Down Expand Up @@ -199,13 +216,17 @@ def check_references(self, catalog_id='reference_catalog'):
break

if ref_errors:
self.msg('%s: problem getting %d references.', catalog_id,
ref_errors)
self.msg(
'{0}: problem getting {1:d} references.'
.format(catalog_id, ref_errors))
for error, value in status.items():
self.msg('%s: removed %d brains with status %s for source or '
'target object.', catalog_id, value, error)
self.msg(
'{0}: removed {1:d} brains with status {2} for source or '
'target object.'.format(catalog_id, value, error))
if not status:
self.msg('%s: removed no brains in reference check.', catalog_id)
self.msg(
'{0}: removed no brains in reference check.'
.format(catalog_id))
return sum(status.values()) + ref_errors

def non_unique_uids(self, catalog_id):
Expand All @@ -221,7 +242,7 @@ def non_unique_uids(self, catalog_id):
context = aq_inner(self.context)
catalog = getToolByName(context, catalog_id)
if 'UID' not in catalog.indexes():
self.msg('%s: no UID index.', catalog_id)
self.msg('{0}: no UID index.'.format(catalog_id))
return
non_unique = 0
changed = 0
Expand All @@ -239,7 +260,7 @@ def non_unique_uids(self, catalog_id):
# Sort by length of path.
items = sorted(items, key=path_len)
logger.info('%s: uid %s is kept for %s', catalog_id, uid,
items[0].getPath())
safe_path(items[0]))
for item in items[1:]:
obj = self.get_object_or_status(item)
if isinstance(obj, basestring):
Expand All @@ -261,7 +282,7 @@ def non_unique_uids(self, catalog_id):
obj.reindexObject(idxs=['UID'])
new_uid = obj.UID()
logger.info('%s: uid %s is inherited by %s.',
catalog_id, new_uid, item.getPath())
catalog_id, new_uid, safe_path(item))
continue
# We need a change.
changed += 1
Expand All @@ -278,23 +299,28 @@ def non_unique_uids(self, catalog_id):
if isinstance(obj, Reference):
logger.warn('%s: removing reference %s with '
'duplicate uid %s.', catalog_id,
item.getPath(), old_uid)
safe_path(item), old_uid)
del aq_parent(obj)[obj.getId()]
continue
obj._updateCatalog(context)
obj.reindexObject(idxs=['UID'])
logger.info('{0}: new uid {1} for {2} (was {3}).'.format(
catalog_id, obj.UID(), item.getPath(), old_uid))
catalog_id, obj.UID(), safe_path(item), old_uid))

if obj_errors:
self.msg('%s: problem getting %d objects.', catalog_id,
obj_errors)
self.msg('%s: %d non unique uids found.', catalog_id, non_unique)
self.msg(
'{0}: problem getting {1:d} objects.'
.format(catalog_id, obj_errors))
self.msg(
'{0}: {1:d} non unique uids found.'.format(catalog_id, non_unique))
if self.dry_run:
self.msg('%s: %d items need new unique uids.', catalog_id, changed)
self.msg(
'{0}: {1:d} items need new unique uids.'
.format(catalog_id, changed))
else:
self.msg('%s: %d items given new unique uids.', catalog_id,
changed)
self.msg(
'{0}: {1:d} items given new unique uids.'
.format(catalog_id, changed))
return obj_errors + changed

def get_object_or_status(self, brain, getter='getObject'):
Expand Down
3 changes: 2 additions & 1 deletion collective/catalogcleanup/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
except pkg_resources.DistributionNotFound:
from plone.app.testing import PLONE_FIXTURE
else:
from plone.app.contenttypes.testing import PLONE_APP_CONTENTTYPES_FIXTURE as PLONE_FIXTURE
from plone.app.contenttypes.testing import (
PLONE_APP_CONTENTTYPES_FIXTURE as PLONE_FIXTURE)


class CatalogCleanupLayer(PloneSandboxLayer):
Expand Down

0 comments on commit b2fa56d

Please sign in to comment.