From 1656cb63d5a1f8f7072d31634e169f54726a1349 Mon Sep 17 00:00:00 2001 From: Albert Louis Rossi Date: Mon, 9 Apr 2018 14:52:09 -0500 Subject: [PATCH] dcache-resilience: repair over-aggressive handling of broken file messages Motivation: While https://rb.dcache.org/r/10734 (commit a352dfcad686b71a37fb7d5c6edb9378c6e6df3b) made some important repairs to the handling of broken files, it did not catch an existing bug: the code path neglects to check that the file is actually ONLINE and that the pool is resilient. This results in nearly double the number of alarms being reported (since the message received was probably already associated with a BROKEN_FILE alarm). Modification: Do the necessary checks. Result: Resilience should not be reporting on non-resilient files which have been corrupted. Target: master Request: 4.1 Request: 4.0 Request: 3.2 Request: 3.1 Request: 3.0 Request: 2.16 Acked-by: Dmitry Require-notes: yes Require-book: no --- .../resilience/handlers/FileOperationHandler.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/modules/dcache-resilience/src/main/java/org/dcache/resilience/handlers/FileOperationHandler.java b/modules/dcache-resilience/src/main/java/org/dcache/resilience/handlers/FileOperationHandler.java index f9f31e83dc7..404556aee02 100644 --- a/modules/dcache-resilience/src/main/java/org/dcache/resilience/handlers/FileOperationHandler.java +++ b/modules/dcache-resilience/src/main/java/org/dcache/resilience/handlers/FileOperationHandler.java @@ -167,14 +167,19 @@ public void handleBrokenFileLocation(PnfsId pnfsId, String pool) { = FileUpdate.getAttributes(pnfsId, pool, MessageType.CORRUPT_FILE, namespace); - int actual = 0; - int countable = 0; + if (attributes == null) { + LOGGER.trace("{} not ONLINE.", pnfsId); + return; + } - if (attributes != null) { - actual = attributes.getLocations().size(); - countable = poolInfoMap.getCountableLocations(attributes.getLocations()); + if 
(!poolInfoMap.isResilientPool(pool)) { + LOGGER.trace("{} not in resilient group.", pool); + return; } + int actual = attributes.getLocations().size(); + int countable = poolInfoMap.getCountableLocations(attributes.getLocations()); + if (actual <= 1) { /* * This is the only copy, or it is not/no longer in the