From 0ebfa0553b2620db0d50ca563afdebf9dc2e32eb Mon Sep 17 00:00:00 2001 From: Albert Louis Rossi Date: Wed, 6 Sep 2023 08:00:10 -0500 Subject: [PATCH] dcache-qos: add policy support to scanner (qos rule engine 6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation: Implement the rule engine extension to QoS services. Modification: This patch modifies the periodic system-wide scanning of files in two ways: 1. The `nearline` scan no longer looks at all `NEARLINE CUSTODIAL` files, but only those for which a QoS policy is defined. It is no longer disabled by default. 2. The `online` scan by default still loops through all such files in the namespace in their natural order, but this form of scan can be swapped out for a full periodic pool scan (à la Resilience) instead. For the advantages and disadvantages of each, see the section in TheBook. Most of the rest of this patch is simply renaming or non-functional code changes. Result: Scanner better adapted to the new rule engine semantics. 
Target: master Patch: https://rb.dcache.org/r/14074 Depends-on: #14073 Acked-by: Tigran --- .../src/main/markdown/config-qos-engine.md | 26 +- .../scanner/admin/QoSScannerAdmin.java | 71 +++-- .../services/scanner/data/ScanSummary.java | 4 + .../scanner/data/SystemOperationMap.java | 272 +++++++++++------- .../scanner/data/SystemScanOperation.java | 12 +- .../scanner/data/SystemScanSummary.java | 10 +- .../scanner/handlers/NamespaceOpHandler.java | 6 +- .../scanner/handlers/SysOpHandler.java | 8 +- .../namespace/LocalNamespaceAccess.java | 81 +++--- .../scanner/namespace/NamespaceAccess.java | 7 + .../resources/org/dcache/qos/qos-scanner.xml | 13 +- skel/share/defaults/qos-scanner.properties | 105 ++++--- skel/share/services/qos-scanner.batch | 11 +- 13 files changed, 381 insertions(+), 245 deletions(-) diff --git a/docs/TheBook/src/main/markdown/config-qos-engine.md b/docs/TheBook/src/main/markdown/config-qos-engine.md index f910275cfb5..cf45c61d0c1 100644 --- a/docs/TheBook/src/main/markdown/config-qos-engine.md +++ b/docs/TheBook/src/main/markdown/config-qos-engine.md @@ -663,7 +663,7 @@ restage it before considering it inaccessible. ### Pool scan vs Sys scan For the scanner component, there are two kinds of scans. The pool scan runs a query -by location (= pool) and verifies each of the files that the namespace indicates is +by location (= pool) and verifies each of the ``ONLINE`` files that the namespace indicates is resident on that pool. This is generally useful for disk-resident replicas, but will not be able to detect missing replicas (say, from faulty migration, where the old pool is no longer in the pool configuration). 
Nevertheless, a pool scan @@ -685,10 +685,26 @@ copy, regardless of the current available pools, and will stage it back in if it SCANNING, QOS vs Resilience Formerly (in resilience), individual pool scans were both triggered by pool state changes - and were run periodically; in QoS, however, they are only triggered by state changes - (or by an explicit admin command). The sys scans, on the other hand, run periodically - in the background, touching each file in the natural order of their primary key in the - namespace. + and were run periodically; in QoS, they are still triggered by state changes + (or by an explicit admin command), but there is an option as to how to run ONLINE scans + periodically. By enabling 'online' scans (the default), the sys scans will + touch each file in the natural order of their primary key in the namespace. + The advantage to this is avoiding scanning the same file more than once if + it has more than one location. The disadvantage is that files whose locations + are currently offline or have been removed from the dCache configuration will + trigger an alarm. If 'online' is disabled, the old-style pool scan (more properly, + location-based scan) will be triggered instead. This will look at only ONLINE + files on IDLE pools that are ENABLED, but will end up running redundant checks + for files with multiple replicas. + + With the advent of the rule engine (9.2), the NEARLINE scan has been limited to + files with a defined qos policy. + + NEARLINE is no longer turned off by default, since it no longer necessarily + encompasses all files on tape, but just the ones for which the policy state + currently is NEARLINE CUSTODIAL. Of course, if the majority of files in the dCache + instance have a policy, then this scan will again involve a much longer run-time + and thus the window should be adjusted accordingly. 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ---------------------- diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/admin/QoSScannerAdmin.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/admin/QoSScannerAdmin.java index 2d6ef3f7f31..70bfa1f6c27 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/admin/QoSScannerAdmin.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/admin/QoSScannerAdmin.java @@ -771,7 +771,7 @@ protected String doCall() throws Exception { @Command(name = "sys cancel", hint = "cancel background scan operations", description = "Cancels operations matching the options; either single operation, all " - + "online or nearline; notifies the verifier.") + + "online or qos; notifies the verifier.") class SysCancelCommand extends InitializerAwareCommand { SysCancelCommand() { @@ -782,9 +782,9 @@ class SysCancelCommand extends InitializerAwareCommand { usage = "Cancel the sub-operation matching this uuid.") String id; - @Option(name = "nearline", - usage = "Cancel all nearline sub-operations.") - boolean nearline = false; + @Option(name = "qos", + usage = "Cancel all qosNearline sub-operations.") + boolean qos = false; @Option(name = "online", usage = "Cancel all online sub-operations.") @@ -795,7 +795,7 @@ protected String doCall() { if (id != null) { systemOperationMap.cancelSystemScan(id); } else { - if (nearline) { + if (qos) { systemOperationMap.cancelAll(true); } if (online) { @@ -836,21 +836,20 @@ protected String doCall() { hint = "control the periodic check for system scans", description = "Resets the properties governing system scanning, like the periodic interval," - + " whether nearline is enabled, batch size, and the number of concurrent operations allowed.") + + " whether online is enabled, batch size, and the number of concurrent operations allowed.") class SysControlCommand extends 
InitializerAwareCommand { - @Option(name = "enable-nearline", - usage = "Turn on or off NEARLINE (CUSTODIAL) system scanning.") - Boolean enableNearline; + @Option(name = "enable-online", + usage = "Turn on or off direct namespace ONLINE system scanning" + + " (if false, scans of IDLE ENABLED pools are used).") + Boolean enableOnline; - @Option(name = "nearline-window", - usage = "(one of nearline-window|online-window). " - + "Amount of time which must pass since the last full system scan for it to be run again.") - Integer nearlineWindow; + @Option(name = "qos-nearline-window", + usage = "Amount of time which must pass since the last full system scan for it to be run again.") + Integer qosNearlineWindow; @Option(name = "online-window", - usage = "(one of nearline-window|online-window). " - + "Amount of time which must pass since the last system scan (online files only) for " + usage = "Amount of time which must pass since the last system scan (online files only) for " + "it to be run again.") Integer onlineWindow; @@ -858,9 +857,9 @@ class SysControlCommand extends InitializerAwareCommand { usage = "Maximum number of pnsfids to send to the verifier at a time.") Integer onlineBatch; - @Option(name = "nearline-batch-size", + @Option(name = "qos-nearline-batch-size", usage = "Maximum number of pnsfids to send to the verifier at a time.") - Integer nearlineBatch; + Integer qosNearlineBatch; @Option(name = "max-operations", usage = "Maximum number of concurrent operations permitted; note that this number " @@ -878,12 +877,12 @@ class SysControlCommand extends InitializerAwareCommand { @Override protected String doCall() { - if (enableNearline != null) { - systemOperationMap.setNearlineRescanEnabled(enableNearline); + if (enableOnline != null) { + systemOperationMap.setOnlineScanEnabled(enableOnline); } - if (nearlineBatch != null) { - systemOperationMap.setNearlineBatchSize(nearlineBatch); + if (qosNearlineBatch != null) { + 
systemOperationMap.setQosNearlineBatchSize(qosNearlineBatch); } if (onlineBatch != null) { @@ -894,10 +893,10 @@ protected String doCall() { systemOperationMap.setMaxConcurrentRunning(maxOperations); } - if (nearlineWindow != null) { - systemOperationMap.setNearlineRescanWindow(nearlineWindow); + if (qosNearlineWindow != null) { + systemOperationMap.setQosNearlineRescanWindow(qosNearlineWindow); if (unit != null) { - systemOperationMap.setNearlineRescanWindowUnit(unit); + systemOperationMap.setQosNearlineRescanWindowUnit(unit); } } else if (onlineWindow != null) { systemOperationMap.setOnlineRescanWindow(onlineWindow); @@ -913,30 +912,30 @@ protected String doCall() { @Command(name = "sys scan", hint = "initiate an ad hoc background scan.", - description = - "If this is nearline, it will bypass the enable flag; however, if a scan of " - + "the requested type is already running, it will not be automatically canceled.") + description = "If a scan of the requested type is already running, " + + "it will not be automatically canceled.") class SysScanCommand extends InitializerAwareCommand { SysScanCommand() { super(initializer); } - @Option(name = "nearline", - usage = "Scan nearline custodial files with cached replicas. " - + "For most deployments, NEARLINE will be the more costly scan and could run for many " - + "days, depending on the size of the namespace.") - boolean nearline = false; + @Option(name = "qos", + usage = "Scan NEARLINE files for which a QoS policy has been defined.") + boolean qos = false; @Option(name = "online", - usage = "Scan online files (both REPLICA and CUSTODIAL). " - + "Equivalent to scanning all pools for persistent files.") + usage = "Scan online files (both REPLICA and CUSTODIAL). Depending on whether " + + "online is enabled (true by default), it will either scan the namespace " + + "entries or will trigger a scan of all IDLE ENABLED pools. 
Setting this scan " + + "to be run periodically should take into account the size of the namespace or " + + "the number of pools, and the proportion of ONLINE files they contain.") boolean online = false; @Override protected String doCall() { try { - if (nearline) { + if (qos) { systemOperationMap.startScan(true); } @@ -944,7 +943,7 @@ protected String doCall() { systemOperationMap.startScan(false); } - if (!nearline && !online) { + if (!qos && !online) { return "No scan started; must be -online, -nearline or both."; } } catch (PermissionDeniedCacheException e) { diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/ScanSummary.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/ScanSummary.java index 91a44e542e7..2402501f0d0 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/ScanSummary.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/ScanSummary.java @@ -83,6 +83,10 @@ public synchronized void incrementCount() { ++count; } + public synchronized void incrementCount(long count) { + this.count += count; + } + public synchronized boolean isCancelled() { return canceled; } diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemOperationMap.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemOperationMap.java index cf852a70e66..8867df72558 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemOperationMap.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemOperationMap.java @@ -59,6 +59,7 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */ package org.dcache.qos.services.scanner.data; +import com.google.common.base.Throwables; import com.google.common.collect.EvictingQueue; import diskCacheV111.util.CacheException; import diskCacheV111.util.PermissionDeniedCacheException; @@ -66,9 +67,11 @@ 
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING import java.util.Date; import java.util.HashMap; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import javax.annotation.concurrent.GuardedBy; +import org.dcache.qos.services.scanner.data.PoolScanOperation.State; import org.dcache.qos.services.scanner.data.ScanOperation.ScanLabel; import org.dcache.qos.services.scanner.handlers.SysOpHandler; import org.dcache.qos.services.scanner.util.QoSScannerCounters; @@ -77,15 +80,14 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING import org.slf4j.LoggerFactory; /** - * Maintains two maps corresponding to ONLINE and NEARLINE operations. The runnable method checks + * Maintains two maps corresponding to ONLINE and QOS_NEARLINE operations. The runnable method checks * for the period window expiration and launches the background operations when appropriate. *

- * The ONLINE operations look at ONLINE REPLICA and CUSTODIAL files. The NEARLINE operations look - * at NEARLINE CUSTODIAL files which currently have one or more cached disk copies. NEARLINE - * REPLICA files (currently interpreted as volatile) are ignored. + * The ONLINE operations look at ONLINE REPLICA and CUSTODIAL files. The QOS_NEARLINE operations + * look at files for which a policy is defined but whose current AL/RP is NEARLINE CUSTODIAL. *

- * Since the NEARLINE scan can take a very long time, it is turned off by default. Its default batch - * size is also lower than the ONLINE scans, to give the latter priority when running concurrently. + * If ONLINE scans are not activated, an IDLE-ENABLED pool scan is scheduled instead (as formerly + * with resilience). The online window and unit then apply to the pool scans. *

* Provides methods for cancellation of running scans, and for ad hoc submission of a scan. *

@@ -99,38 +101,53 @@ public class SystemOperationMap extends ScanOperationMap { = "\n\t%s days, %s hours, %s minutes, %s seconds\n\n"; private static final byte ONLINE = 0x2; - private static final byte NEARLINE = 0x4; + private static final byte QOS_NEARLINE = 0x4; + + private static final PoolFilter ALL_IDLE_ENABLED_POOLS; + + static { + PoolFilter filter = new PoolFilter(); + filter.setState(Set.of(State.IDLE.name())); + filter.setPoolStatus("ENABLED"); + ALL_IDLE_ENABLED_POOLS = filter; + } private final Map online = new HashMap<>(); - private final Map nearline = new HashMap<>(); + private final Map qosNearline = new HashMap<>(); private final EvictingQueue history = EvictingQueue.create(100); private volatile int state; private volatile Integer onlineRescanWindow; private volatile TimeUnit onlineRescanWindowUnit; + private volatile boolean onlineScanEnabled = false; - private volatile Integer nearlineRescanWindow; - private volatile TimeUnit nearlineRescanWindowUnit; - private volatile boolean nearlineScanEnabled = false; + private volatile Integer qosNearlineRescanWindow; + private volatile TimeUnit qosNearlineRescanWindowUnit; - private volatile int nearlineBatchSize = 200000; - private volatile int onlineBatchSize = 500000; + private volatile int onlineBatchSize = 200000; + private volatile int qosNearlineBatchSize = 500000; private long lastOnlineScanStart; private long lastOnlineScanEnd; - private long lastNearlineScanStart; - private long lastNearlineScanEnd; + private long lastQosNearlineScanStart; + private long lastQosNearlineScanEnd; + private long lastPoolScanStart; + private long nextPoolScanStart; private SysOpHandler handler; private QoSScannerCounters counters; + private PoolOperationMap poolOperationMap; + public SystemOperationMap() { long now = System.currentTimeMillis(); lastOnlineScanStart = now; lastOnlineScanEnd = now; - lastNearlineScanStart = now; - lastNearlineScanEnd = now; + lastQosNearlineScanStart = now; + lastQosNearlineScanEnd = 
now; + lastPoolScanStart = now; + nextPoolScanStart = now; } public void cancelSystemScan(String id) { @@ -147,12 +164,12 @@ public void cancelSystemScan(String id) { } } - public void cancelAll(boolean nearline) { + public void cancelAll(boolean qos) { lock.lock(); try { - if (nearline) { - this.nearline.values().forEach(this::cancel); - this.nearline.clear(); + if (qos) { + this.qosNearline.values().forEach(this::cancel); + this.qosNearline.clear(); } else { this.online.values().forEach(this::cancel); this.online.clear(); @@ -161,9 +178,9 @@ public void cancelAll(boolean nearline) { lock.unlock(); } - if (nearline) { - state &= (~NEARLINE); - lastNearlineScanEnd = System.currentTimeMillis(); + if (qos) { + state &= (~QOS_NEARLINE); + lastQosNearlineScanEnd = System.currentTimeMillis(); } else { state &= (~ONLINE); lastOnlineScanEnd = System.currentTimeMillis(); @@ -172,15 +189,15 @@ public void cancelAll(boolean nearline) { public String configSettings() { return String.format("system online scan window %s %s\n" - + "system nearline scan is %s\n" - + "system nearline scan window %s %s\n" + + "system online scan is %s\n" + + "system qosNearline scan window %s %s\n" + "max concurrent operations %s\n" + "period set to %s %s\n\n", onlineRescanWindow, onlineRescanWindowUnit, - nearlineScanEnabled ? "on" : "off", - nearlineRescanWindow, - nearlineRescanWindowUnit, + onlineScanEnabled ? 
"on" : "off", + qosNearlineRescanWindow, + qosNearlineRescanWindowUnit, maxConcurrentRunning, timeout, timeoutUnit); @@ -193,8 +210,7 @@ public void getInfo(PrintWriter pw) { counters.appendRunning(builder); counters.appendSweep(builder); builder.append("\n") - .append(String.format("last online scan start %s\n" - + "last online scan end %s\n", + .append(String.format("last online scan start %s\nlast online scan end %s\n", new Date(lastOnlineScanStart), new Date(lastOnlineScanEnd))); long seconds = TimeUnit.MILLISECONDS.toSeconds(lastOnlineScanEnd - lastOnlineScanStart); @@ -202,15 +218,19 @@ public void getInfo(PrintWriter pw) { seconds = 0L; } counters.appendDHMSElapsedTime(seconds, SCAN_DURATION, builder); - builder.append(String.format("last nearline scan start %s\n" - + "last nearline scan end %s\n", - new Date(lastNearlineScanStart), - new Date(lastNearlineScanEnd))); - seconds = TimeUnit.MILLISECONDS.toSeconds(lastNearlineScanEnd - lastNearlineScanStart); + builder.append(String.format("last qosNearline (nearline) scan start %s\n" + + "last qosNearline (nearline) scan end %s\n", + new Date(lastQosNearlineScanStart), + new Date(lastQosNearlineScanEnd))); + seconds = TimeUnit.MILLISECONDS.toSeconds(lastQosNearlineScanEnd - lastQosNearlineScanStart); if (seconds < 0L) { seconds = 0L; } counters.appendDHMSElapsedTime(seconds, SCAN_DURATION, builder); + builder.append("\n") + .append(String.format("last pool scan start %s\nnext pool scan start %s\n", + new Date(lastPoolScanStart), + new Date(nextPoolScanStart))); builder.append("\n"); pw.print(builder); } @@ -221,10 +241,10 @@ public String getSystemScanStatus() { try { online.entrySet().forEach(e -> builder.append(e.getValue()).append("\n")); - if (!online.isEmpty() && !nearline.isEmpty()) { + if (!online.isEmpty() && !qosNearline.isEmpty()) { builder.append("------------------------------------------------\n"); } - nearline.entrySet().forEach(e -> + qosNearline.entrySet().forEach(e -> 
builder.append(e.getValue()).append("\n")); } finally { lock.unlock(); @@ -249,22 +269,29 @@ public String historyDescending() { public void runScans() { lock.lock(); try { - long now = System.currentTimeMillis(); - - if (nearlineScanEnabled && (state & NEARLINE) != NEARLINE && - now - lastNearlineScanEnd >= nearlineRescanWindowUnit.toMillis( - nearlineRescanWindow)) { - LOGGER.info("runScans: starting NEARLINE system scans"); + if (!isQosNearlineRunning() && isQosNearlinePastExpiration()) { + LOGGER.info("runScans: starting qosNearline system scans"); start(true); } - if ((state & ONLINE) != ONLINE && - now - lastOnlineScanEnd >= onlineRescanWindowUnit.toMillis(onlineRescanWindow)) { - LOGGER.info("runScans: starting ONLINE system scans"); - start(false); + if (!isOnlineRunning()) { + /* + * If online is enabled, do the direct namespace scan; + * otherwise, schedule a pool scan. + */ + if (isOnlinePastExpiration()) { + if (onlineScanEnabled) { + LOGGER.info("runScans: starting ONLINE system scans"); + start(false); + } else { + LOGGER.info("runScans: starting IDLE POOL scans"); + startPoolScans(); + } + } } } catch (CacheException e) { - + LOGGER.error("runScans failed: {}, cause {}.", e.getMessage(), + String.valueOf(Throwables.getRootCause(e))); } finally { lock.unlock(); } @@ -278,20 +305,20 @@ public void setHandler(SysOpHandler handler) { this.handler = handler; } - public void setNearlineBatchSize(Integer nearlineBatchSize) { - this.nearlineBatchSize = nearlineBatchSize; + public void setQosNearlineBatchSize(Integer qosNearlineBatchSize) { + this.qosNearlineBatchSize = qosNearlineBatchSize; } - public void setNearlineRescanEnabled(boolean enableFull) { - nearlineScanEnabled = enableFull; + public void setOnlineScanEnabled(boolean enabled) { + onlineScanEnabled = enabled; } - public void setNearlineRescanWindow(int nearlineRescanWindow) { - this.nearlineRescanWindow = nearlineRescanWindow; + public void setQosNearlineRescanWindow(int qosNearlineRescanWindow) 
{ + this.qosNearlineRescanWindow = qosNearlineRescanWindow; } - public void setNearlineRescanWindowUnit(TimeUnit nearlineRescanWindowUnit) { - this.nearlineRescanWindowUnit = nearlineRescanWindowUnit; + public void setQosNearlineRescanWindowUnit(TimeUnit qosNearlineRescanWindowUnit) { + this.qosNearlineRescanWindowUnit = qosNearlineRescanWindowUnit; } public void setOnlineBatchSize(Integer onlineBatchSize) { @@ -306,30 +333,34 @@ public void setOnlineRescanWindowUnit(TimeUnit onlineRescanWindowUnit) { this.onlineRescanWindowUnit = onlineRescanWindowUnit; } - public void startScan(boolean nearline) throws PermissionDeniedCacheException { + public void setPoolOperationMap(PoolOperationMap poolOperationMap) { + this.poolOperationMap = poolOperationMap; + } + + public void startScan(boolean qos) throws PermissionDeniedCacheException { lock.lock(); try { - if (nearline) { - if ((state & NEARLINE) == NEARLINE) { - throw new PermissionDeniedCacheException("nearline scans are already running; " + if (qos) { + if (isQosNearlineRunning()) { + throw new PermissionDeniedCacheException("qosNearline scans are already running; " + "use cancel and then call start again."); } - - if (!nearlineScanEnabled) { - LOGGER.info("overriding disabled flag to run nearline scan"); - } + start(true); } else { - if ((state & ONLINE) == ONLINE) { - throw new PermissionDeniedCacheException("online scans are already running; " + if (isOnlineRunning()) { + throw new PermissionDeniedCacheException(onlineScanEnabled ? 
"online" : "pool" + + " scans are already running; " + "use cancel and then call start again."); } - } - try { - start(nearline); - } catch (CacheException e) { - LOGGER.debug("trouble starting scan: {}.", e.toString()); + if (onlineScanEnabled) { + start(false); + } else { + startPoolScans(); + } } + } catch (CacheException e) { + LOGGER.info("trouble starting scan: {}.", e.toString()); } finally { lock.unlock(); } @@ -370,7 +401,7 @@ protected void clear() { lock.lock(); try { online.clear(); - nearline.clear(); + qosNearline.clear(); history.clear(); } finally { lock.unlock(); @@ -385,7 +416,7 @@ protected void recordSweep(long start, long end) { private void cancel(SystemScanOperation operation) { operation.cancel(); if (operation.task != null) { - operation.task.cancel("qos admin command"); + operation.task.cancel("qosNearline admin command"); } history.add(operation.toString()); handler.handleScanCancelled(operation.id); @@ -395,13 +426,13 @@ private void cancel(SystemScanOperation operation) { private SystemScanOperation get(String id) { SystemScanOperation operation = online.get(id); if (operation == null) { - operation = nearline.get(id); + operation = qosNearline.get(id); } return operation; } - private int getBatchSize(boolean nearline) { - return nearline ? nearlineBatchSize : onlineBatchSize; + private int getBatchSize(boolean qosNearline) { + return qosNearline ? 
qosNearlineBatchSize : onlineBatchSize; } @GuardedBy("lock") @@ -410,29 +441,48 @@ private void handleDone(SystemScanOperation operation) { remove(operation.id); history.add(operation.toString()); - boolean isNearline = operation.nearline; + boolean isQosPermanent = operation.qos; if (operation.isFinal()) { - if (isNearline && nearline.isEmpty()) { - state &= (~NEARLINE); - lastNearlineScanEnd = System.currentTimeMillis(); + if (isQosPermanent && qosNearline.isEmpty()) { + state &= (~QOS_NEARLINE); + lastQosNearlineScanEnd = System.currentTimeMillis(); } else if (online.isEmpty()) { state &= (~ONLINE); lastOnlineScanEnd = System.currentTimeMillis(); } } else { int loopWidth = maxConcurrentRunning; - int batchSize = getBatchSize(isNearline); + int batchSize = getBatchSize(isQosPermanent); long fromIndex = (operation.from / batchSize) + loopWidth; long toIndex = (operation.to / batchSize) + loopWidth; - submit(fromIndex, toIndex, operation.minMaxIndices, isNearline); + submit(fromIndex, toIndex, operation.minMaxIndices, isQosPermanent); } } + private boolean isOnlineRunning() { + return (state & ONLINE) == ONLINE; + } + + private boolean isQosNearlineRunning() { + return (state & QOS_NEARLINE) == QOS_NEARLINE; + } + + private boolean isQosNearlinePastExpiration() { + return System.currentTimeMillis() - lastQosNearlineScanEnd + >= qosNearlineRescanWindowUnit.toMillis( + qosNearlineRescanWindow); + } + + private boolean isOnlinePastExpiration() { + return System.currentTimeMillis() - lastOnlineScanEnd >= onlineRescanWindowUnit.toMillis( + onlineRescanWindow); + } + @GuardedBy("lock") private void put(SystemScanOperation operation) { - if (operation.isNearline()) { - nearline.put(operation.id, operation); + if (operation.isQos()) { + qosNearline.put(operation.id, operation); } else { online.put(operation.id, operation); } @@ -442,7 +492,7 @@ private void put(SystemScanOperation operation) { private SystemScanOperation remove(String id) { SystemScanOperation operation = 
online.remove(id); if (operation == null) { - operation = nearline.remove(id); + operation = qosNearline.remove(id); } return operation; } @@ -451,23 +501,30 @@ private SystemScanOperation remove(String id) { * Launch the first tasks up to the max concurrent. */ @GuardedBy("lock") - private void start(boolean nearline) throws CacheException { - if (!nearlineScanEnabled && nearline) { - LOGGER.info("start: overriding disabled flag to run nearline scan"); + private void start(boolean qos) throws CacheException { + if (!onlineScanEnabled && !qos) { + LOGGER.info("start: overriding disabled flag to run online scan"); } - long[] indices = handler.getMinMaxIndices(); + long[] indices = handler.getMinMaxIndices(qos); int count = maxConcurrentRunning; + if (indices[1] == 0) { + LOGGER.info("start: no {} entries to scan.", qos ? "QOS_NEARLINE" : "ONLINE"); + return; + } + LOGGER.info("start: loop count {}.", count); for (int i = 0; i < count; ++i) { - LOGGER.info("start: submitting {} scan {}.", nearline ? "NEARLINE" : "ONLINE", i); - submit(i, i + 1, indices, nearline); + LOGGER.info("start: submitting {} scan {}.", qos ? 
"QOS_NEARLINE" : "ONLINE", i); + if (submit(i, i + 1, indices, qos) > indices[1]) { + break; + } } - if (nearline) { - lastNearlineScanStart = System.currentTimeMillis(); - state |= NEARLINE; + if (qos) { + lastQosNearlineScanStart = System.currentTimeMillis(); + state |= QOS_NEARLINE; } else { lastOnlineScanStart = System.currentTimeMillis(); state |= ONLINE; @@ -475,21 +532,32 @@ private void start(boolean nearline) throws CacheException { } @GuardedBy("lock") - private void submit(long fromIndex, long toIndex, long[] minmax, boolean nearline) { - int batchSize = getBatchSize(nearline); - SystemScanOperation operation - = new SystemScanOperation(minmax[0] + (fromIndex * batchSize), - minmax[0] + (toIndex * batchSize), - nearline); + private void startPoolScans() { + LOGGER.info("runScans: starting Pools scans"); + poolOperationMap.scan(ALL_IDLE_ENABLED_POOLS); + lastPoolScanStart = System.currentTimeMillis(); + nextPoolScanStart = lastPoolScanStart + onlineRescanWindowUnit.toMillis(onlineRescanWindow); + } + + @GuardedBy("lock") + private long submit(long fromIndex, long toIndex, long[] minmax, boolean qos) { + int batchSize = getBatchSize(qos); + long start = minmax[0] + (fromIndex * batchSize); + long end = Math.min(minmax[0] + (toIndex * batchSize), minmax[1]); + if (start > end) { + return end; + } + SystemScanOperation operation = new SystemScanOperation(start, end, qos); operation.minMaxIndices = minmax; operation.lastScan = System.currentTimeMillis(); submit(operation); + return end; } @GuardedBy("lock") private void submit(SystemScanOperation operation) { operation.task = new SystemScanTask(operation.id, operation.from, operation.to, - operation.nearline, handler); + operation.qos, handler); operation.task.setErrorHandler( e -> LOGGER.info("Error during system scan: {}.", e.toString())); LOGGER.info("Submitting system scan task for operation {}, start index {}, end index {}.", diff --git 
a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemScanOperation.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemScanOperation.java index 66883687ec5..206c39748af 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemScanOperation.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemScanOperation.java @@ -83,7 +83,7 @@ public final class SystemScanOperation extends ScanOperation { final String id; final long from; final long to; - final boolean nearline; + final boolean qos; long lastUpdate; long lastScan; @@ -98,11 +98,11 @@ public final class SystemScanOperation extends ScanOperation { private long failed; private boolean canceled; - SystemScanOperation(long from, long to, boolean nearline) { + SystemScanOperation(long from, long to, boolean qos) { id = UUID.randomUUID().toString(); this.from = from; this.to = to; - this.nearline = nearline; + this.qos = qos; lastUpdate = System.currentTimeMillis(); lastScan = lastUpdate; @@ -116,7 +116,7 @@ public final class SystemScanOperation extends ScanOperation { public String toString() { return String.format(TO_STRING, id, - nearline ? "NEARLINE" : "ONLINE", + qos ? "QOS" : "ONLINE", from, to, canceled ? "CANCELED" : (scanLabel == FINISHED ? 
"DONE" : "RUNNING"), @@ -167,8 +167,8 @@ boolean isFinal() { return to >= minMaxIndices[1]; } - boolean isNearline() { - return nearline; + boolean isQos() { + return qos; } protected String getFormattedPercentDone() { diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemScanSummary.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemScanSummary.java index f762d6d74a0..13d0eb4522d 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemScanSummary.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/data/SystemScanSummary.java @@ -68,15 +68,15 @@ public final class SystemScanSummary extends ScanSummary { private final long from; private final long to; - private final boolean nearline; + private final boolean qosNearline; private long lastIndex; - public SystemScanSummary(String id, long from, long to, boolean nearline) { + public SystemScanSummary(String id, long from, long to, boolean qosNearline) { super(id); this.from = from; this.to = to; - this.nearline = nearline; + this.qosNearline = qosNearline; } public long getFrom() { @@ -95,7 +95,7 @@ public synchronized void setLastIndex(long lastIndex) { this.lastIndex = lastIndex; } - public synchronized boolean isNearlineScan() { - return nearline; + public synchronized boolean isQosNearline() { + return qosNearline; } } diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/handlers/NamespaceOpHandler.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/handlers/NamespaceOpHandler.java index 5ddffaef872..8fb47ebba24 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/handlers/NamespaceOpHandler.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/handlers/NamespaceOpHandler.java @@ -103,10 +103,14 @@ public ExecutorService getSystemTaskService() { } @Override - public long[] getMinMaxIndices() throws 
CacheException { + public long[] getMinMaxIndices(boolean qosNearline) throws CacheException { return namespace.getMinMaxInumbers(); } + public int getBatchLimit() { + return namespace.getFetchSize(); + } + public void handlePoolScan(PoolScanSummary scan) { try { namespace.handlePoolScan(scan); diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/handlers/SysOpHandler.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/handlers/SysOpHandler.java index 46b6dc7b920..ba12119f837 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/handlers/SysOpHandler.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/handlers/SysOpHandler.java @@ -74,9 +74,15 @@ public interface SysOpHandler { ExecutorService getSystemTaskService(); /** + * @param qosNearline whether this scan is for files with a permanent qos policy. * @return the min and max indices for files in the database. */ - long[] getMinMaxIndices() throws CacheException; + long[] getMinMaxIndices(boolean qosNearline) throws CacheException; + + /** + * @return the max size of each batch of requests sent to the verifier. + */ + int getBatchLimit(); /** * Called in response to an admin command to cancel the current scan. diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/namespace/LocalNamespaceAccess.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/namespace/LocalNamespaceAccess.java index b32ffe12b26..f6cd4ed539b 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/namespace/LocalNamespaceAccess.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/namespace/LocalNamespaceAccess.java @@ -95,32 +95,9 @@ public class LocalNamespaceAccess implements NamespaceAccess { static final int FILE_TYPE = 32768; - /** - * Checks all NEARLINE CUSTODIAL files that have cached disk locations. 
As this kind of file - * predominates on most installations, this scan could run for days. It is here mainly for the - * following reasons: - *

- *

    - *
  1. failed disk+tape to tape transitions which updated the AL, but did not remove - * the sticky bits on the pools;

  2. - *
  3. failed disk to tape which flushed the file but did not remove the sticky bit - * on the source.

  4. - *
- */ - static final String SQL_GET_NEARLINE_PNFSIDS - = "SELECT inumber, ipnfsid FROM t_inodes n WHERE n.itype = " + FILE_TYPE - + " AND n.iaccess_latency = 0" - + " AND n.iretention_policy = 0" - + " AND n.inumber >= ?" - + " AND n.inumber < ?" - + " AND EXISTS (SELECT * FROM t_locationinfo l" - + " WHERE l.inumber = n.inumber" - + " AND l.itype = 1)" - + " ORDER BY n.inumber ASC"; - /** * The regular system scan checks consistency for all ONLINE files, whether REPLICA or - * CUSTODIAL. + * CUSTODIAL. Included are files for which a policy is defined. */ static final String SQL_GET_ONLINE_PNFSIDS = "SELECT inumber, ipnfsid FROM t_inodes WHERE itype = " + FILE_TYPE @@ -129,16 +106,28 @@ public class LocalNamespaceAccess implements NamespaceAccess { + " AND inumber < ?" + " ORDER BY inumber ASC"; + /** + * The qos scan checks consistency for all NEARLINE CUSTODIAL files for which a policy is defined. + */ + static final String SQL_GET_NEARLINE_QOS_PNFSIDS + = "SELECT inumber, ipnfsid FROM t_inodes WHERE itype = " + FILE_TYPE + + " AND iaccess_latency = 0" + + " AND iretention_policy = 0" + + " AND inumber >= ?" + + " AND inumber < ?" + + " AND EXISTS (SELECT * FROM t_qos_policy WHERE id = iqos_policy)" + + " ORDER BY inumber ASC"; + /** * Pool status or config changes should be concerned only with the disk status of the file, so - * we check only ONLINE files again. + * we check only ONLINE files again. Files for which a QoS policy is defined are included. */ static final String SQL_GET_ONLINE_FOR_LOCATION - = "SELECT n.ipnfsid FROM t_locationinfo l, t_inodes n " - + "WHERE l.inumber = n.inumber " - + "AND l.itype = 1 " - + "AND n.iaccess_latency = 1 " - + "AND l.ilocation = ?"; + = "SELECT n.ipnfsid FROM t_locationinfo l, t_inodes n" + + " WHERE l.inumber = n.inumber" + + " AND l.itype = 1" + + " AND n.iaccess_latency = 1" + + " AND l.ilocation = ?"; /** * Get the current range of the entire scan. 
@@ -146,14 +135,14 @@ public class LocalNamespaceAccess implements NamespaceAccess { static final String SQL_GET_MIN_MAX_INUMBER = "SELECT min(inumber), max(inumber) FROM t_inodes"; static final String SQL_GET_CONTAINED_IN - = "SELECT n.ipnfsid FROM t_locationinfo l, t_inodes n " - + "WHERE n.inumber = l.inumber " - + "AND l.ilocation IN (%s) " - + "AND NOT EXISTS " - + "(SELECT n1.ipnfsid FROM t_locationinfo l1, t_inodes n1 " - + "WHERE n.inumber = l1.inumber " - + "AND n.inumber = n1.inumber " - + "AND l1.ilocation NOT IN (%s))"; + = "SELECT n.ipnfsid FROM t_locationinfo l, t_inodes n" + + " WHERE n.inumber = l.inumber" + + " AND l.ilocation IN (%s)" + + " AND NOT EXISTS" + + " (SELECT n1.ipnfsid FROM t_locationinfo l1, t_inodes n1" + + " WHERE n.inumber = l1.inumber" + + " AND n.inumber = n1.inumber" + + " AND l1.ilocation NOT IN (%s))"; private static final Logger LOGGER = LoggerFactory.getLogger(LocalNamespaceAccess.class); @@ -232,6 +221,10 @@ public void setConnectionPool(DataSource connectionPool) { this.connectionPool = connectionPool; } + public int getFetchSize() { + return fetchSize; + } + @Override public void setFetchSize(int fetchSize) { this.fetchSize = fetchSize; @@ -333,11 +326,13 @@ private void handleQuery(Connection connection, SystemScanSummary scan) List replicas = new ArrayList<>(); QoSScannerVerificationRequest request; - LOGGER.debug("handleQuery: for system scan, inumber {} to inumber {}.", start); + LOGGER.debug("handleQuery: for {} system scan, inumber {} to inumber {}.", + scan.isQosNearline() ? "nearline qos": "online", start); + + String sql = scan.isQosNearline() ? SQL_GET_NEARLINE_QOS_PNFSIDS : SQL_GET_ONLINE_PNFSIDS; try { - statement = connection.prepareStatement(scan.isNearlineScan() ? 
- SQL_GET_NEARLINE_PNFSIDS : SQL_GET_ONLINE_PNFSIDS); + statement = connection.prepareStatement(sql); statement.setLong(1, start); statement.setLong(2, to); statement.setFetchSize(fetchSize); @@ -353,7 +348,7 @@ private void handleQuery(Connection connection, SystemScanSummary scan) replicas.add(new PnfsId(resultSet.getString(2))); scan.incrementCount(); if (replicas.size() == fetchSize) { - request = new QoSScannerVerificationRequest(scan.getId().toString(), replicas, + request = new QoSScannerVerificationRequest(scan.getId(), replicas, SYSTEM_SCAN, null, null, true); verificationListener.fileQoSVerificationRequested(request); replicas = new ArrayList<>(); @@ -363,7 +358,7 @@ private void handleQuery(Connection connection, SystemScanSummary scan) scan.setLastIndex(index); if (!replicas.isEmpty() && !scan.isCancelled()) { - request = new QoSScannerVerificationRequest(scan.getId().toString(), replicas, + request = new QoSScannerVerificationRequest(scan.getId(), replicas, SYSTEM_SCAN, null, null, true); verificationListener.fileQoSVerificationRequested(request); } diff --git a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/namespace/NamespaceAccess.java b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/namespace/NamespaceAccess.java index a27d9898cbc..ade67de8c52 100644 --- a/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/namespace/NamespaceAccess.java +++ b/modules/dcache-qos/src/main/java/org/dcache/qos/services/scanner/namespace/NamespaceAccess.java @@ -78,6 +78,11 @@ public interface NamespaceAccess { */ long[] getMinMaxInumbers() throws CacheException; + /** + * @return the sql fetch size. + */ + int getFetchSize(); + /** * The main query. 
* @@ -108,4 +113,6 @@ void printContainedInFiles(List locations, void setConnectionPool(DataSource connectionPool); void setFetchSize(int fetchSize); + + } diff --git a/modules/dcache-qos/src/main/resources/org/dcache/qos/qos-scanner.xml b/modules/dcache-qos/src/main/resources/org/dcache/qos/qos-scanner.xml index 19a49c2b217..9c5ad031bcd 100644 --- a/modules/dcache-qos/src/main/resources/org/dcache/qos/qos-scanner.xml +++ b/modules/dcache-qos/src/main/resources/org/dcache/qos/qos-scanner.xml @@ -48,7 +48,7 @@ Validates messages for handling; will ignore messages is message handling is - paused or if they originate with the qos service itself. + paused or if they originate with the qos service itself. @@ -106,7 +106,7 @@ - + Namespace database storage info extractor. @@ -207,13 +207,14 @@ - - - - + + + + + diff --git a/skel/share/defaults/qos-scanner.properties b/skel/share/defaults/qos-scanner.properties index d08d74c157c..0b89d19407d 100644 --- a/skel/share/defaults/qos-scanner.properties +++ b/skel/share/defaults/qos-scanner.properties @@ -25,14 +25,6 @@ qos-scanner.cell.subscribe=${qos.pool-monitor-topic} # ---- Configuration for namespace database connection pool --------------------------- # -# The database connection pool reuses connections between successive -# database operations. By reusing connections dCache doesn't suffer -# the overhead of establishing new database connections for each -# operation. -# -# The options here determine how qos behaves as the number of concurrent -# requests fluctuates. -# --------------------------------------------------------------------------- # ---- The maximum number of concurrent database connections # @@ -78,15 +70,6 @@ qos.plugins.storage-info-extractor=${dcache.plugins.storage-info-extractor} # ---- Thread queues -------------------------------------------------------------- # -# There are different thread queues associated with each of the qos services. 
-# -# In general, each (remote) service has an executor for handling the -# processing of incoming messages. The thread pools for these -# are labeled 'submit-threads.' In the case of the verifier, -# there is also a bulk submission pool for handling bulk scan requests. -# -# The verifier, scanner and adjuster in addition also have task thread pools. -# --------------------------------------------------------------------------------- # ---- Thread queue used to handle responses from the verifier. These # involve batched counts, and the amount of update work done on the @@ -142,31 +125,81 @@ qos.limits.scanner.pool-op-max-idle-time=1 # ---- Periodic system scanning # +# For the rationale for the system scanning types, see further in The Book. +# # The following properties control the periodic scanning to check # for qos consistency and initiate any adjustments that may be necessary -# in the case of inconsistent state. These scans touch all the inodes in the namespace -# once and only once, in ascending order and according to the specific query. +# in the case of inconsistent state. # # The scan period refers to the default amount of time between sweeps to check for timeouts. -# It is applied to the main thread of both the Pool Operation map and the System Operation map. # -# The scan windows refers to the amount of time between scheduled periodic -# system diagnostic scans. NEARLINE means files which are CUSTODIAL NEARLINE and currently -# have a cached copy; ONLINE refers to scans of all files with persistent copies, whether -# or not they are REPLICA or CUSTODIAL. NEARLINE is disabled by default because it -# can be very time consuming on large namespaces, but it may also be activated -# using the admin command for occasional diagnostic checks during relatively idle periods. +# The scan windows refer to the amount of time between scheduled periodic +# system diagnostic scans. 
+# +# QOS NEARLINE refers to scans of files whose QoS policy is defined and whose access latency and retention policy are NEARLINE CUSTODIAL. +# +# ONLINE refers to scans of all files with persistent copies, whether +# they are REPLICA or CUSTODIAL, including those for which a QoS policy is defined. +# ONLINE scanning is done by a direct query to the namespace, and is batched +# into requests determined by the batch size. Unlike with resilience, this +# kind of scan will only touch each inode entry once (whereas pool scans may overlap +# when multiple replicas are involved). # -# The batch size for NEARLINE is lowered to serve as an implicit backgrounding or -# de-prioritization (since the scan is done in batches, this allows for pre-emption by -# ONLINE scans if they are running concurrently. +# On the other hand, a general pool scan will only look at files on pools that are +# currently IDLE and UP, so those that are excluded or (temporarily) unattached +# will be skipped. This avoids generating a lot of alarms concerning files without +# disk copies that should exist. +# +# The direct ONLINE scan is enabled by default. To use the pool scan instead, disable +# "online" either via the property or the admin reset command. Be aware, however, that +# unlike resilience, all pools will be scanned, not just those in the resilient/primary +# groups; thus the online window should be set to accommodate the amount of time it +# will take to cycle through the entire set of pools this way. Needless to say, doing +# a direct ONLINE scan probably will take less time than a general pool scan. +# +# The batch size for a direct ONLINE scan is lowered to serve as an implicit backgrounding or +# de-prioritization (since the scan is done in batches, this allows for preemption by +# QOS scans if they are running concurrently). 
# qos.limits.scanner.scan-period=3 (one-of?MILLISECONDS|SECONDS|MINUTES|HOURS|DAYS)qos.limits.scanner.scan-period.unit=MINUTES -qos.limits.scanner.online-window=24 -(one-of?MILLISECONDS|SECONDS|MINUTES|HOURS|DAYS)qos.limits.scanner.online-window.unit=HOURS -qos.limits.scanner.enable.nearline-scan=false -qos.limits.scanner.nearline-window=5 -(one-of?MILLISECONDS|SECONDS|MINUTES|HOURS|DAYS)qos.limits.scanner.nearline-window.unit=DAYS -qos.limits.scanner.online-batch-size=500000 -qos.limits.scanner.nearline-batch-size=200000 \ No newline at end of file +qos.limits.scanner.qos-nearline-window=12 +(one-of?MILLISECONDS|SECONDS|MINUTES|HOURS|DAYS)qos.limits.scanner.qos-nearline-window.unit=HOURS +qos.limits.scanner.enable.online-scan=true +qos.limits.scanner.online-window=2 +(one-of?MILLISECONDS|SECONDS|MINUTES|HOURS|DAYS)qos.limits.scanner.online-window.unit=DAYS +qos.limits.scanner.qos-nearline-batch-size=500000 +qos.limits.scanner.online-batch-size=200000 + +# ---- Configuration for scanner database connection pool --------------------------- +# +# The database connection pool reuses connections between successive +# database operations. By reusing connections dCache doesn't suffer +# the overhead of establishing new database connections for each +# operation. +# +# The options here determine how qos behaves as the number of concurrent +# requests fluctuates. +# --------------------------------------------------------------------------- +qos-scanner.db.connections.max=10 + +# ---- The minimum number of idle database connections. +# +qos-scanner.db.connections.idle=1 + +(prefix)qos-scanner.db.hikari-properties = Hikari-specific properties + +# ---- Database related settings reserved for internal use. 
+# +(immutable)qos-scanner.db.name=qos +qos-scanner.db.host=${dcache.db.host} +qos-scanner.db.user=${dcache.db.user} +qos-scanner.db.password=${dcache.db.password} +qos-scanner.db.password.file=${dcache.db.password.file} +qos-scanner.db.url=jdbc:postgresql://${qos-scanner.db.host}/${qos-scanner.db.name}?targetServerType=master +qos-scanner.db.schema.changelog=org/dcache/qos/model/db.changelog-master.xml +qos-scanner.db.schema.auto=${dcache.db.schema.auto} + +# ---- Used with listing of file operations. +# +qos-scanner.db.fetch-size=1000 \ No newline at end of file diff --git a/skel/share/services/qos-scanner.batch b/skel/share/services/qos-scanner.batch index 24b9d740043..fefcc930a03 100644 --- a/skel/share/services/qos-scanner.batch +++ b/skel/share/services/qos-scanner.batch @@ -9,6 +9,9 @@ check -strong qos.home check -strong qos.db.namespace.connections.max check -strong qos.db.namespace.connections.idle check -strong qos.db.namespace.fetch-size +check -strong qos-scanner.db.connections.max +check -strong qos-scanner.db.connections.idle +check -strong qos-scanner.db.fetch-size check -strong qos.plugins.storage-info-extractor check -strong qos.limits.scanner.submit-threads check -strong qos.limits.scanner.task-threads @@ -28,11 +31,11 @@ check -strong qos.limits.scanner.scan-period check -strong qos.limits.scanner.scan-period.unit check -strong qos.limits.scanner.online-window check -strong qos.limits.scanner.online-window.unit -check -strong qos.limits.scanner.enable.nearline-scan -check -strong qos.limits.scanner.nearline-window -check -strong qos.limits.scanner.nearline-window.unit +check -strong qos.limits.scanner.enable.online-scan +check -strong qos.limits.scanner.qos-nearline-window +check -strong qos.limits.scanner.qos-nearline-window.unit check -strong qos.limits.scanner.online-batch-size -check -strong qos.limits.scanner.nearline-batch-size +check -strong qos.limits.scanner.qos-nearline-batch-size check -strong qos.pool-selection-strategy check -strong 
qos.service.scanner check -strong qos.service.scanner.timeout