Skip to content

Commit

Permalink
resilience: add ability to log resilience activity
Browse files Browse the repository at this point in the history
Motivation:

Providing an activity log, where resilience records its interactions
with other dCache components, may prove useful in understanding
resilience's behaviour.

Modification:

Log any cell message sent by resilience.

Make the log level of the .resilience log file configurable (via the
dcache.log.level.resilience property), in the same fashion as other log
files.  The default is not modified, so (by default) this patch enables
no additional logging.

Update system-test to record resilience activity.

Result:

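It is now possible to record resilience activity, which may prove
useful.  To enable it, set dcache.log.level.resilience to info (or a
more verbose level); the default remains error.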

Target: master
Requires-notes: yes
Requires-book: yes
Request: 5.0
Request: 4.2
Request: 4.1
Request: 4.0
  • Loading branch information
paulmillar committed Mar 18, 2019
1 parent 029f28a commit 68e36e1
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 1 deletion.
Expand Up @@ -61,6 +61,8 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
Expand Down Expand Up @@ -122,6 +124,9 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* to be unsynchronized.</p>
*/
public final class FileOperation {
private static final Logger ACTIVITY_LOGGER =
LoggerFactory.getLogger("org.dcache.resilience-log");

/*
* Stored state. Instead of using enum, to leave less of a memory footprint.
* As above.
Expand Down Expand Up @@ -253,6 +258,7 @@ public void ensureSticky(PoolInfoMap poolInfoMap, CellStub pools) {

String pool = poolInfoMap.getPool(getNullForNil(source));

ACTIVITY_LOGGER.info("Setting system sticky for {} on {}", pnfsId, pool);
pools.send(new CellPath(pool),
new ForceSystemStickyBitMessage(pool, pnfsId));
}
Expand Down
Expand Up @@ -66,15 +66,20 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
import java.util.Collection;
import java.util.Collections;
import java.util.NoSuchElementException;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import diskCacheV111.util.CacheException;
import diskCacheV111.util.PnfsId;
import diskCacheV111.vehicles.PoolManagerPoolInformation;

import dmg.cells.nucleus.CellPath;

import org.dcache.alarms.AlarmMarkerFactory;
import org.dcache.alarms.PredefinedAlarm;
import org.dcache.cells.CellStub;
Expand Down Expand Up @@ -117,6 +122,8 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
public class FileOperationHandler {
private static final Logger LOGGER = LoggerFactory.getLogger(
FileOperationHandler.class);
private static final Logger ACTIVITY_LOGGER =
LoggerFactory.getLogger("org.dcache.resilience-log");

private static final ImmutableList<StickyRecord> ONLINE_STICKY_RECORD
= ImmutableList.of(
Expand Down Expand Up @@ -369,6 +376,14 @@ public Task handleMakeOneCopy(FileAttributes attributes) {
ReplicaState.CACHED, ONLINE_STICKY_RECORD,
Collections.EMPTY_LIST, attributes,
attributes.getAccessTime());
if (ACTIVITY_LOGGER.isInfoEnabled()) {
List<String> allPools = list.getPools().stream()
.map(PoolManagerPoolInformation::getName)
.collect(Collectors.toList());
ACTIVITY_LOGGER.info("Initiating replication of {} from {} to"
+ " pools: {}, offline: {}", pnfsId, source, allPools,
list.getOfflinePools());
}
LOGGER.trace("Created migration task for {}: source {}, list {}.",
pnfsId, source, list);

Expand Down Expand Up @@ -640,6 +655,7 @@ private void removeTarget(PnfsId pnfsId, String target)
pnfsId);

LOGGER.trace("Sending RemoveReplicasMessage {}.", msg);
ACTIVITY_LOGGER.info("Removing {} from {}", pnfsId, target);
Future<RemoveReplicaMessage> future = pools.send(new CellPath(target), msg);

try {
Expand Down
Expand Up @@ -12,6 +12,7 @@ dcache.net.wan.port.min=23000
dcache.net.wan.port.max=25000
dcache.paths.grid-security=${system-test.home}/etc/grid-security
dcache.log.level.events=debug
dcache.log.level.resilience=info
dcache.authn.crl-mode=IF_VALID
dcache.authn.hostcert.refresh=5
dcache.authn.hostcert.refresh.unit=SECONDS
Expand Down
2 changes: 1 addition & 1 deletion skel/etc/logback.xml
Expand Up @@ -290,7 +290,7 @@
<threshold>
<appender>resilience</appender>
<logger>org.dcache.resilience-log</logger>
<level>error</level>
<level>${dcache.log.level.resilience}</level>
</threshold>

</turboFilter>
Expand Down
1 change: 1 addition & 0 deletions skel/share/defaults/dcache.properties
Expand Up @@ -72,6 +72,7 @@
(not-for-services,one-of?off|error|warn|info|debug|trace|all)dcache.log.level.events=off
(not-for-services,one-of?off|error|warn|info|debug|trace|all)dcache.log.level.access=info
(not-for-services,one-of?off|error|warn|info|debug|trace|all)dcache.log.level.zookeeper=info
(not-for-services,one-of?off|error|warn|info|debug|trace|all)dcache.log.level.resilience=error

# How many days to keep access logs
dcache.log.access.max-history=30
Expand Down

0 comments on commit 68e36e1

Please sign in to comment.