Skip to content

Commit

Permalink
pool: Extend migration module with -meta-only option
Browse files Browse the repository at this point in the history
Motivation:

A recent bug caused sticky bits to be lost in certain situations. A procedure
to repair the damage involves using a migration job to eliminate duplicates.
To make this work we have to be able to limit migration jobs to only transfer
the meta data and skip any files that do not already have other copies.

Modification:

Add the -meta-only option to migration jobs. If set, a migration task will
only run the steps involving upgrading existing replicas and skip files that
would require transferring the file itself.

Result:

migration copy/cache/move accept the new -meta-only option. Backport is requested
to allow sites with existing versions to repair the damage from the above-mentioned
bug.

Target: trunk
Request: 2.14
Request: 2.13
Request: 2.12
Require-notes: yes
Require-book: yes
Acked-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Patch: https://rb.dcache.org/r/8875
(cherry picked from commit 857aea9)
(cherry picked from commit 227337e)
(cherry picked from commit f25601b)
  • Loading branch information
gbehrmann committed Dec 15, 2015
1 parent 8fbed27 commit 80a2299
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public Job(MigrationContext context, JobDefinition definition)

_taskParameters = new TaskParameters(context.getPoolStub(), context.getPnfsStub(), context.getPinManagerStub(),
context.getExecutor(), definition.selectionStrategy,
definition.poolList, definition.isEager,
definition.poolList, definition.isEager, definition.isMetaOnly,
definition.computeChecksumOnUpdate, definition.forceSourceMode,
definition.replicas);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ public class JobDefinition
*/
public final boolean isEager;

/**
* Whether the job will only copy meta data to existing replicas rather than
* create new replicas.
*/
public final boolean isMetaOnly;

/**
* Whether to move pins to the target pool after successful migration.
*/
Expand Down Expand Up @@ -106,6 +112,7 @@ public JobDefinition(List<CacheEntryFilter> filters,
long refreshPeriod,
boolean isPermanent,
boolean isEager,
boolean isMetaOnly,
int replicas,
boolean mustMovePins,
boolean computeChecksumOnUpdate,
Expand All @@ -123,6 +130,7 @@ public JobDefinition(List<CacheEntryFilter> filters,
this.refreshPeriod = refreshPeriod;
this.isPermanent = isPermanent;
this.isEager = isEager;
this.isMetaOnly = isMetaOnly;
this.replicas = replicas;
this.mustMovePins = mustMovePins;
this.computeChecksumOnUpdate = computeChecksumOnUpdate;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,13 @@ public class MigrationCopyCommand implements Callable<String>
usage = "Determines the interpretation of the target names.")
String target = "pool";

@Option(name="meta-only",
category="Target options",
usage="Only transfers meta data to an existing target replica. If a given file " +
"does not have any other replicas on any of the target pools, the file " +
"is skipped.")
boolean metaOnly;

@Option(name="pause-when", metaVar="expr",
category="Lifetime options",
usage = "Pauses the job when the expression becomes true. The job " +
Expand Down Expand Up @@ -823,6 +830,7 @@ public String call() throws IllegalArgumentException
refresh * 1000,
permanent,
eager,
metaOnly,
replicas,
mustMovePins,
verify,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ private class Request implements CacheFileAvailable, Runnable
private final String _pool;
private final boolean _computeChecksumOnUpdate;
private final boolean _forceSourceMode;
private final boolean _isMetaOnly;
private Integer _companion;
private Future<?> _updateTask;

Expand All @@ -191,6 +192,7 @@ public Request(CellPath requestor, PoolMigrationCopyReplicaMessage message)
_pool = message.getPool();
_computeChecksumOnUpdate = message.getComputeChecksumOnUpdate();
_forceSourceMode = message.isForceSourceMode();
_isMetaOnly = message.isMetaOnly();

if (_targetState != PRECIOUS && _targetState != CACHED) {
throw new IllegalArgumentException("State must be either CACHED or PRECIOUS");
Expand Down Expand Up @@ -218,6 +220,9 @@ public synchronized void start()
{
EntryState state = _repository.getState(_pnfsId);
if (state == EntryState.NEW) {
if (_isMetaOnly) {
throw new CacheException(CacheException.FILE_NOT_IN_REPOSITORY, "Pool does not contain " + _pnfsId);
}
_companion = _p2p.newCompanion(_pool, _fileAttributes,
_targetState, _stickyRecords,
this, _forceSourceMode);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,23 @@ public class PoolMigrationCopyReplicaMessage extends PoolMigrationMessage
private final List<StickyRecord> _stickyRecords;
private final boolean _computeChecksumOnUpdate;
private final boolean _forceSourceMode;
private final boolean _isMetaOnly;

public PoolMigrationCopyReplicaMessage(UUID uuid, String pool,
FileAttributes fileAttributes,
EntryState state,
List<StickyRecord> stickyRecords,
boolean computeChecksumOnUpdate,
boolean forceSourceMode)
boolean forceSourceMode,
boolean isMetaOnly)
{
super(uuid, pool, fileAttributes.getPnfsId());
_fileAttributes = checkNotNull(fileAttributes);
_state = checkNotNull(state);
_stickyRecords = checkNotNull(stickyRecords);
_computeChecksumOnUpdate = computeChecksumOnUpdate;
_forceSourceMode = forceSourceMode;
_isMetaOnly = isMetaOnly;
}

public EntryState getState()
Expand All @@ -65,4 +68,9 @@ public boolean isForceSourceMode()
{
return _forceSourceMode;
}

/**
 * Returns the meta-only flag this message was constructed with.
 *
 * @return the value of the {@code isMetaOnly} constructor argument
 */
public boolean isMetaOnly()
{
return _isMetaOnly;
}
}
15 changes: 14 additions & 1 deletion modules/dcache/src/main/java/org/dcache/pool/migration/Task.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.dcache.util.ReflectionUtils;
import org.dcache.vehicles.FileAttributes;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Predicates.*;
import static com.google.common.collect.Collections2.transform;
import static com.google.common.collect.Iterables.filter;
Expand Down Expand Up @@ -146,6 +147,15 @@ public boolean isEager()
return _parameters.isEager;
}

/**
* Meta-only jobs only upgrade existing replicas; they never copy replicas. If
* there are no existing replicas, or not enough of them, the task fails
* permanently.
*
* @return the {@code isMetaOnly} flag of this task's parameters
*/
public boolean isMetaOnly()
{
return _parameters.isMetaOnly;
}

/**
* Returns the current target pool, if any.
*/
Expand Down Expand Up @@ -262,6 +272,8 @@ synchronized void updateExistingReplica()
/** FSM Action */
synchronized void initiateCopy()
{
checkState(!isMetaOnly());

try {
initiateCopy(selectPool());
} catch (NoSuchElementException e) {
Expand Down Expand Up @@ -293,7 +305,8 @@ public void run()
_targetState,
_targetStickyRecords,
_parameters.computeChecksumOnUpdate,
_parameters.forceSourceMode);
_parameters.forceSourceMode,
_parameters.isMetaOnly);
CellStub.addCallback(_parameters.pool.send(_target, copyReplicaMessage),
new Callback<>("copy_"), _parameters.executor);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ public class TaskParameters
*/
public final boolean isEager;

/**
* Whether the job will only copy meta data to existing replicas rather than
* create new replicas.
*/
public final boolean isMetaOnly;

/**
* Whether to verify the checksum when reusing existing target replicas.
*/
Expand All @@ -80,10 +86,10 @@ public class TaskParameters
*/
public final int replicas;

public TaskParameters(CellStub pool, CellStub pnfs, CellStub pinManager,
ScheduledExecutorService executor,
public TaskParameters(CellStub pool, CellStub pnfs, CellStub pinManager, ScheduledExecutorService executor,
PoolSelectionStrategy selectionStrategy, RefreshablePoolList poolList, boolean isEager,
boolean computeChecksumOnUpdate, boolean forceSourceMode, int replicas)
boolean isMetaOnly, boolean computeChecksumOnUpdate, boolean forceSourceMode,
int replicas)
{
this.pool = pool;
this.pnfs = pnfs;
Expand All @@ -92,6 +98,7 @@ public TaskParameters(CellStub pool, CellStub pnfs, CellStub pinManager,
this.selectionStrategy = selectionStrategy;
this.poolList = poolList;
this.isEager = isEager;
this.isMetaOnly = isMetaOnly;
this.computeChecksumOnUpdate = computeChecksumOnUpdate;
this.forceSourceMode = forceSourceMode;
this.replicas = replicas;
Expand Down
54 changes: 48 additions & 6 deletions modules/dcache/src/main/smc/org/dcache/pool/migration/Task.sm
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,15 @@ Entry
{
}
query_success
[ !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
query_success
Failed
{
failPermanently(FILE_NOT_IN_REPOSITORY, "File skipped because it has no existing replicas");
}
}

// We got existing copies. Try to update one of them. Keep
Expand All @@ -68,7 +74,7 @@ Entry
{
}
copy_timeout
[ ctxt.isEager() ]
[ ctxt.isEager() && !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
Expand All @@ -84,7 +90,7 @@ Entry
updateExistingReplica();
}
copy_noroute
[ ctxt.isEager() ]
[ ctxt.isEager() && !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
Expand All @@ -106,9 +112,15 @@ Entry
updateExistingReplica();
}
copy_failure(rc: Integer, cause: Object)
[ !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
copy_failure(rc: Integer, cause: Object)
Failed
{
fail(rc, String.format("Transfer to %s failed (%s)", ctxt.getTarget(), cause));
}
copy_success
Copying
{
Expand Down Expand Up @@ -237,9 +249,15 @@ WaitingForCopyReplicaReply
{
}
copy_success
[ !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
copy_success
Failed
{
failPermanently(FILE_NOT_IN_REPOSITORY, "File skipped because it does not have enough existing replicas");
}
copy_nopools
Failed
{
Expand Down Expand Up @@ -312,10 +330,16 @@ Exit
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.needsMoreReplicas() ]
[ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.needsMoreReplicas() ]
Failed
{
failPermanently(FILE_NOT_IN_REPOSITORY, "File skipped because it does not have enough existing replicas");
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.getMustMovePins() ]
MovingPin
Expand Down Expand Up @@ -382,10 +406,16 @@ Entry
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.needsMoreReplicas() ]
[ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.needsMoreReplicas() ]
Failed
{
failPermanently(FILE_NOT_IN_REPOSITORY, "File skipped because it does not have enough existing replicas");
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.getMustMovePins() ]
MovingPin
Expand Down Expand Up @@ -468,10 +498,16 @@ Exit
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.needsMoreReplicas() ]
[ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.needsMoreReplicas() ]
Failed
{
failPermanently(FILE_NOT_IN_REPOSITORY, "File skipped because it does not have enough existing replicas");
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.getMustMovePins() ]
MovingPin
Expand Down Expand Up @@ -530,10 +566,16 @@ Exit
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.needsMoreReplicas() ]
[ ctxt.needsMoreReplicas() && !ctxt.isMetaOnly() ]
InitiatingCopy
{
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.needsMoreReplicas() ]
Failed
{
failPermanently(FILE_NOT_IN_REPOSITORY, "File skipped because it does not have enough existing replicas");
}
messageArrived(message: PoolMigrationCopyFinishedMessage)
[ ctxt.getMustMovePins() ]
MovingPin
Expand Down

0 comments on commit 80a2299

Please sign in to comment.