Skip to content

Commit

Permalink
srm: fix bring-online rescheduling on restart
Browse files Browse the repository at this point in the history
Several states of BringOnlineFileRequest were not handled correctly when
an active job was rescheduled after an SRM restart. This patch handles each state explicitly.

Target: master
Patch: http://rb.dcache.org/r/6807/
Acked-by: Gerd Behrmann
  • Loading branch information
paulmillar committed Mar 31, 2014
1 parent 268de60 commit 780ae6e
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,28 @@ public final void run() throws NonFatalJobFailure, FatalJobFailure {
/**
 * Decides how to resume this job after an SRM service restart, based on
 * the state the job is currently in.
 *
 * <ul>
 * <li>ASYNCWAIT, RETRYWAIT: the job is handed to the scheduler as-is.</li>
 * <li>PRIORITYTQUEUED, TQUEUED, RUNNING: the job is first moved to
 *     RESTORED and then handed to the scheduler.</li>
 * <li>Any other state: the job is moved to FAILED and is not
 *     scheduled.</li>
 * </ul>
 *
 * @param scheduler the scheduler the job is resubmitted to
 * @throws IllegalStateTransition presumably raised when a state change
 *         requested here is rejected -- TODO confirm against
 *         setState and Scheduler#schedule
 */
protected void onSrmRestartForActiveJob(Scheduler scheduler)
        throws IllegalStateTransition
{
    // Inspect the state before touching the scheduler: scheduling
    // unconditionally is what previously led to illegal state transitions.
    State state = getState();

    switch (state) {
    case ASYNCWAIT:
    case RETRYWAIT:
        // FIXME: we should log the SRM restart in the job's history.
        scheduler.schedule(this);
        break;

    case PRIORITYTQUEUED:
    case TQUEUED:
    case RUNNING:
        // Go through RESTORED so the job's history records why it is
        // being scheduled again.
        setState(State.RESTORED, "Rescheduled after SRM service restart");
        scheduler.schedule(this);
        break;

    // All other states are invalid.
    default:
        setState(State.FAILED, "Invalid state (" + state + ") detected " +
                "after SRM service restart");
        break;
    }
}

public void askFileId() throws NonFatalJobFailure, FatalJobFailure {
Expand Down
11 changes: 8 additions & 3 deletions modules/srm-server/src/main/java/org/dcache/srm/request/Job.java
Original file line number Diff line number Diff line change
Expand Up @@ -360,11 +360,13 @@ private boolean isValidTransition(State currentState, State newState)
case TQUEUED:
return newState == State.CANCELED
|| newState == State.FAILED
|| newState == State.RUNNING;
|| newState == State.RUNNING
|| newState == State.RESTORED;
case PRIORITYTQUEUED:
return newState == State.CANCELED
|| newState == State.FAILED
|| newState == State.RUNNING;
|| newState == State.RUNNING
|| newState == State.RESTORED;
case RUNNING:
return newState == State.CANCELED
|| newState == State.FAILED
Expand All @@ -373,7 +375,8 @@ private boolean isValidTransition(State currentState, State newState)
|| newState == State.RQUEUED
|| newState == State.READY
|| newState == State.DONE
|| newState == State.RUNNINGWITHOUTTHREAD;
|| newState == State.RUNNINGWITHOUTTHREAD
|| newState == State.RESTORED;
case ASYNCWAIT:
return newState == State.CANCELED
|| newState == State.FAILED
Expand All @@ -399,6 +402,8 @@ private boolean isValidTransition(State currentState, State newState)
return newState == State.CANCELED
|| newState == State.FAILED
|| newState == State.DONE;
case RESTORED:
return newState == State.PRIORITYTQUEUED;
case FAILED:
case DONE:
case CANCELED:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ public void schedule(Job job)
job.setState(State.FAILED, "Site busy: too many queued requests.");
}
break;
case RESTORED:
case ASYNCWAIT:
case RETRYWAIT:
case RUNNINGWITHOUTTHREAD:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ public enum State {

/** A resource limitation or some failure prevented job from completing successfully. */
FAILED ("Failed" ,11, true),
@Deprecated // Kept for compatibility with old databases
RESTORED ("Restored" ,12),

/** Job has triggered a third-party copy. */
Expand Down

0 comments on commit 780ae6e

Please sign in to comment.