Skip to content

Commit

Permalink
srm-server: add trs configuration admin commands
Browse files Browse the repository at this point in the history
Motivation:

The srm tape recall scheduler's behaviour depends on configurable parameters. When (temporary) changes are desired during a recall campaign, it is undesirable to have to restart the entire SrmManager.

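Modification:

Add the admin commands `trs set tape selection` and `trs set request stay` to TapeRecallSchedulingRequirementsChecker, declare its scheduling parameters volatile, and replace direct reads of the raw parameter values in the scheduler with helper methods on the checker.
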
Result:

Admin commands are added that allow changing the tape recall scheduler's behaviour without having to restart the SrmManager.

Target: master
Requires-notes: yes
Requires-book: no
Patch: https://rb.dcache.org/r/13155/
Acked-by: Tigran Mkrtchyan
  • Loading branch information
lemora committed Sep 8, 2021
1 parent 97c54ae commit 9071fa1
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public synchronized Long remove() {
fetchTapeInfo();
}

if (!tapesWithJobs.isEmpty() && activeTapesWithJobs.size() < requirementsChecker.maxActiveTapes()) {
if (!tapesWithJobs.isEmpty() && requirementsChecker.getRemainingTapeSlots(activeTapesWithJobs.size()) > 0) {
LOGGER.info(getTapeJobsInfo());
refillActiveTapeSlots();
}
Expand Down Expand Up @@ -149,7 +149,7 @@ private boolean addJobToTapeQueue(SchedulingItemJob job, String tape) {
* to receive the next tapes to activate.
*/
private void refillActiveTapeSlots() {
int freeTapeSlots = requirementsChecker.maxActiveTapes() - activeTapesWithJobs.size();
int freeTapeSlots = requirementsChecker.getRemainingTapeSlots(activeTapesWithJobs.size());

while (freeTapeSlots > 0) {
String tape = selectNextTapeToActivate();
Expand Down Expand Up @@ -243,7 +243,7 @@ private String selectNextTapeToActivate() {

// if configured, finish with checking if a tape has a sufficiently long job queue

if (requirementsChecker.minNumberOfRequestsForTapeSelection() == requirementsChecker.NO_VALUE) {
if (!requirementsChecker.isDefinedMinRequestCount()) {
LOGGER.trace("No tapes available with sufficient recall volume.");
return null;
}
Expand All @@ -253,7 +253,7 @@ private String selectNextTapeToActivate() {
.findFirst()
.get();

boolean queueSufficientlyLong = tapesWithJobs.get(tapeWithLongestQueue).size() >= requirementsChecker.minNumberOfRequestsForTapeSelection();
boolean queueSufficientlyLong = requirementsChecker.isRequestCountSufficient(tapesWithJobs.get(tapeWithLongestQueue).size());
LOGGER.info("Found {}tape with sufficiently long job queue.", (queueSufficientlyLong ? "":"no "));
return queueSufficientlyLong ? tapeWithLongestQueue : null;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,38 +1,57 @@
/* dCache - http://www.dcache.org/
*
* Copyright (C) 2021 Deutsches Elektronen-Synchrotron
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.dcache.srm.taperecallscheduling;

import dmg.cells.nucleus.CellCommandListener;
import dmg.cells.nucleus.CellInfoProvider;
import dmg.util.command.Command;
import dmg.util.command.Option;
import org.dcache.util.TimeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.PrintWriter;
import java.time.Duration;
import java.util.concurrent.Callable;

import static com.google.common.base.Preconditions.checkArgument;
import static java.util.concurrent.TimeUnit.HOURS;
import static java.util.concurrent.TimeUnit.MINUTES;

public class TapeRecallSchedulingRequirementsChecker implements CellInfoProvider {
public class TapeRecallSchedulingRequirementsChecker implements CellCommandListener, CellInfoProvider {
private static final Logger LOGGER = LoggerFactory.getLogger(TapeRecallSchedulingRequirementsChecker.class);


public static final long NO_VALUE = -1;
/** time safety margin in milliseconds */
private static final long TIME_SAFETY_MARGIN = 10;
private static final long MIN_RELATIVE_TAPE_RECALL_PERCENTAGE = 95; // of _used_ space

// scheduling parameters

private int maxActiveTapes = 1;
private int minTapeRecallPercentage = 80;
private long minNumberOfRequestsForTapeSelection = NO_VALUE;
private long minJobWaitingTime = MINUTES.toMillis(2);
private long maxJobWaitingTime = HOURS.toMillis(1);
private long tapeinfolessJobWaitingTime = MINUTES.toMillis(10); // queue waiting time for jobs without tape info
private volatile int maxActiveTapes = 1;
private volatile int minTapeRecallPercentage = 80;
private volatile long minNumberOfRequestsForTapeSelection = NO_VALUE;
private volatile long minJobWaitingTime = MINUTES.toMillis(2);
private volatile long maxJobWaitingTime = HOURS.toMillis(1);
private volatile long tapeinfolessJobWaitingTime = MINUTES.toMillis(10);

/**
 * Sets the maximum number of tapes that may be active at the same time.
 *
 * @param tapeCount the new maximum; must be greater than 0
 * @throws IllegalArgumentException if {@code tapeCount} is not greater than 0
 */
public void setMaxActiveTapes(int tapeCount) {
// reject zero and negative values before storing
checkArgument(tapeCount > 0, "There need to be more than 0 max. active tapes");
this.maxActiveTapes = tapeCount;
maxActiveTapes = tapeCount;
}

public void setMinJobWaitingTime(Duration time) {
Expand Down Expand Up @@ -79,6 +98,23 @@ public long maxJobWaitingTime() {
return maxJobWaitingTime;
}

/**
 * Returns the configured queue waiting time for jobs without tape info.
 *
 * @return the waiting time in milliseconds, or {@code NO_VALUE} if none is configured
 */
public long tapeinfolessJobWaitingTime() {
    final long waitingTime = this.tapeinfolessJobWaitingTime;
    return waitingTime;
}

/**
 * Computes how many more tapes may be activated.
 *
 * @param taken the number of tape slots currently in use
 * @return the configured maximum of active tapes minus {@code taken}, but never less than 0
 */
public int getRemainingTapeSlots(int taken) {
    // Clamp at zero: the maximum may be below the number of slots already taken.
    return Math.max(0, maxActiveTapes - taken);
}

/**
 * Tells whether a minimum number of requests per tape is configured for tape selection.
 *
 * @return {@code false} if the minimum request count is {@code NO_VALUE}, {@code true} otherwise
 */
public boolean isDefinedMinRequestCount() {
    final long configuredMinimum = minNumberOfRequestsForTapeSelection;
    return !(configuredMinimum == NO_VALUE);
}

/**
 * Checks whether a tape's number of queued requests reaches the configured minimum
 * for tape selection.
 *
 * @param count the number of requests queued for a tape
 * @return {@code true} if a minimum request count is configured and {@code count} is at least
 *         that minimum; {@code false} if the criterion is disabled ({@code NO_VALUE}) or the
 *         count is too small
 */
public boolean isRequestCountSufficient(long count) {
    // Read the volatile field exactly once: it can be changed by an admin command at any time,
    // including between a caller's isDefinedMinRequestCount() check and this call.
    final long minimum = minNumberOfRequestsForTapeSelection;
    // A disabled criterion must never count as fulfilled; without this guard any count would
    // satisfy "count >= NO_VALUE".
    return minimum != NO_VALUE && count >= minimum;
}

/**
* If the oldest request for a tape has exceeded its max. queue lifetime.
* @param tape the scheduling info for the tape in question
Expand All @@ -98,13 +134,14 @@ public boolean isOldestTapeJobExpired(SchedulingInfoTape tape) {
* @param tape the scheduling info for the tape in question
*/
public boolean isNewestTapeJobOldEnough(SchedulingInfoTape tape) {
long minWaitingTime = minJobWaitingTime;
// a tape whose newest job arrival is NO_VALUE is never considered old enough
if (tape.getNewestJobArrival() == NO_VALUE) {
return false;
} else if (minJobWaitingTime == NO_VALUE) {
} else if (minWaitingTime == NO_VALUE) {
// no minimum waiting time is configured, so any recorded arrival qualifies
return true;
}
long ageOfNewestJobArrival = System.currentTimeMillis() - tape.getNewestJobArrival();
// TIME_SAFETY_MARGIN (milliseconds) is added on top of the configured minimum
long correctedMinAge = minJobWaitingTime + TIME_SAFETY_MARGIN;
long correctedMinAge = minWaitingTime + TIME_SAFETY_MARGIN;
return ageOfNewestJobArrival >= correctedMinAge;
}

Expand All @@ -119,8 +156,9 @@ public boolean isNewestTapeJobOldEnough(SchedulingInfoTape tape) {
* @return Whether the recall volume is sufficient
*/
public boolean isTapeRecallVolumeSufficient(SchedulingInfoTape tape, long recallVolume) {
int percentage = this.minTapeRecallPercentage;
if (!tape.hasTapeInfo()) {
return minTapeRecallPercentage == 0;
return percentage == 0;
}

float percentOfUsedSpace = ((float)recallVolume / (float)tape.getUsedSpace()) * 100;
Expand All @@ -131,24 +169,24 @@ public boolean isTapeRecallVolumeSufficient(SchedulingInfoTape tape, long recall
}

float percentOfCapacity =((float)recallVolume / (float)tape.getCapacity()) * 100;
boolean recallVolumeSufficient = percentOfCapacity >= minTapeRecallPercentage;
boolean recallVolumeSufficient = percentOfCapacity >= percentage;
if (recallVolumeSufficient) {
LOGGER.info("Tape recall volume sufficient: {} of capacity", String.format("%.0f%%",percentOfCapacity));
}
return recallVolumeSufficient;
}

/**
 * Compares two tapes by the arrival time of their oldest job.
 * Two tapes whose oldest job arrival is {@code NO_VALUE} compare as equal; a tape with
 * {@code NO_VALUE} is ordered before a tape that has an arrival time. Otherwise the two
 * arrival times are compared numerically.
 *
 * @param first the scheduling info of the first tape
 * @param second the scheduling info of the second tape
 * @return a negative value, zero, or a positive value if {@code first} is ordered before,
 *         equal to, or after {@code second}
 */
public int compareOldestTapeRequestAge(SchedulingInfoTape first, SchedulingInfoTape second) {
long arrivalFirst = first.getOldestJobArrival();
long arrivalSecond = second.getOldestJobArrival();
if (arrivalFirst == NO_VALUE && arrivalSecond == NO_VALUE) {
long oldestArrival = first.getOldestJobArrival();
long otherArrival = second.getOldestJobArrival();
if (oldestArrival == NO_VALUE && otherArrival == NO_VALUE) {
return 0;
} else if(arrivalFirst == NO_VALUE && arrivalSecond != NO_VALUE) {
} else if(oldestArrival == NO_VALUE && otherArrival != NO_VALUE) {
return -1;
} else if(arrivalFirst != NO_VALUE && arrivalSecond == NO_VALUE) {
} else if(oldestArrival != NO_VALUE && otherArrival == NO_VALUE) {
return 1;
}
// both tapes have an arrival time: the smaller timestamp is ordered first
return Long.compare(arrivalFirst, arrivalSecond);
return Long.compare(oldestArrival, otherArrival);
}

public boolean isJobExpired(SchedulingItemJob job) {
Expand All @@ -158,22 +196,95 @@ public boolean isJobExpired(SchedulingItemJob job) {
}

/**
 * Checks whether a job has exceeded the waiting time configured for jobs without tape info.
 * If that waiting time is {@code NO_VALUE}, the result of {@code isJobExpired(job)} is
 * returned instead.
 *
 * @param job the job to check
 * @return {@code true} if the time since the job's creation is greater than the configured
 *         waiting time plus {@code TIME_SAFETY_MARGIN}
 */
public boolean isTapeinfolessJobExpired(SchedulingItemJob job) {
if (tapeinfolessJobWaitingTime == NO_VALUE) {
long waitingTime = tapeinfolessJobWaitingTime;
if (waitingTime == NO_VALUE) {
return isJobExpired(job);
}
long age = System.currentTimeMillis() - job.getCreationTime();
long correctedMaxAge = tapeinfolessJobWaitingTime + TIME_SAFETY_MARGIN;
long correctedMaxAge = waitingTime + TIME_SAFETY_MARGIN;
return age > correctedMaxAge;
}

/**
 * Admin command that changes the tape selection parameters at runtime.
 * Every option is optional; a parameter whose option is not given is left unchanged.
 */
@Command(name = "trs set tape selection",
        hint = "Changes the parameters used for selecting which and how many tapes will be activated " +
                "at any point in time in order for their associated requests to be forwarded " +
                "to the tape system for recall.")
public class TrsSetTapeSelection implements Callable<String> {
    @Option(name = "active", metaVar = "count",
            usage = "The maximum number of tapes which may be active at the same time")
    Integer active;

    @Option(name = "volume", metaVar = "percent",
            usage = "The minimum percent of a tape's capacity that needs to be requested for tape selection")
    Integer volume;

    @Option(name = "requests", metaVar = "count",
            usage = "The minimum number of requests needed for a tape for its selection. '-1' disables this criterion")
    Integer requests;

    /**
     * Applies the given options through the enclosing checker's setters, in the order
     * active, volume, requests.
     *
     * @return one line of confirmation per parameter that was changed
     */
    @Override
    public String call() {
        StringBuilder sb = new StringBuilder();
        if (active != null) {
            setMaxActiveTapes(active);
            // fixed typo in the reply: "maxiumm" -> "maximum"
            sb.append("maximum active tapes set to ").append(active).append("\n");
        }
        if (volume != null) {
            setMinTapeRecallPercentage(volume);
            sb.append("minimum recall percentage set to ").append(volume).append("\n");
        }
        if (requests != null) {
            setMinNumberOfRequestsForTapeSelection(requests);
            // The prefix already ends with a space, so neither alternative starts with one.
            sb.append("minimum number of requests per tape ");
            sb.append(requests == NO_VALUE ? "disabled" : "set to " + requests).append("\n");
        }
        return sb.toString();
    }
}

/**
 * Admin command that changes, at runtime, how long requests stay in the scheduler.
 * All values are given in minutes; a parameter whose option is not given is left unchanged.
 */
@Command(name = "trs set request stay",
        hint = "Changes the time parameters that requests stay in the scheduler before leaving or the associated tape can be selected.")
public class TrsSetTimeInQueue implements Callable<String> {
    @Option(name = "min", metaVar = "minutes",
            usage = "Minimum time a request stays in the scheduler before tape selection")
    Long min;

    @Option(name = "max", metaVar = "minutes",
            usage = "Maximum time a request stays in the scheduler before tape selection")
    Long max;

    @Option(name = "tapeinfoless", metaVar = "minutes",
            usage = "Time a request without associated tape information stays in the scheduler")
    Long tapeinfoless;

    /**
     * Applies the given options through the enclosing checker's setters, in the order
     * min, max, tapeinfoless.
     *
     * @return one line of confirmation per parameter that was changed
     */
    @Override
    public String call() {
        final StringBuilder report = new StringBuilder();

        if (min != null) {
            final Duration minStay = Duration.ofMinutes(min);
            setMinJobWaitingTime(minStay);
            report.append("min job waiting time set to " + min + " minutes\n");
        }

        if (max != null) {
            final Duration maxStay = Duration.ofMinutes(max);
            setMaxJobWaitingTime(maxStay);
            report.append("max job waiting time set to " + max + " minutes\n");
        }

        if (tapeinfoless != null) {
            final Duration tapeinfolessStay = Duration.ofMinutes(tapeinfoless);
            setTapeinfolessJobWaitingTime(tapeinfolessStay);
            report.append("tapeinfoless job waiting time set to " + tapeinfoless + " minutes\n");
        }

        return report.toString();
    }
}

/**
 * Prints the current bring-online scheduler parameters to the given writer, one per line.
 * The minimum request count and the tapeinfoless waiting time are printed as "-" when
 * they are {@code NO_VALUE}.
 *
 * @param pw the writer to print to
 */
@Override
public void getInfo(PrintWriter pw) {
pw.printf("Bring online scheduler parameters:\n");
pw.printf(" Max. active tapes (usually drive count): %s\n", maxActiveTapes);
pw.printf(" Min. recall volume percentage for tape selection: %s\n", minTapeRecallPercentage);
pw.printf(" Min. number of requests for tape selection: %s\n", minNumberOfRequestsForTapeSelection == NO_VALUE ? "-" : minNumberOfRequestsForTapeSelection);
pw.printf(" Min. time requests stay in the queue: %s\n", TimeUtils.describe(Duration.ofMillis(minJobWaitingTime)).orElse("-"));
pw.printf(" Max. time requests stay in the queue: %s\n", TimeUtils.describe(Duration.ofMillis(maxJobWaitingTime)).orElse("-"));
pw.printf(" Min. time requests without tape info stay in the queue: %s\n", tapeinfolessJobWaitingTime == NO_VALUE ? "-" : TimeUtils.describe(Duration.ofMillis(tapeinfolessJobWaitingTime)).orElse("-"));
pw.printf(" Min. time requests stay in the queue: %s\n", TimeUtils.describe(Duration.ofMillis(minJobWaitingTime)).orElse("not set!"));
pw.printf(" Max. time requests stay in the queue: %s\n", TimeUtils.describe(Duration.ofMillis(maxJobWaitingTime)).orElse("not set!"));
pw.printf(" Min. time requests without tape info stay in the queue: %s\n", tapeinfolessJobWaitingTime == NO_VALUE ? "-" :
TimeUtils.describe(Duration.ofMillis(tapeinfolessJobWaitingTime)).orElse("not set!"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ protected void initializeTapefileInfo() {

/**
 * Returns the superclass description followed by a line naming the file type.
 */
@Override
public String describe() {
return super.describe() + "\n File type: CSV\n";
return super.describe() + "\n File type: csv\n";
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ protected void initializeTapefileInfo() {

/**
 * Returns the superclass description followed by a line naming the file type.
 */
@Override
public String describe() {
return super.describe() + "\n File type: JSON\n";
return super.describe() + "\n File type: json\n";
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import org.dcache.srm.taperecallscheduling.spi.TapeInfoProvider;
import org.dcache.srm.taperecallscheduling.TapeRecallSchedulingRequirementsChecker;
import org.dcache.srm.taperecallscheduling.TapefileInfo;
import org.dcache.srm.taperecallscheduling.tapeinfoprovider.JsonFileTapeInfoProvider;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand Down
4 changes: 2 additions & 2 deletions skel/share/defaults/srmmanager.properties
Original file line number Diff line number Diff line change
Expand Up @@ -449,13 +449,13 @@ srmmanager.boclustering.max-active-tapes = 1
# Only used when bring-online request clustering is enabled
# Min targeted tape capacity percentage required before passing on requests
#
srmmanager.boclustering.min-tape-recall-percentage = 60
srmmanager.boclustering.min-tape-recall-percentage = 80

# Only used when bring-online request clustering is enabled
# Min number of requests for a tape before being selected if min-tape-recall-percentage is not fulfilled.
# A value of -1 indicates that the request count for a tape will not be taken into account.
#
srmmanager.boclustering.min-request-count-for-tape = 1000
srmmanager.boclustering.min-request-count-for-tape = -1

# Only used when bring-online request clustering is enabled
# Min time a request needs to remain in the queue before the tape is being selected. The goal is to
Expand Down

0 comments on commit 9071fa1

Please sign in to comment.