Skip to content

Commit

Permalink
[FLINK-19009][metrics] Fixed the downtime metric issue and updated the comment
Browse files Browse the repository at this point in the history

This closes #13242.
  • Loading branch information
Yang Liu authored and tillrohrmann committed Aug 27, 2020
1 parent e690793 commit 05cbf1e
Showing 1 changed file with 16 additions and 22 deletions.
Expand Up @@ -28,16 +28,16 @@
* A gauge that returns (in milliseconds) how long a job has not been running,
* in case it is in a failing/recovering situation. Running jobs naturally return
* a value of zero.
*
* <p>For jobs that have never run (new not yet scheduled jobs), this gauge returns
* {@value NOT_YET_RUNNING}, and for jobs that are not running any more, it returns
* {@value NO_LONGER_RUNNING}.
*
* <p>For jobs that have never run (new, not-yet-scheduled jobs) or jobs that
* are running again after a failure, this gauge returns {@value NOT_FAILING},
* and for jobs that are not running any more, it returns {@value NO_LONGER_RUNNING}.
*/
public class DownTimeGauge implements Gauge<Long> {

public static final String METRIC_NAME = "downtime";

private static final long NOT_YET_RUNNING = 0L;
private static final long NOT_FAILING = 0L;

private static final long NO_LONGER_RUNNING = -1L;

Expand All @@ -55,25 +55,19 @@ public DownTimeGauge(ExecutionGraph executionGraph) {
public Long getValue() {
final JobStatus status = eg.getState();

if (status == JobStatus.RUNNING) {
// running right now - no downtime
return 0L;
}
else if (status.isTerminalState()) {
// not running any more -> finished or not on leader
// not running any more -> finished or not on leader
if (status.isTerminalState()) {
return NO_LONGER_RUNNING;
}
else {
final long runningTimestamp = eg.getStatusTimestamp(JobStatus.RUNNING);
if (runningTimestamp > 0) {
// job was running at some point and is not running now
// we use 'Math.max' here to avoid negative timestamps when clocks change
return Math.max(System.currentTimeMillis() - runningTimestamp, 0);
}
else {
// job was never scheduled so far
return NOT_YET_RUNNING;
}

final long runningTimestamp = eg.getStatusTimestamp(JobStatus.RUNNING);
final long failingTimestamp = eg.getStatusTimestamp(JobStatus.FAILING);

if (failingTimestamp <= runningTimestamp) {
return NOT_FAILING;
} else {
// we use 'Math.max' here to avoid negative timestamps when clocks change
return Math.max(System.currentTimeMillis() - failingTimestamp, 0);
}
}
}

0 comments on commit 05cbf1e

Please sign in to comment.