Skip to content

Commit

Permalink
[7.7][ML] Fix monitoring if orphaned anomaly detector persistent task…
Browse files Browse the repository at this point in the history
…s exist (#57243)

Since #51888 the ML job stats endpoint has returned entries for
jobs that have a persistent task but no job config. Such
orphaned tasks caused monitoring to fail.

This change ignores any such corrupt jobs for monitoring purposes.

Backport of #57235
  • Loading branch information
droberts195 committed May 28, 2020
1 parent b640f0b commit 2a68d42
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,14 @@ private void addJobsUsage(GetJobsStatsAction.Response response, List<Job> jobs)
Map<String, Long> allJobsCreatedBy = jobs.stream().map(this::jobCreatedBy)
.collect(Collectors.groupingBy(item -> item, Collectors.counting()));;
for (GetJobsStatsAction.Response.JobStats jobStats : jobsStats) {
ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
Job job = jobMap.get(jobStats.getJobId());
if (job == null) {
// It's possible we can get job stats without a corresponding job config, if a
// persistent task is orphaned. Omit these corrupt jobs from the usage info.
continue;
}
int detectorsCount = job.getAnalysisConfig().getDetectors().size();
ModelSizeStats modelSizeStats = jobStats.getModelSizeStats();
double modelSize = modelSizeStats == null ? 0.0
: jobStats.getModelSizeStats().getModelBytes();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,46 @@ public void testUsage() throws Exception {
}
}

public void testUsageWithOrphanedTask() throws Exception {
    // Scenario: stats are reported for three job ids, but only two of them have a job config.
    // The usage output must be computed from the two configured jobs only.
    when(licenseState.isMachineLearningAllowed()).thenReturn(true);
    Settings.Builder settingsBuilder = Settings.builder().put(commonSettings);
    settingsBuilder.put("xpack.ml.enabled", true);

    // Configured job with one detector and a "created_by" custom setting.
    Job openedJob = buildJob("opened1", Collections.singletonList(buildMinDetector("foo")),
        Collections.singletonMap("created_by", randomFrom("a-cool-module", "a_cool_module", "a cool module")));
    GetJobsStatsAction.Response.JobStats openedJobStats = buildJobStats("opened1", JobState.OPENED, 100L, 3L);
    // Stats only: no Job config is ever registered for "opened2".
    GetJobsStatsAction.Response.JobStats orphanedJobStats = buildJobStats("opened2", JobState.OPENED, 200L, 8L);
    // Configured job with three detectors and no custom settings.
    Job closedJob = buildJob("closed1", Arrays.asList(buildMinDetector("foo"), buildMinDetector("bar"), buildMinDetector("foobar")));
    GetJobsStatsAction.Response.JobStats closedJobStats = buildJobStats("closed1", JobState.CLOSED, 300L, 0);
    givenJobs(
        Arrays.asList(openedJob, closedJob),
        Arrays.asList(openedJobStats, orphanedJobStats, closedJobStats));

    Settings nodeSettings = settingsBuilder.build();
    MachineLearningFeatureSet mlFeatureSet = new MachineLearningFeatureSet(TestEnvironment.newEnvironment(nodeSettings),
        clusterService, client, licenseState, jobManagerHolder);
    PlainActionFuture<Usage> usageFuture = new PlainActionFuture<>();
    mlFeatureSet.usage(usageFuture);
    XPackFeatureSet.Usage mlUsage = usageFuture.get();

    // Render the usage to JSON so individual fields can be looked up by path.
    XContentSource rendered;
    try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) {
        mlUsage.toXContent(jsonBuilder, ToXContent.EMPTY_PARAMS);
        rendered = new XContentSource(jsonBuilder);
    }

    // Only the two configured jobs are counted.
    assertThat(rendered.getValue("jobs._all.count"), equalTo(2));

    // Detector statistics come from the configured jobs (1 and 3 detectors).
    assertThat(rendered.getValue("jobs._all.detectors.min"), equalTo(1.0));
    assertThat(rendered.getValue("jobs._all.detectors.max"), equalTo(3.0));
    assertThat(rendered.getValue("jobs._all.detectors.total"), equalTo(4.0));
    assertThat(rendered.getValue("jobs._all.detectors.avg"), equalTo(2.0));

    // Model size statistics exclude the 200.0 reported for the orphaned "opened2" stats.
    assertThat(rendered.getValue("jobs._all.model_size.min"), equalTo(100.0));
    assertThat(rendered.getValue("jobs._all.model_size.max"), equalTo(300.0));
    assertThat(rendered.getValue("jobs._all.model_size.total"), equalTo(400.0));
    assertThat(rendered.getValue("jobs._all.model_size.avg"), equalTo(200.0));

    // One job per "created_by" bucket: the tagged job and the untagged one.
    assertThat(rendered.getValue("jobs._all.created_by.a_cool_module"), equalTo(1));
    assertThat(rendered.getValue("jobs._all.created_by.unknown"), equalTo(1));
}

public void testUsageDisabledML() throws Exception {
when(licenseState.isMachineLearningAllowed()).thenReturn(true);
Settings.Builder settings = Settings.builder().put(commonSettings);
Expand Down

0 comments on commit 2a68d42

Please sign in to comment.