diff --git a/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc index aa7a6357eaf5d..c3b8dc2e1f61e 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc @@ -50,6 +50,56 @@ This is a possible response: { "defaults" : { "anomaly_detectors" : { + "categorization_analyzer" : { + "tokenizer" : "ml_classic", + "filter" : [ + { + "type" : "stop", + "stopwords" : [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + "Mon", + "Tue", + "Wed", + "Thu", + "Fri", + "Sat", + "Sun", + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + "GMT", + "UTC" + ] + } + ] + }, "model_memory_limit" : "1gb", "categorization_examples_limit" : 4, "model_snapshot_retention_days" : 1 diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java index da820b1919cae..a9b509f81bbe1 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java @@ -6,12 +6,16 @@ package org.elasticsearch.xpack.core.ml.job.config; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import 
org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.index.analysis.NameOrDefinition; import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction; @@ -245,6 +249,24 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } + /** + * Get the categorization analyzer structured as a generic map. + * This provides the same structure as the XContent serialization, but as a Java map rather than text. + * Since it is created by round-tripping through text it is not particularly efficient and is expected to be + * used only rarely. The parser used for the round trip is closed before this method returns. + * + * @param xContentRegistry registry handed to the JSON parser that reads the serialized form back in + * @return the map obtained by parsing this object's JSON serialization + * @throws IOException if creating the parser or reading the map from it fails + */ + public Map asMap(NamedXContentRegistry xContentRegistry) throws IOException { + String strRep = Strings.toString(this); + try (XContentParser parser = + JsonXContent.jsonXContent.createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, strRep)) { + return parser.mapOrdered(); + } + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java index 0c76d62ced94a..0cef48b9ce456 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.tasks.Task; import org.elasticsearch.transport.TransportService; import
org.elasticsearch.xpack.core.ml.MachineLearningField; @@ -19,9 +20,12 @@ import org.elasticsearch.xpack.core.ml.action.MlInfoAction; import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig; import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits; +import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig; import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.ml.process.MlControllerHolder; +import java.io.IOException; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeoutException; @@ -29,13 +33,15 @@ public class TransportMlInfoAction extends HandledTransportAction { private final ClusterService clusterService; + private final NamedXContentRegistry xContentRegistry; private final Map nativeCodeInfo; @Inject - public TransportMlInfoAction(TransportService transportService, ActionFilters actionFilters, - ClusterService clusterService, MlControllerHolder mlControllerHolder) { + public TransportMlInfoAction(TransportService transportService, ActionFilters actionFilters, ClusterService clusterService, + NamedXContentRegistry xContentRegistry, MlControllerHolder mlControllerHolder) { super(MlInfoAction.NAME, transportService, actionFilters, MlInfoAction.Request::new); this.clusterService = clusterService; + this.xContentRegistry = xContentRegistry; try { nativeCodeInfo = mlControllerHolder.getMlController().getNativeCodeInfo(); @@ -70,6 +76,13 @@ private Map anomalyDetectorsDefaults() { defaults.put(AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(), defaultModelMemoryLimit()); defaults.put(AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(), AnalysisLimits.DEFAULT_CATEGORIZATION_EXAMPLES_LIMIT); defaults.put(Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(), Job.DEFAULT_MODEL_SNAPSHOT_RETENTION_DAYS); + try { + defaults.put(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName(), + 
CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(Collections.emptyList()) + .asMap(xContentRegistry).get(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName())); + } catch (IOException e) { + logger.error("failed to convert default categorization analyzer to map", e); + } return defaults; } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java index 6411077453534..cbfdd0c48dda9 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/config/CategorizationAnalyzerConfigTests.java @@ -6,14 +6,20 @@ package org.elasticsearch.xpack.ml.job.config; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.test.AbstractSerializingTestCase; import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; import java.util.Map; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; + public class CategorizationAnalyzerConfigTests extends AbstractSerializingTestCase { @Override @@ -64,6 +70,17 @@ public static CategorizationAnalyzerConfig.Builder createRandomized() { return builder; } + public void testAsMap() throws IOException { + Map map = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(Collections.emptyList()) + .asMap(NamedXContentRegistry.EMPTY); + @SuppressWarnings("unchecked") + Map firstLevel = + (Map) map.get(CategorizationAnalyzerConfig.CATEGORIZATION_ANALYZER.getPreferredName()); + 
assertThat(firstLevel, not(nullValue())); + String tokenizer = (String) firstLevel.get(CategorizationAnalyzerConfig.TOKENIZER.getPreferredName()); + assertThat(tokenizer, is("ml_classic")); + } + @Override protected Writeable.Reader instanceReader() { return CategorizationAnalyzerConfig::new; diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml index 16ac0973222d5..585ab61fb0596 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml @@ -10,6 +10,7 @@ teardown: "Test ml info": - do: ml.info: {} + - match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" } - match: { defaults.anomaly_detectors.model_memory_limit: "1gb" } - match: { defaults.anomaly_detectors.categorization_examples_limit: 4 } - match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 } @@ -25,6 +26,7 @@ teardown: - do: ml.info: {} + - match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" } - match: { defaults.anomaly_detectors.model_memory_limit: "512mb" } - match: { defaults.anomaly_detectors.categorization_examples_limit: 4 } - match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 } @@ -40,6 +42,7 @@ teardown: - do: ml.info: {} + - match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" } - match: { defaults.anomaly_detectors.model_memory_limit: "1gb" } - match: { defaults.anomaly_detectors.categorization_examples_limit: 4 } - match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }