From 40a1435190945ec7e91a957d3dd8ec9e2ba3d8b8 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 22 Oct 2020 14:44:31 +0100 Subject: [PATCH] [ML] Support the unsigned_long type in data frame analytics Adds support for the unsigned_long type to data frame analytics. This type is handled in the same way as the long type. Values sent to the ML native processes are converted to floats and hence will lose accuracy when outside the range where a float can uniquely represent long values. Relates #60050 --- .../xpack/core/ml/dataframe/analyses/Types.java | 7 ++++--- .../ml/integration/DataFrameAnalysisCustomFeatureIT.java | 2 +- .../xpack/ml/integration/ExplainDataFrameAnalyticsIT.java | 4 ++-- .../elasticsearch/xpack/ml/integration/RegressionIT.java | 2 +- .../xpack/ml/integration/RunDataFrameAnalyticsIT.java | 4 ++-- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Types.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Types.java index 708db1a913f86..3e36f4c5cb241 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Types.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Types.java @@ -28,12 +28,13 @@ private Types() {} .collect(Collectors.toUnmodifiableSet()); private static final Set<String> NUMERICAL_TYPES = - Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float")) + Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float", "unsigned_long")) .collect(Collectors.toUnmodifiableSet()); private static final Set<String> DISCRETE_NUMERICAL_TYPES = - Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG) - .map(NumberType::typeName) + Stream.concat( + Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, 
NumberType.LONG).map(NumberType::typeName), + Stream.of("unsigned_long")) .collect(Collectors.toUnmodifiableSet()); private static final Set<String> BOOL_TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE); diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/DataFrameAnalysisCustomFeatureIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/DataFrameAnalysisCustomFeatureIT.java index 0eacab902d63d..a93ea8d66a01b 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/DataFrameAnalysisCustomFeatureIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/DataFrameAnalysisCustomFeatureIT.java @@ -167,7 +167,7 @@ private static void createIndex(String index, boolean isDatastream) { " \"type\": \"double\"\n" + " }," + " \""+ DISCRETE_NUMERICAL_FIELD + "\": {\n" + - " \"type\": \"integer\"\n" + + " \"type\": \"unsigned_long\"\n" + " }," + " \""+ TEXT_FIELD + "\": {\n" + " \"type\": \"text\"\n" + diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/ExplainDataFrameAnalyticsIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/ExplainDataFrameAnalyticsIT.java index 20b82f8caa9c5..83d5c81886b20 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/ExplainDataFrameAnalyticsIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/ExplainDataFrameAnalyticsIT.java @@ -52,7 +52,7 @@ public void testSourceQueryIsApplied() throws IOException { client().admin().indices().prepareCreate(sourceIndex) .setMapping( "numeric_1", "type=double", - "numeric_2", "type=float", + "numeric_2", 
"type=unsigned_long", "categorical", "type=keyword", "filtered_field", "type=keyword") .get(); @@ -64,7 +64,7 @@ public void testSourceQueryIsApplied() throws IOException { IndexRequest indexRequest = new IndexRequest(sourceIndex); indexRequest.source( "numeric_1", 1.0, - "numeric_2", 2.0, + "numeric_2", 2, "categorical", i % 2 == 0 ? "class_1" : "class_2", "filtered_field", i < 2 ? "bingo" : "rest"); // We tag bingo on the first two docs to ensure we have 2 classes bulkRequestBuilder.add(indexRequest); diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java index 454102ea6dab6..225898c2911f9 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java @@ -698,7 +698,7 @@ static void indexData(String sourceIndex, int numTrainingRows, int numNonTrainin " \"type\": \"double\"\n" + " }," + " \"" + DISCRETE_NUMERICAL_FEATURE_FIELD + "\": {\n" + - " \"type\": \"long\"\n" + + " \"type\": \"unsigned_long\"\n" + " }," + " \"" + DEPENDENT_VARIABLE_FIELD + "\": {\n" + " \"type\": \"double\"\n" + diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java index 5273083d575ee..fe78f7ddac240 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java +++ 
b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java @@ -71,7 +71,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception { String sourceIndex = "test-outlier-detection-with-few-docs"; client().admin().indices().prepareCreate(sourceIndex) - .setMapping("numeric_1", "type=double", "numeric_2", "type=float", "categorical_1", "type=keyword") + .setMapping("numeric_1", "type=double", "numeric_2", "type=unsigned_long", "categorical_1", "type=keyword") .get(); BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(); @@ -83,7 +83,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception { // We insert one odd value out of 5 for one feature String docId = i == 0 ? "outlier" : "normal" + i; indexRequest.id(docId); - indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1.0, "categorical_1", "foo_" + i); + indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1, "categorical_1", "foo_" + i); bulkRequestBuilder.add(indexRequest); } BulkResponse bulkResponse = bulkRequestBuilder.get();