Skip to content

Commit

Permalink
[ML] Support the unsigned_long type in data frame analytics
Browse files Browse the repository at this point in the history
Adds support for the unsigned_long type to data frame analytics.

This type is handled in the same way as the long type. Values
sent to the ML native processes are converted to floats and
hence lose precision when they fall outside the range in which
a float can represent every integer value exactly.

Relates elastic#60050
  • Loading branch information
droberts195 committed Oct 22, 2020
1 parent 3f8097b commit 40a1435
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@ private Types() {}
.collect(Collectors.toUnmodifiableSet());

private static final Set<String> NUMERICAL_TYPES =
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float"))
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float", "unsigned_long"))
.collect(Collectors.toUnmodifiableSet());

private static final Set<String> DISCRETE_NUMERICAL_TYPES =
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG)
.map(NumberType::typeName)
Stream.concat(
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG).map(NumberType::typeName),
Stream.of("unsigned_long"))
.collect(Collectors.toUnmodifiableSet());

private static final Set<String> BOOL_TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ private static void createIndex(String index, boolean isDatastream) {
" \"type\": \"double\"\n" +
" }," +
" \""+ DISCRETE_NUMERICAL_FIELD + "\": {\n" +
" \"type\": \"integer\"\n" +
" \"type\": \"unsigned_long\"\n" +
" }," +
" \""+ TEXT_FIELD + "\": {\n" +
" \"type\": \"text\"\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void testSourceQueryIsApplied() throws IOException {
client().admin().indices().prepareCreate(sourceIndex)
.setMapping(
"numeric_1", "type=double",
"numeric_2", "type=float",
"numeric_2", "type=unsigned_long",
"categorical", "type=keyword",
"filtered_field", "type=keyword")
.get();
Expand All @@ -64,7 +64,7 @@ public void testSourceQueryIsApplied() throws IOException {
IndexRequest indexRequest = new IndexRequest(sourceIndex);
indexRequest.source(
"numeric_1", 1.0,
"numeric_2", 2.0,
"numeric_2", 2,
"categorical", i % 2 == 0 ? "class_1" : "class_2",
"filtered_field", i < 2 ? "bingo" : "rest"); // We tag bingo on the first two docs to ensure we have 2 classes
bulkRequestBuilder.add(indexRequest);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@ static void indexData(String sourceIndex, int numTrainingRows, int numNonTrainin
" \"type\": \"double\"\n" +
" }," +
" \"" + DISCRETE_NUMERICAL_FEATURE_FIELD + "\": {\n" +
" \"type\": \"long\"\n" +
" \"type\": \"unsigned_long\"\n" +
" }," +
" \"" + DEPENDENT_VARIABLE_FIELD + "\": {\n" +
" \"type\": \"double\"\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
String sourceIndex = "test-outlier-detection-with-few-docs";

client().admin().indices().prepareCreate(sourceIndex)
.setMapping("numeric_1", "type=double", "numeric_2", "type=float", "categorical_1", "type=keyword")
.setMapping("numeric_1", "type=double", "numeric_2", "type=unsigned_long", "categorical_1", "type=keyword")
.get();

BulkRequestBuilder bulkRequestBuilder = client().prepareBulk();
Expand All @@ -83,7 +83,7 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
// We insert one odd value out of 5 for one feature
String docId = i == 0 ? "outlier" : "normal" + i;
indexRequest.id(docId);
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1.0, "categorical_1", "foo_" + i);
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1, "categorical_1", "foo_" + i);
bulkRequestBuilder.add(indexRequest);
}
BulkResponse bulkResponse = bulkRequestBuilder.get();
Expand Down

0 comments on commit 40a1435

Please sign in to comment.