EQL: Sequences will now support nano-timestamps (#76953)

With this change nanosecond-resolution timestamps can be used to drive the EQL sequence query.
elastic · Sep 1, 2021 · 9570236 · 9570236
1 parent 8b9c52e
commit 9570236
Show file tree

Hide file tree

Showing 23 changed files with 264 additions and 2,176 deletions.
diff --git a/x-pack/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/DataLoader.java b/x-pack/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/DataLoader.java
@@ -6,6 +6,7 @@
  */
 package org.elasticsearch.test.eql;
 
+import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.instanceOf;
 import static org.junit.Assert.assertThat;
 
@@ -18,6 +19,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.function.Consumer;
 
 import org.apache.http.HttpHost;
 import org.apache.logging.log4j.LogManager;
@@ -44,20 +46,21 @@
  * Loads EQL dataset into ES.
  *
  * Checks for predefined indices:
- * - endgame-140 - for existing data
- * - extra       - additional data
+ * - endgame-140       - for existing data
+ * - endgame-140-nanos - same as endgame-140, but with nano-precision timestamps
+ * - extra             - additional data
  *
  * While the loader could be made generic, the queries are bound to each index and generalizing that would make things way too complicated.
  */
 public class DataLoader {
     public static final String TEST_INDEX = "endgame-140";
     public static final String TEST_EXTRA_INDEX = "extra";
-    public static final String DATE_NANOS_INDEX = "eql_date_nanos";
+    public static final String TEST_NANOS_INDEX = "endgame-140-nanos";
 
     private static final Map<String, String[]> replacementPatterns = Collections.unmodifiableMap(getReplacementPatterns());
 
-    private static final long FILETIME_EPOCH_DIFF = 11644473600000L;
-    private static final long FILETIME_ONE_MILLISECOND = 10 * 1000;
+    private static final long FILETIME_EPOCH_DIFF = 11644473600000L; // millis delta from the start of year 1601 (Windows filetime) to 1970
+    private static final long FILETIME_ONE_MILLISECOND = 10 * 1000; // Windows filetime is in 100-nanoseconds ticks
 
     // runs as java main
     private static boolean main = false;
@@ -86,33 +89,34 @@ public static void loadDatasetIntoEs(RestHighLevelClient client,
         //
         // Main Index
         //
-        load(client, TEST_INDEX, true, p);
+        load(client, TEST_INDEX, null, DataLoader::timestampToUnixMillis, p);
         //
         // Aux Index
         //
-        load(client, TEST_EXTRA_INDEX, false, p);
+        load(client, TEST_EXTRA_INDEX, null, null, p);
         //
         // Date_Nanos index
         //
-        // The data for this index are identical to the endgame-140.data with only the values for @timestamp changed.
+        // The data for this index is loaded from the same endgame-140.data sample, only having the mapping for @timestamp changed: the
+        // chosen Windows filetime timestamps (2017+) can coincidentally also be readily used as nano-resolution unix timestamps (1973+).
         // There are mixed values with and without nanos precision so that the filtering is properly tested for both cases.
-        load(client, DATE_NANOS_INDEX, false, p);
+        load(client, TEST_NANOS_INDEX, TEST_INDEX, DataLoader::timestampToUnixNanos, p);
     }
 
-    private static void load(RestHighLevelClient client, String indexName, boolean winFileTime,
+    private static void load(RestHighLevelClient client, String indexName, String dataName, Consumer<Map<String, Object>> datasetTransform,
                              CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p) throws IOException {
         String name = "/data/" + indexName + ".mapping";
         URL mapping = DataLoader.class.getResource(name);
         if (mapping == null) {
             throw new IllegalArgumentException("Cannot find resource " + name);
         }
-        name = "/data/" + indexName + ".data";
+        name = "/data/" + (dataName != null ? dataName : indexName) + ".data";
         URL data = DataLoader.class.getResource(name);
         if (data == null) {
             throw new IllegalArgumentException("Cannot find resource " + name);
         }
         createTestIndex(client, indexName, readMapping(mapping));
-        loadData(client, indexName, winFileTime, data, p);
+        loadData(client, indexName, datasetTransform, data, p);
     }
 
     private static void createTestIndex(RestHighLevelClient client, String indexName, String mapping) throws IOException {
@@ -147,8 +151,8 @@ private static CharSequence randomOf(String...values) {
     }
 
     @SuppressWarnings("unchecked")
-    private static void loadData(RestHighLevelClient client, String indexName, boolean winfileTime, URL resource,
-                                 CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p)
+    private static void loadData(RestHighLevelClient client, String indexName, Consumer<Map<String, Object>> datasetTransform,
+                                 URL resource, CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p)
         throws IOException {
         BulkRequest bulk = new BulkRequest();
         bulk.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
@@ -158,8 +162,8 @@ private static void loadData(RestHighLevelClient client, String indexName, boole
             for (Object item : list) {
                 assertThat(item, instanceOf(Map.class));
                 Map<String, Object> entry = (Map<String, Object>) item;
-                if (winfileTime) {
-                    transformDataset(entry);
+                if (datasetTransform != null) {
+                    datasetTransform.accept(entry);
                 }
                 bulk.add(new IndexRequest(indexName).source(entry, XContentType.JSON));
             }
@@ -175,14 +179,28 @@ private static void loadData(RestHighLevelClient client, String indexName, boole
         }
     }
 
-    private static void transformDataset(Map<String, Object> entry) {
+    private static void timestampToUnixMillis(Map<String, Object> entry) {
         Object object = entry.get("timestamp");
         assertThat(object, instanceOf(Long.class));
         Long ts = (Long) object;
         // currently this is windows filetime
         entry.put("@timestamp", winFileTimeToUnix(ts));
     }
 
+    private static void timestampToUnixNanos(Map<String, Object> entry) {
+        Object object = entry.get("timestamp");
+        assertThat(object, instanceOf(Long.class));
+        // interpret the value as nanos since the unix epoch
+        String timestamp = object.toString();
+        assertThat(timestamp.length(), greaterThan(12));
+        // avoid double approximations and BigDecimal ops
+        String millis = timestamp.substring(0, 12);
+        String milliFraction = timestamp.substring(12);
+        // strip the fractions right away if not actually present
+        entry.put("@timestamp", milliFraction.equals("000000") ? millis : millis + "." + milliFraction);
+        entry.put("timestamp", ((long) object)/1_000_000L);
+    }
+
     public static long winFileTimeToUnix(final long filetime) {
         long ts = (filetime / FILETIME_ONE_MILLISECOND);
         return ts - FILETIME_EPOCH_DIFF;

diff --git a/...ugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/EqlDateNanosSpecTestCase.java b/...ugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/EqlDateNanosSpecTestCase.java
@@ -7,23 +7,22 @@
 
 package org.elasticsearch.test.eql;
 
-import static org.elasticsearch.test.eql.DataLoader.DATE_NANOS_INDEX;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 
-import java.util.HashSet;
 import java.util.List;
 
-import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import static org.elasticsearch.test.eql.DataLoader.TEST_NANOS_INDEX;
 
 public abstract class EqlDateNanosSpecTestCase extends BaseEqlSpecTestCase {
 
     @ParametersFactory(shuffle = false, argumentFormatting = PARAM_FORMATTING)
     public static List<Object[]> readTestSpecs() throws Exception {
-        return asArray(EqlSpecLoader.load("/test_queries_date_nanos.toml", new HashSet<>()));
+        return asArray(EqlSpecLoader.load("/test_queries_date_nanos.toml", "/test_queries.toml"));
     }
 
     // constructor for "local" rest tests
     public EqlDateNanosSpecTestCase(String query, String name, long[] eventIds) {
-        this(DATE_NANOS_INDEX, query, name, eventIds);
+        this(TEST_NANOS_INDEX, query, name, eventIds);
     }
 
     // constructor for multi-cluster tests

diff --git a/...k/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/EqlExtraSpecTestCase.java b/...k/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/EqlExtraSpecTestCase.java
@@ -9,7 +9,6 @@
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 
-import java.util.HashSet;
 import java.util.List;
 
 import static org.elasticsearch.test.eql.DataLoader.TEST_EXTRA_INDEX;
@@ -18,7 +17,7 @@ public abstract class EqlExtraSpecTestCase extends BaseEqlSpecTestCase {
 
     @ParametersFactory(shuffle = false, argumentFormatting = PARAM_FORMATTING)
     public static List<Object[]> readTestSpecs() throws Exception {
-        return asArray(EqlSpecLoader.load("/test_extra.toml", new HashSet<>()));
+        return asArray(EqlSpecLoader.load("/test_extra.toml"));
     }
 
     // constructor for "local" rest tests

diff --git a/x-pack/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/EqlSpecLoader.java b/x-pack/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/EqlSpecLoader.java
@@ -15,6 +15,7 @@
 
 import java.io.InputStream;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -29,6 +30,15 @@ public static List<EqlSpec> load(String path, Set<String> uniqueTestNames) throw
         }
     }
 
+    public static List<EqlSpec> load(String ...paths) throws Exception {
+        Set<String> uniqueTestNames = new HashSet<>();
+        List<EqlSpec> specs = new ArrayList<>();
+        for (String path: paths) {
+            specs.addAll(load(path, uniqueTestNames));
+        }
+        return specs;
+    }
+
     private static void validateAndAddSpec(List<EqlSpec> specs, EqlSpec spec, Set<String> uniqueTestNames) {
         if (Strings.isNullOrEmpty(spec.name())) {
             throw new IllegalArgumentException("Read a test without a name value");
@@ -41,7 +51,7 @@ private static void validateAndAddSpec(List<EqlSpec> specs, EqlSpec spec, Set<St
         if (spec.expectedEventIds() == null) {
             throw new IllegalArgumentException("Read a test without a expected_event_ids value");
         }
-        if (uniqueTestNames.contains(spec.name())) {
+        if (uniqueTestNames.contains(spec.name())) { // TODO: scope it per file?
             throw new IllegalArgumentException("Found a test with the same name as another test: " + spec.name());
         } else {
             uniqueTestNames.add(spec.name());

diff --git a/x-pack/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/EqlSpecTestCase.java b/x-pack/plugin/eql/qa/common/src/main/java/org/elasticsearch/test/eql/EqlSpecTestCase.java
@@ -9,9 +9,7 @@
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import static org.elasticsearch.test.eql.DataLoader.TEST_INDEX;
 
@@ -21,12 +19,7 @@ public abstract class EqlSpecTestCase extends BaseEqlSpecTestCase {
     public static List<Object[]> readTestSpecs() throws Exception {
 
         // Load EQL validation specs
-        Set<String> uniqueTestNames = new HashSet<>();
-        List<EqlSpec> specs = EqlSpecLoader.load("/test_queries.toml", uniqueTestNames);
-        specs.addAll(EqlSpecLoader.load("/additional_test_queries.toml", uniqueTestNames));
-        specs.addAll(EqlSpecLoader.load("/test_queries_date.toml", uniqueTestNames));
-
-        return asArray(specs);
+        return asArray(EqlSpecLoader.load("/test_queries.toml", "/additional_test_queries.toml", "/test_queries_date.toml"));
     }
 
     @Override

diff --git a/...ain/resources/data/eql_date_nanos.mapping → .../resources/data/endgame-140-nanos.mapping b/...ain/resources/data/eql_date_nanos.mapping → .../resources/data/endgame-140-nanos.mapping