Terms query for Indicator Match rule (elastic#144511)

## Terms query for Indicator Match rule TODO: [] need more unit/integration tests, but ready for review. The indicator match rule will use a terms query, when possible, to search for matches in both threat-first-search and events-first-search. ## How the match query worked: Example for threat-first-search. If we have matching conditions like: `host.ip === indicator.host.ip` or (`source.name === indicator.source.name` AND `host.name === indicator.host.name`) It will generate queries like: ``` match: {host.ip: "1"}, or match: {host.ip: "2"} or match: {host.ip: "3"} or (match: {source.name: "1"} and match: {host.name: "1"}) or (match: {source.name: "2"} and match: {host.name: "2"}) or (match: {source.name: "3"} and match: {host.name: "3"}) ``` Each match will also have a `_name` field like: `${threatId}_${threatIndex}_${threatFields}_${sourceField}` Because there is a 1:1 relation between a match and a response, it will be clear later, at the enrichment stage, which threat matches which event. ## Terms query. We fetch mapping info for the fields that are used in the match conditions of the IM rule. The terms query doesn't support all field types, which is why there is an allowed list of field types. The terms query is not applied for AND conditions. 
For example: Field types: host.ip - `ip` user.name - `keyword` user.description - `text` indicator.host.ip_range - `ip_range` `host.ip === indicator.host.ip` or `host.ip_range === indicator.host.ip` or (`source.name === indicator.source.name` AND `host.name === indicator.host.name`) It will generate queries like: ``` terms: {host.ip: ["1","2","3"]}, or match: {host.ip_range: "1"} // terms query supports range fields, but it will be difficult later to understand which threat matches which event, because we can have more than 1 response for this condition or match: {host.ip_range: "2"} or (match: {source.name: "1"} and match: {host.name: "1"}) or (match: {source.name: "2"} and match: {host.name: "2"}) or (match: {source.name: "3"} and match: {host.name: "3"}) ``` For the terms query, we don't know which response matches which events, which is why we match them back in the code. ## Other changes Threat-first-search - will do one extra request to get all matched threats. For example: The threat index has 1.000.000 documents. The IM rule gets the first batch of 9.000 threats and builds a query to the events index. It returns 100 events (max_signal = 100). Then it tries to enrich those 100 events with threat info. The problem is that the original implementation will enrich only with threats from this 9.000 batch, and it will ignore other matches in the 1.000.000 threats. This is why we do one extra request at the end, from the potential alerts to the threat index. # Tests performance In the best case, it can improve performance by around 3x. [Base](elastic#149113) Threat Indicators - 1.500.000 documents Source - 1.000.000 documents. 
1 field for match condition <img width="557" alt="213484531-3ab68c61-c3f5-4e28-b2c4-c1e90a5b1775" src="https://user-images.githubusercontent.com/7609147/215526984-ff027ba1-2f64-49fe-8fe8-a23ff4eda4dc.png"> This PR: <img width="537" alt="Screenshot 2023-01-30 at 20 20 32" src="https://user-images.githubusercontent.com/7609147/215575128-730514ac-a186-4ab8-87fd-af2ea8f79cec.png"> --------- Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
benakansara · Feb 7, 2023 · 0c1cfb2 · 0c1cfb2
1 parent d85013e
commit 0c1cfb2
Show file tree

Hide file tree

Showing 16 changed files with 1,232 additions and 161 deletions.
diff --git a/...ty_solution/server/lib/detection_engine/signals/threat_mapping/build_threat_enrichment.ts b/...ty_solution/server/lib/detection_engine/signals/threat_mapping/build_threat_enrichment.ts
@@ -6,10 +6,16 @@
  */
 
 import type { SignalsEnrichment } from '../types';
-import { enrichSignalThreatMatches } from './enrich_signal_threat_matches';
-import type { BuildThreatEnrichmentOptions, GetMatchedThreats } from './types';
-import { getThreatList } from './get_threat_list';
+import {
+  enrichSignalThreatMatches,
+  getSignalMatchesFromThreatList,
+} from './enrich_signal_threat_matches';
+import type { BuildThreatEnrichmentOptions } from './types';
+import { buildThreatMappingFilter } from './build_threat_mapping_filter';
+import { getAllThreatListHits } from './get_threat_list';
 
+// We make extra requests to the threat index so that enrichments come from all threats.
+// Previously, alerts were enriched only from `currentThreatList` rather than from all threats.
 export const buildThreatEnrichment = ({
   ruleExecutionLogger,
   services,
@@ -22,39 +28,45 @@ export const buildThreatEnrichment = ({
   reassignPitId,
   listClient,
   exceptionFilter,
+  threatMapping,
+  runtimeMappings,
 }: BuildThreatEnrichmentOptions): SignalsEnrichment => {
-  const getMatchedThreats: GetMatchedThreats = async (ids) => {
-    const matchedThreatsFilter = {
-      query: {
-        bool: {
-          filter: {
-            ids: { values: ids },
-          },
-        },
+  return async (signals) => {
+    const threatFiltersFromEvents = buildThreatMappingFilter({
+      threatMapping,
+      threatList: signals,
+      entryKey: 'field',
+      allowedFieldsForTermsQuery: {
+        source: {},
+        threat: {},
       },
-    };
-    const threatResponse = await getThreatList({
+    });
+
+    const threatListHits = await getAllThreatListHits({
       esClient: services.scopedClusterClient.asCurrentUser,
-      index: threatIndex,
-      language: threatLanguage,
-      perPage: undefined,
+      threatFilters: [...threatFilters, threatFiltersFromEvents],
       query: threatQuery,
+      language: threatLanguage,
+      index: threatIndex,
       ruleExecutionLogger,
-      searchAfter: undefined,
-      threatFilters: [...threatFilters, matchedThreatsFilter],
       threatListConfig: {
         _source: [`${threatIndicatorPath}.*`, 'threat.feed.*'],
         fields: undefined,
       },
       pitId,
       reassignPitId,
-      runtimeMappings: undefined,
+      runtimeMappings,
       listClient,
       exceptionFilter,
     });
 
-    return threatResponse.hits.hits;
-  };
+    const signalMatches = getSignalMatchesFromThreatList(threatListHits);
 
-  return (signals) => enrichSignalThreatMatches(signals, getMatchedThreats, threatIndicatorPath);
+    return enrichSignalThreatMatches(
+      signals,
+      () => Promise.resolve(threatListHits),
+      threatIndicatorPath,
+      signalMatches
+    );
+  };
 };
diff --git a/...on/server/lib/detection_engine/signals/threat_mapping/build_threat_mapping_filter.test.ts b/...on/server/lib/detection_engine/signals/threat_mapping/build_threat_mapping_filter.test.ts
@@ -332,7 +332,7 @@ describe('build_threat_mapping_filter', () => {
       const threatMapping = getThreatMappingMock();
       const threatListItem = getThreatListSearchResponseMock().hits.hits[0];
       const innerClause = createAndOrClauses({ threatMapping, threatListItem, entryKey: 'value' });
-      expect(innerClause).toEqual(getThreatMappingFilterShouldMock());
+      expect(innerClause).toEqual(getThreatMappingFilterShouldMock().bool.should);
     });
 
     test('it should filter out data from entries that do not have mappings', () => {
@@ -343,7 +343,7 @@ describe('build_threat_mapping_filter', () => {
         foo: 'bar',
       };
       const innerClause = createAndOrClauses({ threatMapping, threatListItem, entryKey: 'value' });
-      expect(innerClause).toEqual(getThreatMappingFilterShouldMock());
+      expect(innerClause).toEqual(getThreatMappingFilterShouldMock().bool.should);
     });
 
     test('it should return an empty boolean given an empty array', () => {
@@ -353,7 +353,7 @@ describe('build_threat_mapping_filter', () => {
         threatListItem,
         entryKey: 'value',
       });
-      expect(innerClause).toEqual({ bool: { minimum_should_match: 1, should: [] } });
+      expect(innerClause).toEqual([]);
     });
 
     test('it should return an empty boolean clause given an empty object for a threat list item', () => {
@@ -363,7 +363,7 @@ describe('build_threat_mapping_filter', () => {
         threatListItem: getThreatListItemMock({ _source: {}, fields: {} }),
         entryKey: 'value',
       });
-      expect(innerClause).toEqual({ bool: { minimum_should_match: 1, should: [] } });
+      expect(innerClause).toEqual([]);
     });
   });
 
@@ -446,6 +446,62 @@ describe('build_threat_mapping_filter', () => {
       };
       expect(mapping).toEqual(expected);
     });
+
+    test('it should use terms query if allowedFieldsForTermsQuery provided', () => {
+      const threatMapping = getThreatMappingMock();
+      const threatList = getThreatListSearchResponseMock().hits.hits;
+      const mapping = buildEntriesMappingFilter({
+        threatMapping,
+        threatList,
+        chunkSize: 1024,
+        entryKey: 'value',
+        allowedFieldsForTermsQuery: {
+          source: { 'source.ip': true },
+          threat: { 'source.ip': true },
+        },
+      });
+      const mock = { ...getThreatMappingFilterShouldMock() };
+      mock.bool.should.pop();
+
+      const expected: BooleanFilter = {
+        bool: {
+          should: [
+            mock,
+            {
+              terms: {
+                _name: '__SEP____SEP__source.ip__SEP__source.ip__SEP__tq',
+                'source.ip': ['127.0.0.1'],
+              },
+            },
+          ],
+          minimum_should_match: 1,
+        },
+      };
+      expect(mapping).toEqual(expected);
+    });
+
+    test('it should use match query if allowedFieldsForTermsQuery provided, but it is AND', () => {
+      const threatMapping = getThreatMappingMock();
+      const threatList = getThreatListSearchResponseMock().hits.hits;
+      const mapping = buildEntriesMappingFilter({
+        threatMapping,
+        threatList,
+        chunkSize: 1024,
+        entryKey: 'value',
+        allowedFieldsForTermsQuery: {
+          source: { 'host.name': true, 'host.ip': true },
+          threat: { 'host.name': true, 'host.ip': true },
+        },
+      });
+
+      const expected: BooleanFilter = {
+        bool: {
+          should: [getThreatMappingFilterShouldMock()],
+          minimum_should_match: 1,
+        },
+      };
+      expect(mapping).toEqual(expected);
+    });
   });
 
   describe('splitShouldClauses', () => {

diff --git a/...olution/server/lib/detection_engine/signals/threat_mapping/build_threat_mapping_filter.ts b/...olution/server/lib/detection_engine/signals/threat_mapping/build_threat_mapping_filter.ts
@@ -7,7 +7,11 @@
 
 import get from 'lodash/fp/get';
 import type { Filter } from '@kbn/es-query';
-import type { ThreatMapping } from '@kbn/securitysolution-io-ts-alerting-types';
+import type {
+  ThreatMapping,
+  ThreatMappingEntries,
+} from '@kbn/securitysolution-io-ts-alerting-types';
+import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types';
 import type {
   BooleanFilter,
   BuildEntriesMappingFilterOptions,
@@ -16,7 +20,9 @@ import type {
   CreateInnerAndClausesOptions,
   FilterThreatMappingOptions,
   SplitShouldClausesOptions,
+  TermQuery,
 } from './types';
+import { ThreatMatchQueryType } from './types';
 import { encodeThreatMatchNamedQuery } from './utils';
 
 export const MAX_CHUNK_SIZE = 1024;
@@ -26,6 +32,7 @@ export const buildThreatMappingFilter = ({
   threatList,
   chunkSize,
   entryKey = 'value',
+  allowedFieldsForTermsQuery,
 }: BuildThreatMappingFilterOptions): Filter => {
   const computedChunkSize = chunkSize ?? MAX_CHUNK_SIZE;
   if (computedChunkSize > 1024) {
@@ -36,6 +43,7 @@ export const buildThreatMappingFilter = ({
     threatList,
     chunkSize: computedChunkSize,
     entryKey,
+    allowedFieldsForTermsQuery,
   });
   const filterChunk: Filter = {
     meta: {
@@ -45,6 +53,7 @@ export const buildThreatMappingFilter = ({
     },
     query,
   };
+
   return filterChunk;
 };
 
@@ -91,6 +100,7 @@ export const createInnerAndClauses = ({
                     index: threatListItem._index,
                     field: threatMappingEntry.field,
                     value: threatMappingEntry.value,
+                    queryType: ThreatMatchQueryType.match,
                   }),
                 },
               },
@@ -108,8 +118,8 @@ export const createAndOrClauses = ({
   threatMapping,
   threatListItem,
   entryKey,
-}: CreateAndOrClausesOptions): BooleanFilter => {
-  const should = threatMapping.reduce<unknown[]>((accum, threatMap) => {
+}: CreateAndOrClausesOptions): QueryDslQueryContainer[] => {
+  const should = threatMapping.reduce<QueryDslQueryContainer[]>((accum, threatMap) => {
     const innerAndClauses = createInnerAndClauses({
       threatMappingEntries: threatMap.entries,
       threatListItem,
@@ -123,34 +133,76 @@ export const createAndOrClauses = ({
     }
     return accum;
   }, []);
-  return { bool: { should, minimum_should_match: 1 } };
+  return should;
 };
 
 export const buildEntriesMappingFilter = ({
   threatMapping,
   threatList,
   chunkSize,
   entryKey,
+  allowedFieldsForTermsQuery,
 }: BuildEntriesMappingFilterOptions): BooleanFilter => {
-  const combinedShould = threatList.reduce<BooleanFilter[]>((accum, threatListSearchItem) => {
-    const filteredEntries = filterThreatMapping({
-      threatMapping,
-      threatListItem: threatListSearchItem,
-      entryKey,
-    });
-    const queryWithAndOrClause = createAndOrClauses({
-      threatMapping: filteredEntries,
-      threatListItem: threatListSearchItem,
-      entryKey,
-    });
-    if (queryWithAndOrClause.bool.should.length !== 0) {
-      // These values can be 10k+ large, so using a push here for performance
-      accum.push(queryWithAndOrClause);
-    }
-    return accum;
-  }, []);
-  const should = splitShouldClauses({ should: combinedShould, chunkSize });
-  return { bool: { should, minimum_should_match: 1 } };
+  const allFieldAllowedForTermQuery = (entries: ThreatMappingEntries) =>
+    entries.every(
+      (entry) =>
+        allowedFieldsForTermsQuery?.source?.[entry.field] &&
+        allowedFieldsForTermsQuery?.threat?.[entry.value]
+    );
+  const combinedShould = threatMapping.reduce<{
+    match: QueryDslQueryContainer[];
+    term: TermQuery[];
+  }>(
+    (acc, threatMap) => {
+      if (threatMap.entries.length > 1 || !allFieldAllowedForTermQuery(threatMap.entries)) {
+        threatList.forEach((threatListSearchItem) => {
+          const filteredEntries = filterThreatMapping({
+            threatMapping: [threatMap],
+            threatListItem: threatListSearchItem,
+            entryKey,
+          });
+          const queryWithAndOrClause = createAndOrClauses({
+            threatMapping: filteredEntries,
+            threatListItem: threatListSearchItem,
+            entryKey,
+          });
+          if (queryWithAndOrClause.length !== 0) {
+            // These values can be 10k+ large, so using a push here for performance
+            acc.match.push(...queryWithAndOrClause);
+          }
+        });
+      } else {
+        const threatMappingEntry = threatMap.entries[0];
+        const threats: string[] = threatList
+          .map((threatListItem) => get(threatMappingEntry[entryKey], threatListItem.fields))
+          .filter((val) => val)
+          .map((val) => val[0]);
+        if (threats.length > 0) {
+          acc.term.push({
+            terms: {
+              _name: encodeThreatMatchNamedQuery({
+                field: threatMappingEntry.field,
+                value: threatMappingEntry.value,
+                queryType: ThreatMatchQueryType.term,
+              }),
+              [threatMappingEntry[entryKey === 'field' ? 'value' : 'field']]: threats,
+            },
+          });
+        }
+      }
+      return acc;
+    },
+    { match: [], term: [] }
+  );
+
+  const matchShould = splitShouldClauses({
+    should:
+      combinedShould.match.length > 0
+        ? [{ bool: { should: combinedShould.match, minimum_should_match: 1 } }]
+        : [],
+    chunkSize,
+  });
+  return { bool: { should: [...matchShould, ...combinedShould.term], minimum_should_match: 1 } };
 };
 
 export const splitShouldClauses = ({
@@ -168,7 +220,10 @@ export const splitShouldClauses = ({
         accum[chunkIndex] = { bool: { should: [], minimum_should_match: 1 } };
       }
       // Add to the existing array element. Using mutatious push here since these arrays can get very large such as 10k+ and this is going to be a hot code spot.
-      accum[chunkIndex].bool.should.push(item);
+      if (Array.isArray(accum[chunkIndex].bool?.should)) {
+        (accum[chunkIndex].bool?.should as QueryDslQueryContainer[]).push(item);
+      }
+
       return accum;
     }, []);
   }