improve query latency, add topN filter (#18)
* improve query execution latency
  We swap the bool must clause for a filter clause and remove the order-by-Count aggregation

* add topN counter to the Elasticsearch query
Signed-off-by: inge4pres <francesco.gualazzi@elastic.co>
inge4pres authored and rockdaboot committed Jul 5, 2022
1 parent 71ff030 commit c027c4e
Showing 5 changed files with 55 additions and 60 deletions.
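For context on the first commit-message point: moving the term clauses from bool.must to bool.filter puts them in Elasticsearch's filter context, where they are not scored and can be served from the filter cache; scoring was never used by these aggregation-only queries. A minimal sketch of the changed query shape, with the unchanged range clause on '@timestamp' elided and the '5' project ID used only as an example value:

// Sketch only: before, each term carried scoring metadata inside bool.must.
const before = {
  bool: {
    must: [{ term: { ProjectID: { value: '5', boost: 1.0 } } } /* , range on '@timestamp' */],
  },
};

// After: the same clauses live in bool.filter, unscored and cacheable.
const after = {
  bool: {
    filter: [{ term: { ProjectID: '5' } } /* , range on '@timestamp' */],
  },
};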
2 changes: 1 addition & 1 deletion src/plugins/profiling/public/app.tsx
@@ -78,7 +78,7 @@ function App({ fetchTopN, fetchElasticFlamechart, fetchPixiFlamechart }: Props)
           <EuiSpacer />
           <FlameGraphContext.Provider value={pixiFlamegraph}>
             <FlameGraphNavigation getter={fetchPixiFlamechart} setter={setPixiFlamegraph} />
-            <PixiFlamechart projectID={5} />
+            <PixiFlamechart projectID={'5'} />
           </FlameGraphContext.Provider>
         </>
       ),
22 changes: 6 additions & 16 deletions src/plugins/profiling/public/services.ts
@@ -20,8 +20,10 @@ function getFetchQuery(seconds: string): HttpFetchQuery {
   return {
     index: 'profiling-events',
     projectID: 5,
-    timeFrom: unixTime - parseInt(seconds),
+    timeFrom: unixTime - parseInt(seconds, 10),
     timeTo: unixTime,
+    // TODO remove hard-coded value for topN items length and expose it through the UI
+    n: 100,
   } as HttpFetchQuery;
 }
 
@@ -33,11 +35,7 @@ export function getServices(core: CoreStart): Services {
     fetchTopN: async (type: string, seconds: string) => {
       try {
         const query = getFetchQuery(seconds);
-        const response = await core.http.get(
-          `${paths.TopN}/${type}`,
-          { query }
-        );
-        return response;
+        return await core.http.get(`${paths.TopN}/${type}`, { query });
       } catch (e) {
         return e;
       }
@@ -46,11 +44,7 @@
     fetchElasticFlamechart: async (seconds: string) => {
       try {
         const query = getFetchQuery(seconds);
-        const response = await core.http.get(
-          paths.FlamechartElastic,
-          { query }
-        );
-        return response;
+        return await core.http.get(paths.FlamechartElastic, { query });
       } catch (e) {
         return e;
       }
@@ -59,11 +53,7 @@
     fetchPixiFlamechart: async (seconds: string) => {
      try {
         const query = getFetchQuery(seconds);
-        const response = await core.http.get(
-          paths.FlamechartPixi,
-          { query }
-        );
-        return response;
+        return await core.http.get(paths.FlamechartPixi, { query });
       } catch (e) {
         return e;
       }
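With the new n field, the object built by getFetchQuery (and serialized as URL query parameters by core.http.get) looks roughly like the sketch below; the timestamps are illustrative and paths.TopN is resolved elsewhere in the plugin:

// Approximate result of getFetchQuery('900'), assuming unixTime === 1657000000.
const query = {
  index: 'profiling-events',
  projectID: 5,
  timeFrom: 1656999100, // unixTime - parseInt('900', 10)
  timeTo: 1657000000,
  n: 100, // hard-coded for now, see the TODO above
};
// fetchTopN(type, '900') then performs roughly:
//   core.http.get(`${paths.TopN}/${type}`, { query })
// which the server-side route below validates and forwards to Elasticsearch.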
28 changes: 12 additions & 16 deletions src/plugins/profiling/server/routes/mappings.ts
@@ -10,13 +10,10 @@ import { AggregationsAggregationContainer } from '@elastic/elasticsearch/lib/api/types';
 
 interface ProjectTimeQuery {
   bool: {
-    must: Array<
+    filter: Array<
       | {
           term: {
-            ProjectID: {
-              value: string;
-              boost: number;
-            };
+            ProjectID: string;
           };
         }
       | {
@@ -40,13 +37,10 @@ export function newProjectTimeQuery(
 ): ProjectTimeQuery {
   return {
     bool: {
-      must: [
+      filter: [
         {
           term: {
-            ProjectID: {
-              value: projectID,
-              boost: 1.0,
-            },
+            ProjectID: projectID,
           },
         },
         {
@@ -65,21 +59,23 @@
 }
 
 export function autoHistogramSumCountOnGroupByField(
-  searchField: string
+  searchField: string,
+  topNItems: number
 ): AggregationsAggregationContainer {
   return {
     auto_date_histogram: {
       field: '@timestamp',
-      buckets: 100,
+      buckets: 50,
     },
     aggs: {
       group_by: {
         terms: {
           field: searchField,
-          order: {
-            Count: 'desc',
-          },
-          size: 100,
+          // We remove the ordering since we will rely directly on the natural
+          // ordering of Elasticsearch: by default this will be the descending count
+          // of matched documents. This is not equal to the ordering by sum of Count field,
+          // but it's a good-enough approximation given the distribution of Count.
+          size: topNItems,
         },
         aggs: {
           Count: {
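Two things change in this aggregation helper: the terms aggregation no longer orders buckets by the Count sub-aggregation (ordering by a sub-aggregation forces Elasticsearch to evaluate many more candidate buckets per shard and is less accurate than the default descending doc_count order, as the in-code comment notes), and its size now comes from the new topNItems parameter. Assuming the truncated Count sub-aggregation is a sum over the Count field, the request body assembled in search_topn.ts is roughly:

// Sketch of the combined request body; the literal arguments and the sum
// sub-aggregation are assumptions, not shown verbatim in this diff.
const body = {
  query: newProjectTimeQuery('5', '1644506880', '1644510480'),
  aggs: {
    histogram: {
      auto_date_histogram: { field: '@timestamp', buckets: 50 },
      aggs: {
        group_by: {
          terms: { field: 'StackTraceID', size: 100 }, // size === topNItems (n from the client)
          aggs: {
            Count: { sum: { field: 'Count' } }, // assumed sum over per-document Count
          },
        },
      },
    },
  },
};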
24 changes: 15 additions & 9 deletions src/plugins/profiling/server/routes/search_topn.test.ts
@@ -9,7 +9,13 @@
 import { topNElasticSearchQuery } from './search_topn';
 import { DataRequestHandlerContext } from '../../../data/server';
 import { kibanaResponseFactory } from '../../../../core/server';
-import { AggregationsAggregationContainer } from '@elastic/elasticsearch/lib/api/types';
+import {
+  AggregationsAggregationContainer,
+  AggregationsHistogramAggregate,
+  AggregationsHistogramBucket,
+  AggregationsMultiBucketAggregateBase,
+  AggregationsStringTermsBucket,
+} from '@elastic/elasticsearch/lib/api/types';
 
 const anyQuery = 'any::query';
 const index = 'test';
@@ -36,23 +42,23 @@ function mockTopNData() {
         histogram: {
           buckets: [
             {
-              key_as_string: '1644506880',
-              key: 1644506880000,
-              doc_count: 700,
+              key_as_string: '123',
+              key: 123000,
+              doc_count: 10,
               group_by: {
                 buckets: [
                   {
-                    key: 'vyHke_Kdp2c05tXV7a_Rkg==',
+                    key: '::any::key::',
                     doc_count: 10,
                     Count: {
                       value: 100.0,
                     },
-                  },
+                  } as AggregationsStringTermsBucket,
                 ],
-              },
-            },
+              } as AggregationsMultiBucketAggregateBase<AggregationsStringTermsBucket>,
+            } as AggregationsHistogramBucket,
           ],
-        },
+        } as AggregationsHistogramAggregate,
       },
     },
   }),
39 changes: 21 additions & 18 deletions src/plugins/profiling/server/routes/search_topn.ts
@@ -8,19 +8,21 @@
 import { schema } from '@kbn/config-schema';
 import type { IRouter, KibanaResponseFactory } from 'kibana/server';
 import {
+  AggregationsHistogramAggregate,
   AggregationsHistogramBucket,
-  AggregationsMultiBucketAggregateBase,
+  AggregationsStringTermsBucket,
 } from '@elastic/elasticsearch/lib/api/types';
 import type { DataRequestHandlerContext } from '../../../data/server';
 import { getRemoteRoutePaths } from '../../common';
-import { newProjectTimeQuery, autoHistogramSumCountOnGroupByField } from './mappings';
+import { autoHistogramSumCountOnGroupByField, newProjectTimeQuery } from './mappings';
 
 export async function topNElasticSearchQuery(
   context: DataRequestHandlerContext,
   index: string,
   projectID: string,
   timeFrom: string,
   timeTo: string,
+  topNItems: number,
   searchField: string,
   response: KibanaResponseFactory
 ) {
@@ -30,23 +32,22 @@ export async function topNElasticSearchQuery(
     body: {
       query: newProjectTimeQuery(projectID, timeFrom, timeTo),
       aggs: {
-        histogram: autoHistogramSumCountOnGroupByField(searchField),
+        histogram: autoHistogramSumCountOnGroupByField(searchField, topNItems),
       },
     },
   });
 
   if (searchField === 'StackTraceID') {
-    const autoDateHistogram = resTopNStackTraces.body.aggregations
-      ?.histogram as AggregationsMultiBucketAggregateBase<AggregationsHistogramBucket>;
-
     const docIDs: string[] = [];
-    autoDateHistogram.buckets?.forEach((timeInterval: any) => {
-      timeInterval.group_by.buckets.forEach((stackTraceItem: any) => {
+    (
+      resTopNStackTraces.body.aggregations?.histogram as AggregationsHistogramAggregate
+    ).buckets.forEach((timeInterval: AggregationsHistogramBucket) => {
+      timeInterval.group_by.buckets.forEach((stackTraceItem: AggregationsStringTermsBucket) => {
         docIDs.push(stackTraceItem.key);
       });
     });
 
-    const resTraceMetadata = await esClient.mget<any>({
+    const resTraceMetadata = await esClient.mget({
       index: 'profiling-stacktraces',
       body: { ids: docIDs },
     });
@@ -76,23 +77,25 @@ export function queryTopNCommon(
       path: pathName,
       validate: {
         query: schema.object({
-          index: schema.maybe(schema.string()),
-          projectID: schema.maybe(schema.string()),
-          timeFrom: schema.maybe(schema.string()),
-          timeTo: schema.maybe(schema.string()),
+          index: schema.string(),
+          projectID: schema.string(),
+          timeFrom: schema.string(),
+          timeTo: schema.string(),
+          n: schema.number(),
         }),
       },
     },
     async (context, request, response) => {
-      const { index, projectID, timeFrom, timeTo } = request.query;
+      const { index, projectID, timeFrom, timeTo, n } = request.query;
 
       try {
         return await topNElasticSearchQuery(
           context,
-          index!,
-          projectID!,
-          timeFrom!,
-          timeTo!,
+          index,
+          projectID,
+          timeFrom,
+          timeTo,
+          n,
           searchField,
           response
         );
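The route schema is now strict: the four original parameters drop schema.maybe and the new n is required. A small sketch of the resulting behaviour (the values are illustrative; the actual route path comes from getRemoteRoutePaths() and is not shown in this diff):

// Hypothetical query-string object for a request to the topN route.
const goodQuery = {
  index: 'profiling-events',
  projectID: '5',
  timeFrom: '1644506880',
  timeTo: '1644510480',
  n: 100,
};
// Passes validation; n is forwarded as topNItems into
// autoHistogramSumCountOnGroupByField via topNElasticSearchQuery.
//
// A request missing any of these fields (e.g. no projectID, or no n) is now
// rejected by @kbn/config-schema with a 400 before the handler runs, whereas the
// old schema.maybe(...) variants let undefined values reach the Elasticsearch query.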
