[EEM][POC] The POC for creating entity-centric indices using entity d…

…efinitions (#183205) ## Summary This is a "proof of concept" for generating entity-centric indices for the OAM. This exposes an API (`/api/entities`) for creating "asset definitions" (`EntityDefinition`) that manages a transform and ingest pipeline to produce documents into an index which could be used to create a search experience or lookups for different services. ### Features - Data schema agnostic, works with known schemas OR custom logs - Supports defining multiple `identityFields` along with an `identityTemplate` for formatting the `asset.id` - Supports optional `identityFields` using `{ "field": "path-to-field", "optional": true }` definition instead of a `string`. - Supports defining key `metrics` with equations which are compatible with the SLO product - Supports adding `metadata` fields which will include multiple values. - Supports `metadata` fields can be re-mapped to a new destination path using `{ "source": "path-to-source-field", "limit": 1000, "destination": "path-to-destination-in-output" }` definition instead of a `string` - Supports adding `staticFields` which can also use template variables - Support fine grain control over the frequency and sync settings for the underlying transform - Installs the index template components and index template settings for the destination index - Allow the user to configure the index patterns and timestamp field along with the lookback - The documents for each definition will be stored in their own index (`.entities-observability.summary-v1.{defintion.id}`) ### Notes - We are currently considering adding a historical index which will track changes to the assets over time. If we choose to do this, the summary index would remain the same but we'd add a second transform with a group_by on the `definition.timestampField` and break the indices into monthly indexes (configurable in the settings). - We are looking into ways to add `firstSeenTimestamp`, this is a difficult due to scaling issue. Essentially, we would need to find the `minimum` timestamp for each entity which could be extremely costly on a large datasets. - There is nothing stopping you from creating an asset definition that uses the `.entities-observability.summary-v1.*` index pattern to create summaries of summaries... it can be very "meta". ### API - `POST /api/entities/definition` - Creates a new asset definition and starts the indexing. See examples below. - `DELETE /api/entities/definition/{id}` - Deletes the asset definition along with cleaning up the transform, ingest pipeline, and deletes the destination index. - `POST /api/entities/definition/{id}/_reset` - Resets the transform, ingest pipeline, and destination index. This is useful for upgrading asset definitions to new features. ## Example Definitions and Output Here is a definition for creating services for each of the custom log sources in the `fake_stack` dataset from `x-pack/packages/data-forge`. ```JSON POST kbn:/api/entities/definition { "id": "admin-console-logs-service", "name": "Services for Admin Console", "type": "service", "indexPatterns": ["kbn-data-forge-fake_stack.*"], "timestampField": "@timestamp", "lookback": "5m", "identityFields": ["log.logger"], "identityTemplate": "{{log.logger}}", "metadata": [ "tags", "host.name" ], "metrics": [ { "name": "logRate", "equation": "A / 5", "metrics": [ { "name": "A", "aggregation": "doc_count", "filter": "log.level: *" } ] }, { "name": "errorRate", "equation": "A / 5", "metrics": [ { "name": "A", "aggregation": "doc_count", "filter": "log.level: \"ERROR\"" } ] } ] } ``` Which produces: ```JSON { "host": { "name": [ "admin-console.prod.020", "admin-console.prod.010", "admin-console.prod.011", "admin-console.prod.001", "admin-console.prod.012", "admin-console.prod.002", "admin-console.prod.013", "admin-console.prod.003", "admin-console.prod.014", "admin-console.prod.004", "admin-console.prod.015", "admin-console.prod.016", "admin-console.prod.005", "admin-console.prod.017", "admin-console.prod.006", "admin-console.prod.018", "admin-console.prod.007", "admin-console.prod.019", "admin-console.prod.008", "admin-console.prod.009" ] }, "entity": { "latestTimestamp": "2024-05-10T22:04:51.481Z", "metric": { "logRate": 37.4, "errorRate": 1 }, "identity": { "log": { "logger": "admin-console" } }, "id": "admin-console", "indexPatterns": [ "kbn-data-forge-fake_stack.*" ], "definitionId": "admin-console-logs-service" }, "event": { "ingested": "2024-05-10T22:05:51.955691Z" }, "tags": [ "infra:admin-console" ] } ``` Here is an example of a definition for APM Services: ```JSON POST kbn:/api/entities/definition { "id": "apm-services", "name": "Services for APM", "type": "service", "indexPatterns": ["logs-*", "metrics-*"], "timestampField": "@timestamp", "lookback": "5m", "identityFields": ["service.name", "service.environment"], "identityTemplate": "{{service.name}}:{{service.environment}}", "metadata": [ "tags", "host.name" ], "metrics": [ { "name": "latency", "equation": "A", "metrics": [ { "name": "A", "aggregation": "avg", "field": "transaction.duration.histogram" } ] }, { "name": "throughput", "equation": "A / 5", "metrics": [ { "name": "A", "aggregation": "doc_count" } ] }, { "name": "failedTransRate", "equation": "A / B", "metrics": [ { "name": "A", "aggregation": "doc_count", "filter": "event.outcome: \"failure\"" }, { "name": "B", "aggregation": "doc_count", "filter": "event.outcome: *" } ] } ] } ``` Which produces: ```JSON { "host": { "name": [ "simianhacker's-macbook-pro" ] }, "entity": { "latestTimestamp": "2024-05-10T21:38:22.513Z", "metric": { "latency": 615276.8812785388, "throughput": 50.6, "failedTransRate": 0.0091324200913242 }, "identity": { "service": { "environment": "development", "name": "admin-console" } }, "id": "admin-console:development", "indexPatterns": [ "logs-*", "metrics-*" ], "definitionId": "apm-services" }, "event": { "ingested": "2024-05-10T21:39:33.636225Z" }, "tags": [ "_geoip_database_unavailable_GeoLite2-City.mmdb" ] } ``` ### Getting Started The easiest way to get started is to use the`kbn-data-forge` config below. Save this YAML to `~/Desktop/fake_stack.yaml` then run `node x-pack/scripts/data_forge.js --config ~/Desktop/fake_stack.yaml`. Then create a definition using the first example above. ```YAML --- elasticsearch: installKibanaUser: false kibana: installAssets: true host: "http://localhost:5601/kibana" indexing: dataset: "fake_stack" eventsPerCycle: 50 reduceWeekendTrafficBy: 0.5 schedule: # Start with good events - template: "good" start: "now-1d" end: "now-20m" eventsPerCycle: 50 randomness: 0.8 - template: "bad" start: "now-20m" end: "now-10m" eventsPerCycle: 50 randomness: 0.8 - template: "good" start: "now-10m" end: false eventsPerCycle: 50 randomness: 0.8 ``` --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
elastic · May 17, 2024 · 7ae07f8 · 7ae07f8
1 parent e24502d
commit 7ae07f8
Show file tree

Hide file tree

Showing 56 changed files with 1,790 additions and 23 deletions.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -390,6 +390,7 @@ src/plugins/embeddable @elastic/kibana-presentation
 x-pack/examples/embedded_lens_example @elastic/kibana-visualizations
 x-pack/plugins/encrypted_saved_objects @elastic/kibana-security
 x-pack/plugins/enterprise_search @elastic/enterprise-search-frontend
+x-pack/packages/kbn-entities-schema @elastic/obs-knowledge-team
 examples/error_boundary @elastic/appex-sharedux
 packages/kbn-es @elastic/kibana-operations
 packages/kbn-es-archiver @elastic/kibana-operations @elastic/appex-qa

diff --git a/package.json b/package.json
@@ -439,6 +439,7 @@
     "@kbn/embedded-lens-example-plugin": "link:x-pack/examples/embedded_lens_example",
     "@kbn/encrypted-saved-objects-plugin": "link:x-pack/plugins/encrypted_saved_objects",
     "@kbn/enterprise-search-plugin": "link:x-pack/plugins/enterprise_search",
+    "@kbn/entities-schema": "link:x-pack/packages/kbn-entities-schema",
     "@kbn/error-boundary-example-plugin": "link:examples/error_boundary",
     "@kbn/es-errors": "link:packages/kbn-es-errors",
     "@kbn/es-query": "link:packages/kbn-es-query",

diff --git a/tsconfig.base.json b/tsconfig.base.json
@@ -774,6 +774,8 @@
       "@kbn/encrypted-saved-objects-plugin/*": ["x-pack/plugins/encrypted_saved_objects/*"],
       "@kbn/enterprise-search-plugin": ["x-pack/plugins/enterprise_search"],
       "@kbn/enterprise-search-plugin/*": ["x-pack/plugins/enterprise_search/*"],
+      "@kbn/entities-schema": ["x-pack/packages/kbn-entities-schema"],
+      "@kbn/entities-schema/*": ["x-pack/packages/kbn-entities-schema/*"],
       "@kbn/error-boundary-example-plugin": ["examples/error_boundary"],
       "@kbn/error-boundary-example-plugin/*": ["examples/error_boundary/*"],
       "@kbn/es": ["packages/kbn-es"],

diff --git a/x-pack/packages/kbn-entities-schema/README.md b/x-pack/packages/kbn-entities-schema/README.md
@@ -0,0 +1,3 @@
+# @kbn/entities-schema
+
+The entities schema for the asset model for Observability
diff --git a/x-pack/packages/kbn-entities-schema/index.ts b/x-pack/packages/kbn-entities-schema/index.ts
@@ -0,0 +1,10 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export * from './src/schema/entity_definition';
+export * from './src/schema/entity';
+export * from './src/schema/common';
diff --git a/x-pack/packages/kbn-entities-schema/jest.config.js b/x-pack/packages/kbn-entities-schema/jest.config.js
@@ -0,0 +1,12 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+module.exports = {
+  preset: '@kbn/test',
+  rootDir: '../../..',
+  roots: ['<rootDir>/x-pack/packages/kbn-entities-schema'],
+};
diff --git a/x-pack/packages/kbn-entities-schema/kibana.jsonc b/x-pack/packages/kbn-entities-schema/kibana.jsonc
@@ -0,0 +1,5 @@
+{
+  "type": "shared-common",
+  "id": "@kbn/entities-schema",
+  "owner": "@elastic/obs-knowledge-team"
+}
diff --git a/x-pack/packages/kbn-entities-schema/package.json b/x-pack/packages/kbn-entities-schema/package.json
@@ -0,0 +1,6 @@
+{
+  "name": "@kbn/entities-schema",
+  "private": true,
+  "version": "1.0.0",
+  "license": "Elastic License 2.0"
+}
diff --git a/x-pack/packages/kbn-entities-schema/src/schema/common.ts b/x-pack/packages/kbn-entities-schema/src/schema/common.ts
@@ -0,0 +1,106 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { z } from 'zod';
+import moment from 'moment';
+
+export enum EntityType {
+  service = 'service',
+  host = 'host',
+  pod = 'pod',
+  node = 'node',
+}
+
+export const arrayOfStringsSchema = z.array(z.string());
+
+export const entityTypeSchema = z.nativeEnum(EntityType);
+
+export enum BasicAggregations {
+  avg = 'avg',
+  max = 'max',
+  min = 'min',
+  sum = 'sum',
+  cardinality = 'cardinality',
+  last_value = 'last_value',
+  std_deviation = 'std_deviation',
+}
+
+export const basicAggregationsSchema = z.nativeEnum(BasicAggregations);
+
+const metricNameSchema = z
+  .string()
+  .length(1)
+  .regex(/[a-zA-Z]/)
+  .toUpperCase();
+
+export const filterSchema = z.optional(z.string());
+
+export const basicMetricWithFieldSchema = z.object({
+  name: metricNameSchema,
+  aggregation: basicAggregationsSchema,
+  field: z.string(),
+  filter: filterSchema,
+});
+
+export const docCountMetricSchema = z.object({
+  name: metricNameSchema,
+  aggregation: z.literal('doc_count'),
+  filter: filterSchema,
+});
+
+export const durationSchema = z
+  .string()
+  .regex(/\d+[m|d|s|h]/)
+  .transform((val: string) => {
+    const parts = val.match(/(\d+)([m|s|h|d])/);
+    if (parts === null) {
+      throw new Error('Unable to parse duration');
+    }
+    const value = parseInt(parts[1], 10);
+    const unit = parts[2] as 'm' | 's' | 'h' | 'd';
+    const duration = moment.duration(value, unit);
+    return { ...duration, toJSON: () => val };
+  });
+
+export const percentileMetricSchema = z.object({
+  name: metricNameSchema,
+  aggregation: z.literal('percentile'),
+  field: z.string(),
+  percentile: z.number(),
+  filter: filterSchema,
+});
+
+export const metricSchema = z.discriminatedUnion('aggregation', [
+  basicMetricWithFieldSchema,
+  docCountMetricSchema,
+  percentileMetricSchema,
+]);
+
+export type Metric = z.infer<typeof metricSchema>;
+
+export const keyMetricSchema = z.object({
+  name: z.string(),
+  metrics: z.array(metricSchema),
+  equation: z.string(),
+});
+
+export type KeyMetric = z.infer<typeof keyMetricSchema>;
+
+export const metadataSchema = z
+  .object({
+    source: z.string(),
+    destination: z.optional(z.string()),
+    limit: z.optional(z.number().default(1000)),
+  })
+  .or(z.string().transform((value) => ({ source: value, destination: value, limit: 1000 })));
+
+export const identityFieldsSchema = z
+  .object({
+    field: z.string(),
+    optional: z.boolean(),
+  })
+  .or(z.string().transform((value) => ({ field: value, optional: false })));
diff --git a/x-pack/packages/kbn-entities-schema/src/schema/entity.ts b/x-pack/packages/kbn-entities-schema/src/schema/entity.ts
@@ -0,0 +1,21 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { z } from 'zod';
+import { arrayOfStringsSchema } from './common';
+
+export const entitySchema = z.intersection(
+  z.object({
+    entity: z.object({
+      id: z.string(),
+      indexPatterns: arrayOfStringsSchema,
+      identityFields: arrayOfStringsSchema,
+      metric: z.record(z.string(), z.number()),
+    }),
+  }),
+  z.record(z.string(), z.string().or(z.number()))
+);
diff --git a/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts b/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts
@@ -0,0 +1,42 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { z } from 'zod';
+import {
+  arrayOfStringsSchema,
+  entityTypeSchema,
+  keyMetricSchema,
+  metadataSchema,
+  filterSchema,
+  durationSchema,
+  identityFieldsSchema,
+} from './common';
+
+export const entityDefinitionSchema = z.object({
+  id: z.string().regex(/^[\w-]+$/),
+  name: z.string(),
+  description: z.optional(z.string()),
+  type: entityTypeSchema,
+  filter: filterSchema,
+  indexPatterns: arrayOfStringsSchema,
+  identityFields: z.array(identityFieldsSchema),
+  identityTemplate: z.string(),
+  metadata: z.optional(z.array(metadataSchema)),
+  metrics: z.optional(z.array(keyMetricSchema)),
+  staticFields: z.optional(z.record(z.string(), z.string())),
+  lookback: durationSchema,
+  timestampField: z.string(),
+  settings: z.optional(
+    z.object({
+      syncField: z.optional(z.string()),
+      syncDelay: z.optional(z.string()),
+      frequency: z.optional(z.string()),
+    })
+  ),
+});
+
+export type EntityDefinition = z.infer<typeof entityDefinitionSchema>;
diff --git a/x-pack/packages/kbn-entities-schema/tsconfig.json b/x-pack/packages/kbn-entities-schema/tsconfig.json
@@ -0,0 +1,18 @@
+{
+  "extends": "../../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "target/types",
+    "types": [
+      "jest",
+      "node"
+    ]
+  },
+  "include": [
+    "**/*.ts"
+  ],
+  "exclude": [
+    "target/**/*"
+  ],
+  "kbn_references": [
+  ]
+}
diff --git a/x-pack/plugins/observability_solution/asset_manager/common/constants_entities.ts b/x-pack/plugins/observability_solution/asset_manager/common/constants_entities.ts
@@ -0,0 +1,13 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export const ENTITY_VERSION = 'v1';
+export const ENTITY_BASE_PREFIX = `.entities-observability.summary-${ENTITY_VERSION}`;
+export const ENTITY_TRANSFORM_PREFIX = `entities-observability-summary-${ENTITY_VERSION}`;
+export const ENTITY_DEFAULT_FREQUENCY = '1m';
+export const ENTITY_DEFAULT_SYNC_DELAY = '60s';
+export const ENTITY_API_PREFIX = '/api/entities';
diff --git a/...vability_solution/asset_manager/server/lib/entities/create_and_install_ingest_pipeline.ts b/...vability_solution/asset_manager/server/lib/entities/create_and_install_ingest_pipeline.ts
@@ -0,0 +1,39 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { ElasticsearchClient, Logger } from '@kbn/core/server';
+import { EntityDefinition } from '@kbn/entities-schema';
+import { generateProcessors } from './ingest_pipeline/generate_processors';
+import { retryTransientEsErrors } from './helpers/retry';
+import { EntitySecurityException } from './errors/entity_security_exception';
+import { generateIngestPipelineId } from './ingest_pipeline/generate_ingest_pipeline_id';
+
+export async function createAndInstallIngestPipeline(
+  esClient: ElasticsearchClient,
+  definition: EntityDefinition,
+  logger: Logger
+) {
+  const processors = generateProcessors(definition);
+  const id = generateIngestPipelineId(definition);
+  try {
+    await retryTransientEsErrors(
+      () =>
+        esClient.ingest.putPipeline({
+          id,
+          processors,
+        }),
+      { logger }
+    );
+  } catch (e) {
+    logger.error(`Cannot create entity ingest pipeline for [${definition.id}] entity defintion`);
+    if (e.meta?.body?.error?.type === 'security_exception') {
+      throw new EntitySecurityException(e.meta.body.error.reason, definition);
+    }
+    throw e;
+  }
+  return id;
+}
diff --git a/.../observability_solution/asset_manager/server/lib/entities/create_and_install_transform.ts b/.../observability_solution/asset_manager/server/lib/entities/create_and_install_transform.ts
@@ -0,0 +1,30 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { ElasticsearchClient, Logger } from '@kbn/core/server';
+import { EntityDefinition } from '@kbn/entities-schema';
+import { generateTransform } from './transform/generate_transform';
+import { retryTransientEsErrors } from './helpers/retry';
+import { EntitySecurityException } from './errors/entity_security_exception';
+
+export async function createAndInstallTransform(
+  esClient: ElasticsearchClient,
+  definition: EntityDefinition,
+  logger: Logger
+) {
+  const transform = generateTransform(definition);
+  try {
+    await retryTransientEsErrors(() => esClient.transform.putTransform(transform), { logger });
+  } catch (e) {
+    logger.error(`Cannot create entity transform for [${definition.id}] entity definition`);
+    if (e.meta?.body?.error?.type === 'security_exception') {
+      throw new EntitySecurityException(e.meta.body.error.reason, definition);
+    }
+    throw e;
+  }
+  return transform.transform_id;
+}
diff --git a/...gins/observability_solution/asset_manager/server/lib/entities/delete_entity_definition.ts b/...gins/observability_solution/asset_manager/server/lib/entities/delete_entity_definition.ts
@@ -0,0 +1,31 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { Logger, SavedObjectsClientContract } from '@kbn/core/server';
+import { EntityDefinition } from '@kbn/entities-schema';
+import { SO_ENTITY_DEFINITION_TYPE } from '../../saved_objects';
+import { EntityDefinitionNotFound } from './errors/entity_not_found';
+
+export async function deleteEntityDefinition(
+  soClient: SavedObjectsClientContract,
+  definition: EntityDefinition,
+  logger: Logger
+) {
+  const response = await soClient.find<EntityDefinition>({
+    type: SO_ENTITY_DEFINITION_TYPE,
+    page: 1,
+    perPage: 1,
+    filter: `${SO_ENTITY_DEFINITION_TYPE}.attributes.id:(${definition.id})`,
+  });
+
+  if (response.total === 0) {
+    logger.error(`Unable to delete entity definition [${definition.id}] because it doesn't exist.`);
+    throw new EntityDefinitionNotFound(`Entity defintion with [${definition.id}] not found.`);
+  }
+
+  await soClient.delete(SO_ENTITY_DEFINITION_TYPE, response.saved_objects[0].id);
+}