diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 28de6bd904..92e802cd88 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -46,7 +46,7 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "elastic-package" && "$BUILDKITE_STEP_KEY" = export GCP_PROJECT_ID=${ELASTIC_PACKAGE_GCP_PROJECT_SECRET} fi -if [[ "$BUILDKITE_PIPELINE_SLUG" == "elastic-package" && "$BUILDKITE_STEP_KEY" == "integration-parallel-aws" ]]; then +if [[ "$BUILDKITE_PIPELINE_SLUG" == "elastic-package" && ("$BUILDKITE_STEP_KEY" == "integration-parallel-aws" || "$BUILDKITE_STEP_KEY" == "integration-parallel-aws_logs") ]]; then export ELASTIC_PACKAGE_AWS_SECRET_KEY=$(retry 5 vault kv get -field secret_key ${AWS_SERVICE_ACCOUNT_SECRET_PATH}) export ELASTIC_PACKAGE_AWS_ACCESS_KEY=$(retry 5 vault kv get -field access_key ${AWS_SERVICE_ACCOUNT_SECRET_PATH}) diff --git a/docs/howto/system_testing.md b/docs/howto/system_testing.md index 5a3c545be5..95f7312a5a 100644 --- a/docs/howto/system_testing.md +++ b/docs/howto/system_testing.md @@ -262,6 +262,65 @@ data "aws_ami" "latest-amzn" { Notice the use of the `TEST_RUN_ID` variable. It contains a unique ID, which can help differentiate resources created in potential concurrent test runs. +#### Terraform Outputs + +The outputs generated by the terraform service deployer can be accessed in the system test config using handlebars templates. +For example, if an SQS queue is configured in terraform and `queue_url` is configured as an output, it can be used in the test config as the handlebars template `{{TF_OUTPUT_queue_url}}`. + +Sample Terraform definition: + +``` +resource "aws_sqs_queue" "test" { + +} + +output "queue_url" { + value = aws_sqs_queue.test.url +} +``` + +Sample system test config: + +``` yaml +data_stream: + vars: + period: 5m + latency: 10m + queue_url: '{{TF_OUTPUT_queue_url}}' + tags_filter: |- + - key: Name + value: "elastic-package-test-{{TEST_RUN_ID}}" +``` + +For complex outputs from terraform, you can use `{{TF_OUTPUT_root_key.nested_key}}`: + +``` +output "root_key" { + value = someoutput.nested_key_value +} +``` +``` json +{ + "root_key": { + "sensitive": false, + "type": [ + "object", + { + "nested_key": "string" + } + ], + "value": { + "nested_key": "this is a nested key" + } + } +} +``` +``` yaml +data_stream: + vars: + queue_url: '{{TF_OUTPUT_root_key.nested_key}}' +``` + #### Environment variables To use environment variables within the Terraform service deployer a `env.yml` file is required. 
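For illustration, here is a minimal standalone Go sketch of the flattening described in the docs above: it parses the JSON written by `terraform output -json` and exposes each output under a `TF_OUTPUT_`-prefixed key, mirroring what the terraform service deployer in this patch does before the test config is rendered. The sample JSON and key names follow the docs example; this is a sketch, not the deployer code itself.

``` go
package main

import (
	"encoding/json"
	"fmt"
)

// outputMeta mirrors the "value" field of each entry emitted by
// `terraform output -json`; the sensitive/type metadata is ignored here.
type outputMeta struct {
	Value interface{} `json:"value"`
}

func main() {
	// Sample contents of tfOutputValues.json, matching the docs example above.
	raw := []byte(`{
	  "queue_url": {"sensitive": false, "type": "string", "value": "https://sqs.us-east-1.amazonaws.com/1234654/example-queue"},
	  "root_key": {"sensitive": false, "type": ["object", {"nested_key": "string"}], "value": {"nested_key": "this is a nested key"}}
	}`)

	var outputs map[string]outputMeta
	if err := json.Unmarshal(raw, &outputs); err != nil {
		panic(err)
	}

	// Flatten each output into a TF_OUTPUT_-prefixed property, as the
	// terraform service deployer does with the ServiceContext custom properties.
	props := map[string]interface{}{}
	for name, out := range outputs {
		props["TF_OUTPUT_"+name] = out.Value
	}

	fmt.Println(props["TF_OUTPUT_queue_url"])
	// Nested outputs stay as maps, so `{{TF_OUTPUT_root_key.nested_key}}` can resolve into them.
	fmt.Println(props["TF_OUTPUT_root_key"].(map[string]interface{})["nested_key"])
}
```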
diff --git a/internal/configuration/locations/locations.go b/internal/configuration/locations/locations.go index e6252e75a8..5e36081f13 100644 --- a/internal/configuration/locations/locations.go +++ b/internal/configuration/locations/locations.go @@ -32,6 +32,7 @@ var ( serviceLogsDir = filepath.Join(temporaryDir, "service_logs") kubernetesDeployerDir = filepath.Join(deployerDir, "kubernetes") + serviceOutputDir = filepath.Join(temporaryDir, "output") ) // LocationManager maintains an instance of a config path location @@ -90,6 +91,11 @@ func (loc LocationManager) ServiceLogDir() string { return filepath.Join(loc.stackPath, serviceLogsDir) } +// ServiceOutputDir returns the output directory +func (loc LocationManager) ServiceOutputDir() string { + return filepath.Join(loc.stackPath, serviceOutputDir) +} + // FieldsCacheDir returns the directory with cached fields func (loc LocationManager) FieldsCacheDir() string { return filepath.Join(loc.stackPath, fieldsCachedDir) diff --git a/internal/testrunner/runners/system/runner.go b/internal/testrunner/runners/system/runner.go index ba019459c3..8d8349f3b7 100644 --- a/internal/testrunner/runners/system/runner.go +++ b/internal/testrunner/runners/system/runner.go @@ -197,6 +197,13 @@ func (r *runner) runTestPerVariant(result *testrunner.ResultComposer, locationMa ctxt.Logs.Folder.Local = locationManager.ServiceLogDir() ctxt.Logs.Folder.Agent = ServiceLogsAgentDir ctxt.Test.RunID = createTestRunID() + + outputDir, err := createOutputDir(locationManager, ctxt.Test.RunID) + if err != nil { + return nil, fmt.Errorf("could not create output dir for terraform deployer %w", err) + } + ctxt.OutputDir = outputDir + testConfig, err := newConfig(filepath.Join(r.options.TestFolder.Path, cfgFile), ctxt, variantName) if err != nil { return result.WithError(errors.Wrapf(err, "unable to load system test case file '%s'", cfgFile)) @@ -224,6 +231,14 @@ func (r *runner) runTestPerVariant(result *testrunner.ResultComposer, locationMa return partial, nil } +func createOutputDir(locationManager *locations.LocationManager, runId string) (string, error) { + outputDir := filepath.Join(locationManager.ServiceOutputDir(), runId) + if err := os.MkdirAll(outputDir, 0755); err != nil { + return "", fmt.Errorf("failed to create output directory: %w", err) + } + return outputDir, nil +} + func createTestRunID() string { return fmt.Sprintf("%d", rand.Intn(testRunMaxID-testRunMinID)+testRunMinID) } diff --git a/internal/testrunner/runners/system/servicedeployer/_static/terraform_deployer.yml b/internal/testrunner/runners/system/servicedeployer/_static/terraform_deployer.yml index 62c094403d..794446d1ba 100644 --- a/internal/testrunner/runners/system/servicedeployer/_static/terraform_deployer.yml +++ b/internal/testrunner/runners/system/servicedeployer/_static/terraform_deployer.yml @@ -13,3 +13,4 @@ services: - TF_VAR_REPO=${REPO:-unknown} volumes: - ${TF_DIR}:/stage + - ${TF_OUTPUT_DIR}:/output diff --git a/internal/testrunner/runners/system/servicedeployer/_static/terraform_deployer_run.sh b/internal/testrunner/runners/system/servicedeployer/_static/terraform_deployer_run.sh index 2085bb078f..b50cd53196 100644 --- a/internal/testrunner/runners/system/servicedeployer/_static/terraform_deployer_run.sh +++ b/internal/testrunner/runners/system/servicedeployer/_static/terraform_deployer_run.sh @@ -22,6 +22,8 @@ terraform init terraform plan terraform apply -auto-approve && touch /tmp/tf-applied +terraform output -json > /output/tfOutputValues.json + echo "Terraform definitions 
applied." set +x diff --git a/internal/testrunner/runners/system/servicedeployer/compose.go b/internal/testrunner/runners/system/servicedeployer/compose.go index b62eb77899..69c10fb361 100644 --- a/internal/testrunner/runners/system/servicedeployer/compose.go +++ b/internal/testrunner/runners/system/servicedeployer/compose.go @@ -158,6 +158,10 @@ func (s *dockerComposeDeployedService) TearDown() error { if err != nil { logger.Errorf("could not remove the service logs (path: %s)", s.ctxt.Logs.Folder.Local) } + // Remove the outputs generated by the service container + if err = os.RemoveAll(s.ctxt.OutputDir); err != nil { + logger.Errorf("could not remove the temporary output files: %v", err) + } }() p, err := compose.NewProject(s.project, s.ymlPaths...) diff --git a/internal/testrunner/runners/system/servicedeployer/context.go b/internal/testrunner/runners/system/servicedeployer/context.go index fcb048a439..8ef382cb60 100644 --- a/internal/testrunner/runners/system/servicedeployer/context.go +++ b/internal/testrunner/runners/system/servicedeployer/context.go @@ -60,6 +60,9 @@ type ServiceContext struct { // CustomProperties store additional data used to boot up the service, e.g. AWS credentials. CustomProperties map[string]interface{} + + // OutputDir is the directory where any generated outputs are stored. + OutputDir string } // Aliases method returned aliases to properties of the service context. diff --git a/internal/testrunner/runners/system/servicedeployer/terraform.go b/internal/testrunner/runners/system/servicedeployer/terraform.go index 4ae68e2b3b..6a191d244e 100644 --- a/internal/testrunner/runners/system/servicedeployer/terraform.go +++ b/internal/testrunner/runners/system/servicedeployer/terraform.go @@ -6,6 +6,7 @@ package servicedeployer import ( _ "embed" + "encoding/json" "fmt" "os" "path/filepath" @@ -26,6 +27,8 @@ const ( terraformDeployerYml = "terraform-deployer.yml" terraformDeployerDockerfile = "Dockerfile" terraformDeployerRun = "run.sh" + terraformOutputPrefix = "TF_OUTPUT_" + terraformOutputJsonFile = "tfOutputValues.json" ) //go:embed _static/terraform_deployer.yml @@ -42,6 +45,43 @@ type TerraformServiceDeployer struct { definitionsDir string } +// addTerraformOutputs reads the terraform outputs generated in JSON format and adds them +// to the custom properties of the ServiceContext, so they can be used in handlebars templates +// like `{{TF_OUTPUT_queue_url}}`, where `queue_url` is the name of the configured output. +func addTerraformOutputs(outCtxt ServiceContext) error { + // Read the JSON file where the terraform outputs were written + outputFile := filepath.Join(outCtxt.OutputDir, terraformOutputJsonFile) + content, err := os.ReadFile(outputFile) + if err != nil { + return fmt.Errorf("failed to read terraform output file: %w", err) + } + + // https://github.com/hashicorp/terraform/blob/v1.4.6/internal/command/views/output.go#L217-L222 + type OutputMeta struct { + Value interface{} `json:"value"` + } + + // Unmarshal the data into `terraformOutputs` + logger.Debug("Unmarshalling terraform output JSON") + var terraformOutputs map[string]OutputMeta + if err = json.Unmarshal(content, &terraformOutputs); err != nil { + return fmt.Errorf("failed to unmarshal terraform output JSON: %w", err) + } + + if len(terraformOutputs) == 0 { + return nil + } + + if outCtxt.CustomProperties == nil { + outCtxt.CustomProperties = make(map[string]any, len(terraformOutputs)) + } + // Prefix variable names with TF_OUTPUT_ + for k, outputs := range terraformOutputs { + outCtxt.CustomProperties[terraformOutputPrefix+k] = 
outputs.Value + } + return nil +} + // NewTerraformServiceDeployer creates an instance of TerraformServiceDeployer. func NewTerraformServiceDeployer(definitionsDir string) (*TerraformServiceDeployer, error) { return &TerraformServiceDeployer{ @@ -117,6 +157,11 @@ func (tsd TerraformServiceDeployer) SetUp(inCtxt ServiceContext) (DeployedServic } outCtxt.Agent.Host.NamePrefix = "docker-fleet-agent" + + err = addTerraformOutputs(outCtxt) + if err != nil { + return nil, fmt.Errorf("could not handle terraform output %w", err) + } service.ctxt = outCtxt return &service, nil } diff --git a/internal/testrunner/runners/system/servicedeployer/terraform_env.go b/internal/testrunner/runners/system/servicedeployer/terraform_env.go index ecc6887212..93256b3fbf 100644 --- a/internal/testrunner/runners/system/servicedeployer/terraform_env.go +++ b/internal/testrunner/runners/system/servicedeployer/terraform_env.go @@ -14,6 +14,7 @@ import ( const ( tfDir = "TF_DIR" + tfOutputDir = "TF_OUTPUT_DIR" tfTestRunID = "TF_VAR_TEST_RUN_ID" envYmlFile = "env.yml" @@ -24,6 +25,7 @@ func (tsd TerraformServiceDeployer) buildTerraformExecutorEnvironment(ctxt Servi vars[serviceLogsDirEnv] = ctxt.Logs.Folder.Local vars[tfTestRunID] = ctxt.Test.RunID vars[tfDir] = tsd.definitionsDir + vars[tfOutputDir] = ctxt.OutputDir var pairs []string for k, v := range vars { diff --git a/internal/testrunner/runners/system/servicedeployer/terraform_test.go b/internal/testrunner/runners/system/servicedeployer/terraform_test.go new file mode 100644 index 0000000000..30d822f455 --- /dev/null +++ b/internal/testrunner/runners/system/servicedeployer/terraform_test.go @@ -0,0 +1,128 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package servicedeployer + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAddTerraformOutputs(t *testing.T) { + var testCases = []struct { + testName string + err string + ctxt ServiceContext + runId string + content []byte + expectedProps map[string]interface{} + }{ + { + testName: "single_value_output", + runId: "99999", + ctxt: ServiceContext{ + Test: struct{ RunID string }{"99999"}, + }, + content: []byte( + `{ + "queue_url": { + "sensitive": false, + "type": "string", + "value": "https://sqs.us-east-1.amazonaws.com/1234654/elastic-package-aws-logs-queue-someId" + } + }`, + ), + expectedProps: map[string]interface{}{ + "TF_OUTPUT_queue_url": "https://sqs.us-east-1.amazonaws.com/1234654/elastic-package-aws-logs-queue-someId", + }, + }, + { + testName: "multiple_value_output", + runId: "23465", + ctxt: ServiceContext{ + Test: struct{ RunID string }{"23465"}, + }, + content: []byte( + `{ + "queue_url": { + "sensitive": false, + "type": "string", + "value": "https://sqs.us-east-1.amazonaws.com/1234654/elastic-package-aws-logs-queue-someId" + }, + "instance_id": { + "sensitive": false, + "type": "string", + "value": "some-random-id" + } + }`, + ), + expectedProps: map[string]interface{}{ + "TF_OUTPUT_queue_url": "https://sqs.us-east-1.amazonaws.com/1234654/elastic-package-aws-logs-queue-someId", + "TF_OUTPUT_instance_id": "some-random-id", + }, + }, + { + testName: "complex_value_output", + runId: "078907890", + ctxt: ServiceContext{ + Test: struct{ RunID string }{"078907890"}, + }, + content: []byte( + `{ + "queue_url": { + "sensitive": false, + "type": "string", + "value": "https://sqs.us-east-1.amazonaws.com/1234654/elastic-package-aws-logs-queue-someId" + }, + "triangle_output": { + "sensitive": false, + "type": [ + "object", + { + "description": "string", + "s_one": "number", + "s_three": "number", + "s_two": "number" + } + ], + "value": { + "value": "this is a triangle", + "s_one": 1, + "s_three": 2.5, + "s_two": 2.5 + } + } + }`, + ), + expectedProps: map[string]interface{}{ + "TF_OUTPUT_queue_url": "https://sqs.us-east-1.amazonaws.com/1234654/elastic-package-aws-logs-queue-someId", + "TF_OUTPUT_triangle_output": map[string]any{ + "s_one": 1.0, + "s_three": 2.5, + "s_two": 2.5, + "value": "this is a triangle", + }, + }, + }, + } + + t.Parallel() + for _, tc := range testCases { + + t.Run(tc.testName, func(t *testing.T) { + tc.ctxt.CustomProperties = make(map[string]interface{}) + tc.ctxt.OutputDir = t.TempDir() + + if err := os.WriteFile(tc.ctxt.OutputDir+"/tfOutputValues.json", tc.content, 0777); err != nil { + t.Fatal(err) + } + + // Test that the terraform output values are generated correctly + addTerraformOutputs(tc.ctxt) + assert.Equal(t, tc.expectedProps, tc.ctxt.CustomProperties) + }) + } +} diff --git a/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/deploy/tf/main.tf b/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/deploy/tf/main.tf index 00f3ba0c25..32738ec4d4 100644 --- a/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/deploy/tf/main.tf +++ b/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/deploy/tf/main.tf @@ -31,3 +31,7 @@ data "aws_ami" "latest-amzn" { values = ["amzn2-ami-minimal-hvm-*-ebs"] } } + +output "instance_id" { + value = aws_instance.i.id +} \ No newline at end of file diff --git a/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/deploy/tf/variables.tf b/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/deploy/tf/variables.tf index e4b95d471b..9d78b1b3c4 100644 --- 
a/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/deploy/tf/variables.tf +++ b/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/deploy/tf/variables.tf @@ -1,16 +1,16 @@ variable "BRANCH" { description = "Branch name or pull request for tagging purposes" - default = "unknown-branch" + default = "unknown-branch" } variable "BUILD_ID" { description = "Build ID in the CI for tagging purposes" - default = "unknown-build" + default = "unknown-build" } variable "CREATED_DATE" { description = "Creation date in epoch time for tagging purposes" - default = "unknown-date" + default = "unknown-date" } variable "ENVIRONMENT" { diff --git a/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/test/system/test-default-config.yml b/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/test/system/test-default-config.yml index 15d7d57b30..edbd956211 100644 --- a/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/test/system/test-default-config.yml +++ b/test/packages/parallel/aws/data_stream/ec2_metrics/_dev/test/system/test-default-config.yml @@ -7,6 +7,7 @@ data_stream: vars: period: 5m latency: 10m + instance_id: '{{TF_OUTPUT_instance_id}}' tags_filter: |- - key: Name value: "elastic-package-test-{{TEST_RUN_ID}}" diff --git a/test/packages/parallel/aws/data_stream/ec2_metrics/manifest.yml b/test/packages/parallel/aws/data_stream/ec2_metrics/manifest.yml index 1a78ff9099..95f5b4f1a4 100644 --- a/test/packages/parallel/aws/data_stream/ec2_metrics/manifest.yml +++ b/test/packages/parallel/aws/data_stream/ec2_metrics/manifest.yml @@ -22,6 +22,13 @@ streams: multi: false required: false show_user: false + - name: instance_id + type: text + title: Instance ID + multi: false + required: true + show_user: true + description: ID of the EC2 instance. - name: tags_filter type: yaml title: Tags Filter diff --git a/test/packages/parallel/aws_logs/_dev/build/build.yml b/test/packages/parallel/aws_logs/_dev/build/build.yml new file mode 100644 index 0000000000..47cbed9fed --- /dev/null +++ b/test/packages/parallel/aws_logs/_dev/build/build.yml @@ -0,0 +1,3 @@ +dependencies: + ecs: + reference: git@v8.0.0 diff --git a/test/packages/parallel/aws_logs/_dev/build/docs/README.md b/test/packages/parallel/aws_logs/_dev/build/docs/README.md new file mode 100644 index 0000000000..1b16353c9f --- /dev/null +++ b/test/packages/parallel/aws_logs/_dev/build/docs/README.md @@ -0,0 +1,28 @@ +# Custom AWS Log Integration + +The custom AWS input integration offers users two ways to collect logs from AWS: from an [S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerLogs.html) (with or without SQS notification) and from [CloudWatch](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/WhatIsCloudWatchLogs.html). +Custom ingest pipelines may be added by adding the name to the pipeline configuration option, creating custom ingest pipelines can be done either through the API or the [Ingest Node Pipeline UI](/app/management/ingest/ingest_pipelines/). + +## Collecting logs from S3 bucket + +When collecting logs from S3 bucket is enabled, users can retrieve logs from S3 +objects that are pointed to by S3 notification events read from an SQS queue or +directly polling list of S3 objects in an S3 bucket. + +The use of SQS notification is preferred: polling list of S3 objects is +expensive in terms of performance and costs and should be preferably used only +when no SQS notification can be attached to the S3 buckets. This input +integration also supports S3 notification from SNS to SQS. 
+ +SQS notification method is enabled setting `queue_url` configuration value. S3 +bucket list polling method is enabled setting `bucket_arn` configuration value +and `number_of_workers` value. Both `queue_url` and `bucket_arn` cannot be set +at the same time and at least one of the two value must be set. + +## Collecting logs from CloudWatch + +When collecting logs from CloudWatch is enabled, users can retrieve logs from +all log streams in a specific log group. `filterLogEvents` AWS API is used to +list log events from the specified log group. Amazon CloudWatch Logs can be used +to store log files from Amazon Elastic Compute Cloud(EC2), AWS CloudTrail, +Route53, and other sources. diff --git a/test/packages/parallel/aws_logs/changelog.yml b/test/packages/parallel/aws_logs/changelog.yml new file mode 100644 index 0000000000..c1469c65be --- /dev/null +++ b/test/packages/parallel/aws_logs/changelog.yml @@ -0,0 +1,65 @@ +- version: "999.999.999" + changes: + - description: Change test public IPs to the supported subset + type: bugfix + link: https://github.com/elastic/integrations/pull/2327 +- version: "0.4.0" + changes: + - description: Add multiline support for using s3 input + type: enhancement + link: https://github.com/elastic/integrations/pull/6081 +- version: "0.3.3" + changes: + - description: Added categories and/or subcategories. + type: enhancement + link: https://github.com/elastic/integrations/pull/5123 +- version: "0.3.2" + changes: + - description: Add required field number of workers to support non aws buckets, and add default value. + type: enhancement + link: https://github.com/elastic/integrations/pull/4917 +- version: "0.3.1" + changes: + - description: Add latency config parameter for aws-cloudwatch input + type: bugfix + link: https://github.com/elastic/integrations/pull/4859 +- version: "0.3.0" + changes: + - description: Expose Default Region setting to UI + type: enhancement + link: https://github.com/elastic/integrations/pull/4158 +- version: "0.2.5" + changes: + - description: Set default endpoint to empty string + type: bugfix + link: https://github.com/elastic/integrations/pull/4103 +- version: "0.2.4" + changes: + - description: Fix proxy URL documentation rendering. 
+ type: bugfix + link: https://github.com/elastic/integrations/pull/3881 +- version: "0.2.3" + changes: + - description: Fix misspelling of Log Stream Prefix variable in manifest for aws-cloudwatch input + type: bugfix + link: https://github.com/elastic/integrations/pull/3610 +- version: "0.2.2" + changes: + - description: update readme file + type: bugfix + link: https://github.com/elastic/integrations/pull/2819 +- version: "0.2.1" + changes: + - description: Add kibana version constraint + type: bugfix + link: https://github.com/elastic/integrations/pull/2819 +- version: "0.2.0" + changes: + - description: Move s3 input and cloudwatch input into the same generic data stream + type: enhancement + link: https://github.com/elastic/integrations/pull/2710 +- version: "0.1.0" + changes: + - description: initial release + type: enhancement # can be one of: enhancement, bugfix, breaking-change + link: https://github.com/elastic/integrations/pull/2353 diff --git a/test/packages/parallel/aws_logs/data_stream/generic/_dev/deploy/tf/env.yml b/test/packages/parallel/aws_logs/data_stream/generic/_dev/deploy/tf/env.yml new file mode 100644 index 0000000000..b795fcdeb2 --- /dev/null +++ b/test/packages/parallel/aws_logs/data_stream/generic/_dev/deploy/tf/env.yml @@ -0,0 +1,9 @@ +version: '2.3' +services: + terraform: + environment: + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} + - AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN} + - AWS_PROFILE=${AWS_PROFILE} + - AWS_REGION=${AWS_REGION:-us-east-1} diff --git a/test/packages/parallel/aws_logs/data_stream/generic/_dev/deploy/tf/main.tf b/test/packages/parallel/aws_logs/data_stream/generic/_dev/deploy/tf/main.tf new file mode 100644 index 0000000000..f7d5618cd3 --- /dev/null +++ b/test/packages/parallel/aws_logs/data_stream/generic/_dev/deploy/tf/main.tf @@ -0,0 +1,64 @@ +variable "TEST_RUN_ID" { + default = "detached" +} + +provider "aws" { + default_tags { + tags = { + environment = var.ENVIRONMENT + repo = var.REPO + branch = var.BRANCH + build = var.BUILD_ID + created_date = var.CREATED_DATE + } + } +} + +resource "aws_s3_bucket" "bucket" { + bucket = "elastic-package-aws-logs-bucket-${var.TEST_RUN_ID}" +} + +resource "aws_sqs_queue" "queue" { + name = "elastic-package-aws-logs-queue-${var.TEST_RUN_ID}" + policy = < + If the host is a container. + + - name: os.build + type: keyword + example: "18D109" + description: > + OS build information. + + - name: os.codename + type: keyword + example: "stretch" + description: > + OS codename, if any. + diff --git a/test/packages/parallel/aws_logs/data_stream/generic/fields/base-fields.yml b/test/packages/parallel/aws_logs/data_stream/generic/fields/base-fields.yml new file mode 100644 index 0000000000..c124b09648 --- /dev/null +++ b/test/packages/parallel/aws_logs/data_stream/generic/fields/base-fields.yml @@ -0,0 +1,23 @@ +- name: data_stream.type + type: constant_keyword + description: Data stream type. +- name: data_stream.dataset + type: constant_keyword + description: Data stream dataset. +- name: data_stream.namespace + type: constant_keyword + description: Data stream namespace. +- name: '@timestamp' + type: date + description: Event timestamp. +- name: event.module + type: constant_keyword + description: Event module + value: aws +- name: event.dataset + type: constant_keyword + description: Event dataset + value: aws_logs.generic +- name: input.type + type: keyword + description: Type of Filebeat input. 
diff --git a/test/packages/parallel/aws_logs/data_stream/generic/fields/ecs.yml b/test/packages/parallel/aws_logs/data_stream/generic/fields/ecs.yml new file mode 100644 index 0000000000..d1c52ea0ec --- /dev/null +++ b/test/packages/parallel/aws_logs/data_stream/generic/fields/ecs.yml @@ -0,0 +1,10 @@ +- external: ecs + name: ecs.version +- external: ecs + name: log.level +- external: ecs + name: message +- name: event.original + external: ecs +- name: tags + external: ecs diff --git a/test/packages/parallel/aws_logs/data_stream/generic/fields/fields.yml b/test/packages/parallel/aws_logs/data_stream/generic/fields/fields.yml new file mode 100644 index 0000000000..c43e4dcc52 --- /dev/null +++ b/test/packages/parallel/aws_logs/data_stream/generic/fields/fields.yml @@ -0,0 +1,28 @@ +- name: aws.cloudwatch + type: group + fields: + - name: message + type: text + description: | + CloudWatch log message. +- name: aws.s3 + type: group + fields: + - name: bucket + type: group + fields: + - name: name + type: keyword + description: Name of the S3 bucket that this log retrieved from. + - name: arn + type: keyword + description: ARN of the S3 bucket that this log retrieved from. + - name: object.key + type: keyword + description: Name of the S3 object that this log retrieved from. +- name: log.file.path + type: keyword + description: Path to the log file. +- name: log.offset + type: long + description: Log offset diff --git a/test/packages/parallel/aws_logs/data_stream/generic/manifest.yml b/test/packages/parallel/aws_logs/data_stream/generic/manifest.yml new file mode 100644 index 0000000000..7710d3c503 --- /dev/null +++ b/test/packages/parallel/aws_logs/data_stream/generic/manifest.yml @@ -0,0 +1,228 @@ +title: Custom logs from AWS +type: logs +streams: + - input: aws-s3 + enabled: false + template_path: aws-s3.yml.hbs + title: AWS logs from S3 + description: Collect logs using aws-s3 input with or without SQS notification + vars: + - name: api_timeout + type: text + title: API Timeout + multi: false + required: false + show_user: false + description: The maximum duration of AWS API can take. The maximum is half of the visibility timeout value. + - name: bucket_arn + type: text + title: Bucket ARN + multi: false + required: false + show_user: true + description: ARN of the AWS S3 bucket that will be polled for list operation. (Required when `queue_url` and `non_aws_bucket_name` are not set). + - name: number_of_workers + type: integer + title: Number of Workers + multi: false + required: false + default: 1 + show_user: true + description: Number of workers that will process the S3 objects listed. (Required when `bucket_arn` is set). + - name: bucket_list_interval + type: text + title: Bucket List Interval + multi: false + required: false + show_user: false + default: 120s + description: Time interval for polling listing of the S3 bucket. + - name: bucket_list_prefix + type: text + title: Bucket List Prefix + multi: false + required: false + show_user: false + description: Prefix to apply for the list request to the S3 bucket. + - name: buffer_size + type: text + title: Buffer Size + multi: false + required: false + show_user: false + description: The size in bytes of the buffer that each harvester uses when fetching a file. This only applies to non-JSON logs. + - name: content_type + type: text + title: Content Type + multi: false + required: false + show_user: false + description: > + A standard MIME type describing the format of the object data. 
This can be set to override the MIME type that was given to the object when it was uploaded. For example application/json. + + - name: encoding + type: text + title: Encoding + multi: false + required: false + show_user: false + description: The file encoding to use for reading data that contains international characters. This only applies to non-JSON logs. + - name: expand_event_list_from_field + type: text + title: Expand Event List from Field + multi: false + required: false + show_user: false + description: > + If the fileset using this input expects to receive multiple messages bundled under a specific field then the config option expand_event_list_from_field value can be assigned the name of the field. This setting will be able to split the messages under the group value into separate events. For example, CloudTrail logs are in JSON format and events are found under the JSON object "Records". + + - name: file_selectors + type: yaml + title: File Selectors + multi: true + required: false + show_user: false + description: > + If the SQS queue will have events that correspond to files that this integration shouldn’t process file_selectors can be used to limit the files that are downloaded. This is a list of selectors which are made up of regex and expand_event_list_from_field options. The regex should match the S3 object key in the SQS message, and the optional expand_event_list_from_field is the same as the global setting. If file_selectors is given, then any global expand_event_list_from_field value is ignored in favor of the ones specified in the file_selectors. Regex syntax is the same as the Go language. Files that don’t match one of the regexes won’t be processed. content_type, parsers, include_s3_metadata,max_bytes, buffer_size, and encoding may also be set for each file selector. + + - name: fips_enabled + type: bool + title: Enable S3 FIPS + default: false + multi: false + required: false + show_user: false + description: Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. + - name: include_s3_metadata + type: text + title: Include S3 Metadata + multi: true + required: false + show_user: false + description: > + This input can include S3 object metadata in the generated events for use in follow-on processing. You must specify the list of keys to include. By default none are included. If the key exists in the S3 response then it will be included in the event as aws.s3.metadata. where the key name as been normalized to all lowercase. + + - name: max_bytes + type: text + title: Max Bytes + default: 10MiB + multi: false + required: false + show_user: false + description: The maximum number of bytes that a single log message can have. All bytes after max_bytes are discarded and not sent. This setting is especially useful for multiline log messages, which can get large. This only applies to non-JSON logs. + - name: max_number_of_messages + type: integer + title: Maximum Concurrent SQS Messages + description: The maximum number of SQS messages that can be inflight at any time. + default: 5 + required: false + show_user: false + - name: parsers + type: yaml + title: Parsers + description: | + This option expects a list of parsers that the payload has to go through. 
For more information see [Parsers](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-aws-s3.html#input-aws-s3-parsers) + required: false + show_user: true + multi: false + default: | + #- multiline: + # pattern: "^ + Enabling this option sets the bucket name as a path in the API call instead of a subdomain. When enabled https://.s3...com becomes https://s3...com/. This is only supported with 3rd party S3 providers. AWS does not support path style. + + - name: provider + type: text + title: Provider Name + multi: false + required: false + show_user: false + description: Name of the 3rd party S3 bucket provider like backblaze or GCP. + - name: queue_url + type: text + title: Queue URL + multi: false + required: false + show_user: true + description: URL of the AWS SQS queue that messages will be received from. + - name: sqs.max_receive_count + type: integer + title: SQS Message Maximum Receive Count + multi: false + required: false + show_user: false + default: 5 + description: The maximum number of times a SQS message should be received (retried) before deleting it. This feature prevents poison-pill messages (messages that can be received but can’t be processed) from consuming resources. + - name: sqs.wait_time + type: text + title: SQS Maximum Wait Time + multi: false + required: false + show_user: false + default: 20s + description: > + The maximum duration that an SQS `ReceiveMessage` call should wait for a message to arrive in the queue before returning. The maximum value is `20s`. + + - name: visibility_timeout + type: text + title: Visibility Timeout + multi: false + required: false + show_user: false + description: The duration that the received messages are hidden from subsequent retrieve requests after being retrieved by a ReceiveMessage request. The maximum is 12 hours. + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: true + default: + - forwarded + - name: processors + type: yaml + title: Processors + multi: false + required: false + show_user: false + description: > + Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details. + + - name: preserve_original_event + required: true + show_user: true + title: Preserve original event + description: Preserves a raw copy of the original event, added to the field `event.original` + type: bool + multi: false + default: false + - name: data_stream.dataset + type: text + required: true + default: aws_logs.generic + show_user: true + title: Dataset name + description: > + Set the name for your dataset. Changing the dataset will send the data to a different index. You can't use `-` in the name of a dataset and only valid characters for [Elasticsearch index names](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html). + + - name: pipeline + type: text + title: Ingest Pipeline + description: | + The Ingest Node pipeline ID to be used by the integration. 
+ required: false + show_user: true diff --git a/test/packages/parallel/aws_logs/data_stream/generic/sample_event.json b/test/packages/parallel/aws_logs/data_stream/generic/sample_event.json new file mode 100644 index 0000000000..e07cb4fe65 --- /dev/null +++ b/test/packages/parallel/aws_logs/data_stream/generic/sample_event.json @@ -0,0 +1,21 @@ +{ + "@timestamp": "2020-02-20T07:02:37.000Z", + "data_stream": { + "namespace": "default", + "type": "logs", + "dataset": "aws_logs.generic" + }, + "ecs": { + "version": "8.0.0" + }, + "event": { + "ingested": "2021-07-19T21:47:04.696803300Z", + "original": "2020-02-20T07:02:37.000Z Feb 20 07:02:37 ip-172-31-81-156 ec2net: [get_meta] Trying to get http://169.254.169.254/latest/meta-data/network/interfaces/macs/12:e2:a9:95:8b:97/local-ipv4s", + "kind": "event", + "dataset": "aws_logs.generic" + }, + "message": "ip-172-31-81-156 ec2net: [get_meta] Trying to get http://169.254.169.254/latest/meta-data/network/interfaces/macs/12:e2:a9:95:8b:97/local-ipv4s", + "tags": [ + "preserve_original_event" + ] +} \ No newline at end of file diff --git a/test/packages/parallel/aws_logs/docs/README.md b/test/packages/parallel/aws_logs/docs/README.md new file mode 100644 index 0000000000..1b16353c9f --- /dev/null +++ b/test/packages/parallel/aws_logs/docs/README.md @@ -0,0 +1,28 @@ +# Custom AWS Log Integration + +The custom AWS input integration offers users two ways to collect logs from AWS: from an [S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ServerLogs.html) (with or without SQS notification) and from [CloudWatch](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/WhatIsCloudWatchLogs.html). +Custom ingest pipelines may be added by adding the name to the pipeline configuration option, creating custom ingest pipelines can be done either through the API or the [Ingest Node Pipeline UI](/app/management/ingest/ingest_pipelines/). + +## Collecting logs from S3 bucket + +When collecting logs from S3 bucket is enabled, users can retrieve logs from S3 +objects that are pointed to by S3 notification events read from an SQS queue or +directly polling list of S3 objects in an S3 bucket. + +The use of SQS notification is preferred: polling list of S3 objects is +expensive in terms of performance and costs and should be preferably used only +when no SQS notification can be attached to the S3 buckets. This input +integration also supports S3 notification from SNS to SQS. + +SQS notification method is enabled setting `queue_url` configuration value. S3 +bucket list polling method is enabled setting `bucket_arn` configuration value +and `number_of_workers` value. Both `queue_url` and `bucket_arn` cannot be set +at the same time and at least one of the two value must be set. + +## Collecting logs from CloudWatch + +When collecting logs from CloudWatch is enabled, users can retrieve logs from +all log streams in a specific log group. `filterLogEvents` AWS API is used to +list log events from the specified log group. Amazon CloudWatch Logs can be used +to store log files from Amazon Elastic Compute Cloud(EC2), AWS CloudTrail, +Route53, and other sources. 
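As a side note on the constraint documented in the README above — `queue_url` and `bucket_arn` are mutually exclusive, and bucket list polling additionally needs `number_of_workers` — the rule can be summarized in a small, purely illustrative Go sketch. The helper below is hypothetical and is not the actual aws-s3 input code.

``` go
package main

import (
	"errors"
	"fmt"
)

// validateS3Source is a hypothetical helper expressing the rule from the README:
// exactly one of queue_url or bucket_arn must be set, and bucket list polling
// also requires number_of_workers.
func validateS3Source(queueURL, bucketARN string, numberOfWorkers int) error {
	switch {
	case queueURL != "" && bucketARN != "":
		return errors.New("queue_url and bucket_arn cannot be set at the same time")
	case queueURL == "" && bucketARN == "":
		return errors.New("one of queue_url or bucket_arn must be set")
	case bucketARN != "" && numberOfWorkers <= 0:
		return errors.New("number_of_workers is required when bucket_arn is set")
	}
	return nil
}

func main() {
	// SQS notification mode: valid with only queue_url set.
	fmt.Println(validateS3Source("https://sqs.us-east-1.amazonaws.com/1234654/example-queue", "", 0))
	// Neither source set: rejected.
	fmt.Println(validateS3Source("", "", 0))
}
```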
diff --git a/test/packages/parallel/aws_logs/img/icon.svg b/test/packages/parallel/aws_logs/img/icon.svg new file mode 100644 index 0000000000..e60377c8bd --- /dev/null +++ b/test/packages/parallel/aws_logs/img/icon.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/test/packages/parallel/aws_logs/manifest.yml b/test/packages/parallel/aws_logs/manifest.yml new file mode 100644 index 0000000000..fe7e906abd --- /dev/null +++ b/test/packages/parallel/aws_logs/manifest.yml @@ -0,0 +1,94 @@ +format_version: 1.0.0 +name: aws_logs +title: Custom AWS Logs +description: Collect raw logs from AWS S3 or CloudWatch with Elastic Agent. +type: integration +# version is set to something very large to so this test package can +# be installed in the package registry regardless of the version of +# the actual apache package in the registry at any given time. +version: 999.999.999 +release: beta +license: basic +categories: + - observability + - custom + - aws +conditions: + kibana.version: ^8.0.0 +policy_templates: + - name: aws_logs + title: Custom AWS Logs + description: Collect raw logs from AWS S3 or CloudWatch with Elastic Agent. + inputs: + - type: aws-s3 + title: Collect Logs from S3 Bucket + description: Collect raw logs from S3 bucket with Elastic Agent. + - type: aws-cloudwatch + title: Collect Logs from CloudWatch + description: Collect raw logs from CloudWatch with Elastic Agent. +icons: + - src: "/img/icon.svg" + type: "image/svg+xml" +vars: + - name: shared_credential_file + type: text + title: Shared Credential File + multi: false + required: false + show_user: false + description: Directory of the shared credentials file + - name: credential_profile_name + type: text + title: Credential Profile Name + multi: false + required: false + show_user: false + - name: access_key_id + type: text + title: Access Key ID + multi: false + required: false + show_user: true + - name: secret_access_key + type: text + title: Secret Access Key + multi: false + required: false + show_user: true + - name: session_token + type: text + title: Session Token + multi: false + required: false + show_user: true + - name: role_arn + type: text + title: Role ARN + multi: false + required: false + show_user: false + - name: endpoint + type: text + title: Endpoint + multi: false + required: false + show_user: false + default: "" + description: URL of the entry point for an AWS web service + - name: default_region + type: text + title: Default AWS Region + multi: false + required: false + show_user: false + default: "" + description: Default region to use prior to connecting to region specific services/endpoints if no AWS region is set from environment variable, credentials or instance profile. If none of the above are set and no default region is set as well, `us-east-1` is used. A region, either from environment variable, credentials or instance profile or from this default region setting, needs to be set when using regions in non-regular AWS environments such as AWS China or US Government Isolated. + - name: proxy_url + type: text + title: Proxy URL + multi: false + required: false + show_user: false + description: URL to proxy connections in the form of http\[s\]://:@: +owner: + github: elastic/obs-cloud-monitoring