diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index e6128c1c75..b051542557 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -46,13 +46,33 @@ export CREATED_DATE # Secrets must be redacted # https://buildkite.com/docs/pipelines/managing-log-output#redacted-environment-variables -if [[ "$BUILDKITE_PIPELINE_SLUG" == "elastic-package" && ("$BUILDKITE_STEP_KEY" =~ ^integration-parallel || "$BUILDKITE_STEP_KEY" =~ ^integration-false_positives) ]]; then +is_step_required_to_upload_safe_logs() { + if [[ "$BUILDKITE_PIPELINE_SLUG" != "elastic-package" ]]; then + return 1 + fi + if [[ "$BUILDKITE_STEP_KEY" =~ ^integration-parallel || "$BUILDKITE_STEP_KEY" =~ ^integration-false_positives ]]; then + return 0 + fi + return 1 +} + +if is_step_required_to_upload_safe_logs; then PRIVATE_CI_GCS_CREDENTIALS_SECRET=$(retry 5 vault kv get -field plaintext -format=json ${PRIVATE_CI_GCS_CREDENTIALS_PATH} | jq -c) export PRIVATE_CI_GCS_CREDENTIALS_SECRET export JOB_GCS_BUCKET_INTERNAL="ingest-buildkite-ci" fi -if [[ "$BUILDKITE_PIPELINE_SLUG" == "elastic-package" && "$BUILDKITE_STEP_KEY" == "integration-parallel-gcp" ]]; then +is_step_testing_gcp () { + if [[ "$BUILDKITE_PIPELINE_SLUG" != "elastic-package" ]]; then + return 1 + fi + if [[ "$BUILDKITE_STEP_KEY" == "integration-parallel-gcp-agent-false" || "$BUILDKITE_STEP_KEY" == "integration-parallel-gcp-agent-true" ]]; then + return 0 + fi + return 1 +} + +if is_step_testing_gcp; then ELASTIC_PACKAGE_GCP_PROJECT_SECRET=$(retry 5 vault read -field projectId ${GCP_SERVICE_ACCOUNT_SECRET_PATH}) export ELASTIC_PACKAGE_GCP_PROJECT_SECRET ELASTIC_PACKAGE_GCP_CREDENTIALS_SECRET=$(retry 5 vault read -field credentials ${GCP_SERVICE_ACCOUNT_SECRET_PATH} | jq -c) @@ -63,7 +83,20 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "elastic-package" && "$BUILDKITE_STEP_KEY" = export GCP_PROJECT_ID=${ELASTIC_PACKAGE_GCP_PROJECT_SECRET} fi -if [[ "$BUILDKITE_PIPELINE_SLUG" == "elastic-package" && ("$BUILDKITE_STEP_KEY" == "integration-parallel-aws" || "$BUILDKITE_STEP_KEY" == "integration-parallel-aws_logs") ]]; then +is_step_testing_aws () { + if [[ "$BUILDKITE_PIPELINE_SLUG" != "elastic-package" ]]; then + return 1 + fi + if [[ "$BUILDKITE_STEP_KEY" == "integration-parallel-aws-agent-false" || "$BUILDKITE_STEP_KEY" == "integration-parallel-aws-agent-true" ]]; then + return 0 + fi + if [[ "$BUILDKITE_STEP_KEY" == "integration-parallel-aws_logs-agent-false" || "$BUILDKITE_STEP_KEY" == "integration-parallel-aws_logs-agent-true" ]]; then + return 0 + fi + return 1 +} + +if is_step_testing_aws; then ELASTIC_PACKAGE_AWS_SECRET_KEY=$(retry 5 vault kv get -field secret_key ${AWS_SERVICE_ACCOUNT_SECRET_PATH}) export ELASTIC_PACKAGE_AWS_SECRET_KEY ELASTIC_PACKAGE_AWS_ACCESS_KEY=$(retry 5 vault kv get -field access_key ${AWS_SERVICE_ACCOUNT_SECRET_PATH}) diff --git a/.buildkite/pipeline.trigger.integration.tests.sh b/.buildkite/pipeline.trigger.integration.tests.sh index afd61045e4..5da18fc06d 100755 --- a/.buildkite/pipeline.trigger.integration.tests.sh +++ b/.buildkite/pipeline.trigger.integration.tests.sh @@ -3,7 +3,6 @@ # exit immediately on failure, or if an undefined variable is used set -eu - # begin the pipeline.yml file echo "steps:" echo " - group: \":terminal: Integration test suite\"" @@ -21,7 +20,7 @@ STACK_COMMAND_TESTS=( ) for test in "${STACK_COMMAND_TESTS[@]}"; do - echo " - label: \":go: Running integration test: ${test}\"" + echo " - label: \":go: Integration test: ${test}\"" echo " command: 
./.buildkite/scripts/integration_tests.sh -t ${test}" echo " agents:" echo " provider: \"gcp\"" @@ -38,11 +37,14 @@ CHECK_PACKAGES_TESTS=( test-check-packages-benchmarks test-check-packages-with-logstash ) +for independent_agent in false true ; do for test in "${CHECK_PACKAGES_TESTS[@]}"; do - echo " - label: \":go: Running integration test: ${test}\"" + echo " - label: \":go: Integration test: ${test} - independent_agent ${independent_agent}\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t ${test}" echo " agents:" echo " provider: \"gcp\"" + echo " env:" + echo " ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT: ${independent_agent}" echo " artifact_paths:" echo " - build/test-results/*.xml" echo " - build/elastic-stack-dump/check-*/logs/*.log" @@ -52,11 +54,12 @@ for test in "${CHECK_PACKAGES_TESTS[@]}"; do echo " - build/kubectl-dump.txt" fi done +done pushd test/packages/false_positives > /dev/null for package in $(find . -maxdepth 1 -mindepth 1 -type d) ; do package_name=$(basename "${package}") - echo " - label: \":go: Running integration test (false positive): ${package_name}\"" + echo " - label: \":go: Integration test (false positive): ${package_name}\"" echo " key: \"integration-false_positives-${package_name}\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-check-packages-false-positives -p ${package_name}" echo " env:" @@ -71,23 +74,26 @@ done popd > /dev/null pushd test/packages/parallel > /dev/null +for independent_agent in false true; do for package in $(find . -maxdepth 1 -mindepth 1 -type d) ; do package_name=$(basename "${package}") - echo " - label: \":go: Running integration test: ${package_name}\"" - echo " key: \"integration-parallel-${package_name}\"" + echo " - label: \":go: Integration test: ${package_name} - independent_agent ${independent_agent}\"" + echo " key: \"integration-parallel-${package_name}-agent-${independent_agent}\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-check-packages-parallel -p ${package_name}" echo " env:" echo " UPLOAD_SAFE_LOGS: 1" + echo " ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT: ${independent_agent}" echo " agents:" echo " provider: \"gcp\"" echo " artifact_paths:" echo " - build/test-results/*.xml" echo " - build/test-coverage/coverage-*.xml" # these files should not be used to compute the final coverage of elastic-package done +done popd > /dev/null -echo " - label: \":go: Running integration test: test-build-zip\"" +echo " - label: \":go: Integration test: test-build-zip\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-build-zip" echo " agents:" echo " provider: \"gcp\"" @@ -95,26 +101,30 @@ echo " artifact_paths:" echo " - build/elastic-stack-dump/build-zip/logs/*.log" echo " - build/packages/*.sig" -echo " - label: \":go: Running integration test: test-install-zip\"" +echo " - label: \":go: Integration test: test-install-zip\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-install-zip" echo " agents:" echo " provider: \"gcp\"" echo " artifact_paths:" echo " - build/elastic-stack-dump/install-zip/logs/*.log" -echo " - label: \":go: Running integration test: test-install-zip-shellinit\"" +echo " - label: \":go: Integration test: test-install-zip-shellinit\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-install-zip-shellinit" echo " agents:" echo " provider: \"gcp\"" echo " artifact_paths:" echo " - build/elastic-stack-dump/install-zip-shellinit/logs/*.log" -echo " - label: \":go: Running integration test: 
test-system-test-flags\"" +for independent_agent in false true; do +echo " - label: \":go: Integration test: test-system-test-flags - independent_agent ${independent_agent}\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-system-test-flags" echo " agents:" echo " provider: \"gcp\"" +echo " env:" +echo " ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT: ${independent_agent}" +done -echo " - label: \":go: Running integration test: test-profiles-command\"" +echo " - label: \":go: Integration test: test-profiles-command\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-profiles-command" echo " env:" echo " DOCKER_COMPOSE_VERSION: \"false\"" @@ -124,7 +134,7 @@ echo " image: \"${LINUX_AGENT_IMAGE}\"" echo " cpu: \"8\"" echo " memory: \"4G\"" -echo " - label: \":go: Running integration test: test-check-update-version\"" +echo " - label: \":go: Integration test: test-check-update-version\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-check-update-version" echo " env:" echo " DEFAULT_VERSION_TAG: v0.80.0" diff --git a/.buildkite/scripts/integration_tests.sh b/.buildkite/scripts/integration_tests.sh index 9973585301..56ec2d1727 100755 --- a/.buildkite/scripts/integration_tests.sh +++ b/.buildkite/scripts/integration_tests.sh @@ -41,6 +41,7 @@ KIND_TARGET="test-check-packages-with-kind" SYSTEM_TEST_FLAGS_TARGET="test-system-test-flags" TMP_FOLDER_TEMPLATE="${TMP_FOLDER_TEMPLATE_BASE}.XXXXXXXXX" GOOGLE_CREDENTIALS_FILENAME="google-cloud-credentials.json" +ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT=${ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT:-"false"} REPO_NAME=$(repo_name "${BUILDKITE_REPO}") REPO_BUILD_TAG="${REPO_NAME}/$(buildkite_pr_branch_build_id)" @@ -132,21 +133,25 @@ if [[ "${TARGET}" == "${PARALLEL_TARGET}" ]] || [[ "${TARGET}" == "${FALSE_POSIT set -e if [[ "${UPLOAD_SAFE_LOGS}" -eq 1 ]] ; then + package_folder="${PACKAGE}" + if [[ "${ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT}" != "false" ]]; then + package_folder="${package_folder}-independent_agent" + fi upload_safe_logs \ "${JOB_GCS_BUCKET_INTERNAL}" \ "build/elastic-stack-dump/check-${PACKAGE}/logs/elastic-agent-internal/*.*" \ - "insecure-logs/${PACKAGE}/elastic-agent-logs/" + "insecure-logs/${package_folder}/elastic-agent-logs/" # required for <8.6.0 upload_safe_logs \ "${JOB_GCS_BUCKET_INTERNAL}" \ "build/elastic-stack-dump/check-${PACKAGE}/logs/elastic-agent-internal/default/*" \ - "insecure-logs/${PACKAGE}/elastic-agent-logs/default/" + "insecure-logs/${package_folder}/elastic-agent-logs/default/" upload_safe_logs \ "${JOB_GCS_BUCKET_INTERNAL}" \ "build/container-logs/*.log" \ - "insecure-logs/${PACKAGE}/container-logs/" + "insecure-logs/${package_folder}/container-logs/" fi if [ $testReturnCode != 0 ]; then diff --git a/cmd/testrunner.go b/cmd/testrunner.go index 1c21669f7e..77225a5906 100644 --- a/cmd/testrunner.go +++ b/cmd/testrunner.go @@ -18,6 +18,7 @@ import ( "github.com/elastic/elastic-package/internal/cobraext" "github.com/elastic/elastic-package/internal/common" "github.com/elastic/elastic-package/internal/elasticsearch" + "github.com/elastic/elastic-package/internal/environment" "github.com/elastic/elastic-package/internal/install" "github.com/elastic/elastic-package/internal/kibana" "github.com/elastic/elastic-package/internal/logger" @@ -52,6 +53,8 @@ These tests allow you to test a package's ability to ingest data end-to-end. 
For details on how to configure and run system tests, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/system_testing.md).`
 
+var enableIndependentAgents = environment.WithElasticPackagePrefix("TEST_ENABLE_INDEPENDENT_AGENT")
+
 func setupTestCommand() *cobraext.Command {
 	var testTypeCmdActions []cobraext.CommandAction
 
@@ -186,6 +189,12 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command
 			return fmt.Errorf("cannot determine if package has data streams: %w", err)
 		}
 
+		runIndependentElasticAgent := false
+		v, ok := os.LookupEnv(enableIndependentAgents)
+		if ok && strings.ToLower(v) != "false" {
+			runIndependentElasticAgent = true
+		}
+
 		configFileFlag := ""
 		runSetup := false
 		runTearDown := false
@@ -340,20 +349,21 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command
 		var results []testrunner.TestResult
 		for _, folder := range testFolders {
 			r, err := testrunner.Run(ctx, testType, testrunner.TestOptions{
-				Profile:            profile,
-				TestFolder:         folder,
-				PackageRootPath:    packageRootPath,
-				GenerateTestResult: generateTestResult,
-				API:                esAPI,
-				KibanaClient:       kibanaClient,
-				DeferCleanup:       deferCleanup,
-				ServiceVariant:     variantFlag,
-				WithCoverage:       testCoverage,
-				CoverageType:       testCoverageFormat,
-				ConfigFilePath:     configFileFlag,
-				RunSetup:           runSetup,
-				RunTearDown:        runTearDown,
-				RunTestsOnly:       runTestsOnly,
+				Profile:                    profile,
+				TestFolder:                 folder,
+				PackageRootPath:            packageRootPath,
+				GenerateTestResult:         generateTestResult,
+				API:                        esAPI,
+				KibanaClient:               kibanaClient,
+				DeferCleanup:               deferCleanup,
+				ServiceVariant:             variantFlag,
+				WithCoverage:               testCoverage,
+				CoverageType:               testCoverageFormat,
+				ConfigFilePath:             configFileFlag,
+				RunSetup:                   runSetup,
+				RunTearDown:                runTearDown,
+				RunTestsOnly:               runTestsOnly,
+				RunIndependentElasticAgent: runIndependentElasticAgent,
 			})
 			results = append(results, r...)
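Note: as the cmd/testrunner.go hunk above shows, the toggle is read straight from the environment, and any value other than "false" (case-insensitive) enables the independent Elastic Agent. A minimal local sketch of how a developer might exercise it (assuming elastic-package is installed, a stack is already running, and the current directory is a package root):

    # hypothetical local run: any value except "false" enables the independent agent
    export ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT=true
    elastic-package test system -v
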
diff --git a/internal/agentdeployer/_static/docker-agent-base.yml b/internal/agentdeployer/_static/docker-agent-base.yml new file mode 100644 index 0000000000..c16f94c3ed --- /dev/null +++ b/internal/agentdeployer/_static/docker-agent-base.yml @@ -0,0 +1,18 @@ +version: "2.3" +services: + elastic-agent: + hostname: ${AGENT_HOSTNAME} + image: "${ELASTIC_AGENT_IMAGE_REF}" + healthcheck: + test: "elastic-agent status" + retries: 180 + interval: 1s + environment: + - FLEET_ENROLL=1 + - FLEET_URL=https://fleet-server:8220 + - KIBANA_HOST=https://kibana:5601 + - FLEET_TOKEN_POLICY_NAME=${FLEET_TOKEN_POLICY_NAME} + volumes: + - ${SERVICE_LOGS_DIR}:/tmp/service_logs/ + - ${LOCAL_CA_CERT}:/etc/ssl/certs/elastic-package.pem + diff --git a/internal/agentdeployer/_static/elastic-agent-managed.yaml.tmpl b/internal/agentdeployer/_static/elastic-agent-managed.yaml.tmpl new file mode 100644 index 0000000000..c2ce38713a --- /dev/null +++ b/internal/agentdeployer/_static/elastic-agent-managed.yaml.tmpl @@ -0,0 +1,293 @@ +apiVersion: v1 +kind: Secret +metadata: + name: elastic-package-certs + namespace: kube-system +data: + ca-cert.pem: {{ .caCertPem }} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: elastic-agent + namespace: kube-system + labels: + app: elastic-agent +spec: + selector: + matchLabels: + app: elastic-agent + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + template: + metadata: + labels: + app: elastic-agent + spec: + tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + serviceAccountName: elastic-agent + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: elastic-agent + image: {{ .elasticAgentImage }} + env: + - name: FLEET_ENROLL + value: "1" + # The ip:port pair of fleet server + - name: FLEET_URL + value: {{ .fleetURL }} + # If left empty KIBANA_HOST, KIBANA_FLEET_USERNAME, KIBANA_FLEET_PASSWORD are needed + - name: FLEET_ENROLLMENT_TOKEN + value: "" + - name: FLEET_TOKEN_POLICY_NAME + value: "{{ .elasticAgentTokenPolicyName }}" + - name: KIBANA_HOST + value: {{ .kibanaURL }} + - name: KIBANA_FLEET_USERNAME + value: "elastic" + - name: KIBANA_FLEET_PASSWORD + value: "changeme" + - name: SSL_CERT_DIR + value: "/etc/ssl/certs:/etc/ssl/elastic-package" + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + securityContext: + runAsUser: 0 + resources: + limits: + memory: 500Mi + requests: + cpu: 100m + memory: 200Mi + livenessProbe: + exec: + command: + - elastic-agent + - status + initialDelaySeconds: 10 + periodSeconds: 10 + volumeMounts: + - name: elastic-package-ca + mountPath: /etc/ssl/elastic-package + readOnly: true + - name: proc + mountPath: /hostfs/proc + readOnly: true + - name: etc-kubernetes + mountPath: /hostfs/etc/kubernetes + - name: var-lib + mountPath: /hostfs/var/lib + readOnly: true + - name: cgroup + mountPath: /hostfs/sys/fs/cgroup + readOnly: true + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + - name: varlog + mountPath: /var/log + readOnly: true + - name: passwd + mountPath: /hostfs/etc/passwd + readOnly: true + - name: group + mountPath: /hostfs/etc/group + readOnly: true + - name: etcsysmd + mountPath: /hostfs/etc/systemd + readOnly: true + volumes: + - name: elastic-package-ca + secret: + secretName: elastic-package-certs + - name: proc + hostPath: + path: /proc + - name: etc-kubernetes + hostPath: + path: /etc/kubernetes + - 
name: var-lib + hostPath: + path: /var/lib + - name: passwd + hostPath: + path: /etc/passwd + - name: group + hostPath: + path: /etc/group + - name: cgroup + hostPath: + path: /sys/fs/cgroup + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + - name: varlog + hostPath: + path: /var/log + - name: etcsysmd + hostPath: + path: /etc/systemd +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: elastic-agent +subjects: + - kind: ServiceAccount + name: elastic-agent + namespace: kube-system +roleRef: + kind: ClusterRole + name: elastic-agent + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + namespace: kube-system + name: elastic-agent +subjects: + - kind: ServiceAccount + name: elastic-agent + namespace: kube-system +roleRef: + kind: Role + name: elastic-agent + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: elastic-agent-kubeadm-config + namespace: kube-system +subjects: + - kind: ServiceAccount + name: elastic-agent + namespace: kube-system +roleRef: + kind: Role + name: elastic-agent-kubeadm-config + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: elastic-agent + labels: + k8s-app: elastic-agent +rules: + - apiGroups: [""] + resources: + - nodes + - namespaces + - events + - pods + - services + - configmaps + - serviceaccounts + - persistentvolumes + - persistentvolumeclaims + verbs: ["get", "list", "watch"] + # Enable this rule only if planing to use kubernetes_secrets provider + #- apiGroups: [""] + # resources: + # - secrets + # verbs: ["get"] + - apiGroups: ["extensions"] + resources: + - replicasets + verbs: ["get", "list", "watch"] + - apiGroups: ["apps"] + resources: + - statefulsets + - deployments + - replicasets + - daemonsets + verbs: ["get", "list", "watch"] + - apiGroups: + - "" + resources: + - nodes/stats + verbs: + - get + - apiGroups: [ "batch" ] + resources: + - jobs + - cronjobs + verbs: [ "get", "list", "watch" ] + # required for apiserver + - nonResourceURLs: + - "/metrics" + verbs: + - get + - apiGroups: ["rbac.authorization.k8s.io"] + resources: + - clusterrolebindings + - clusterroles + - rolebindings + - roles + verbs: ["get", "list", "watch"] + - apiGroups: ["networking.k8s.io"] + resources: + - ingressclasses + - ingresses + verbs: ["get", "list", "watch"] + - apiGroups: ["policy"] + resources: + - podsecuritypolicies + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: elastic-agent + # should be the namespace where elastic-agent is running + namespace: kube-system + labels: + k8s-app: elastic-agent +rules: + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: ["get", "create", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: elastic-agent-kubeadm-config + namespace: kube-system + labels: + k8s-app: elastic-agent +rules: + - apiGroups: [""] + resources: + - configmaps + resourceNames: + - kubeadm-config + verbs: ["get"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: elastic-agent + namespace: kube-system + labels: + k8s-app: elastic-agent +--- diff --git a/internal/agentdeployer/agent.go b/internal/agentdeployer/agent.go new file mode 100644 index 
0000000000..dc05e9b074 --- /dev/null +++ b/internal/agentdeployer/agent.go @@ -0,0 +1,355 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package agentdeployer + +import ( + "context" + _ "embed" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/elastic/elastic-package/internal/compose" + "github.com/elastic/elastic-package/internal/configuration/locations" + "github.com/elastic/elastic-package/internal/docker" + "github.com/elastic/elastic-package/internal/files" + "github.com/elastic/elastic-package/internal/install" + "github.com/elastic/elastic-package/internal/logger" + "github.com/elastic/elastic-package/internal/profile" + "github.com/elastic/elastic-package/internal/stack" +) + +const ( + dockerTestAgentNamePrefix = "elastic-agent" + dockerTestgentDir = "docker_test_agent" + dockerTestAgentDockerCompose = "docker-agent-base.yml" + defaultAgentPolicyName = "Elastic-Agent (elastic-package)" +) + +//go:embed _static/docker-agent-base.yml +var dockerAgentDockerComposeContent []byte + +// CustomAgentDeployer knows how to deploy a custom elastic-agent defined via +// a Docker Compose file. +type DockerComposeAgentDeployer struct { + profile *profile.Profile + dockerComposeFile string + stackVersion string + + variant AgentVariant + policyName string + + agentRunID string + + packageName string + dataStream string + + runTearDown bool + runTestsOnly bool +} + +type DockerComposeAgentDeployerOptions struct { + Profile *profile.Profile + DockerComposeFile string + StackVersion string + Variant AgentVariant + PolicyName string + + PackageName string + DataStream string + + RunTearDown bool + RunTestsOnly bool +} + +var _ AgentDeployer = new(DockerComposeAgentDeployer) + +type dockerComposeDeployedAgent struct { + agentInfo AgentInfo + + ymlPaths []string + project string + variant AgentVariant + env []string +} + +var _ DeployedAgent = new(dockerComposeDeployedAgent) + +// NewCustomAgentDeployer returns a new instance of a deployedCustomAgent. +func NewCustomAgentDeployer(options DockerComposeAgentDeployerOptions) (*DockerComposeAgentDeployer, error) { + return &DockerComposeAgentDeployer{ + profile: options.Profile, + dockerComposeFile: options.DockerComposeFile, + stackVersion: options.StackVersion, + packageName: options.PackageName, + dataStream: options.DataStream, + policyName: options.PolicyName, + variant: options.Variant, + runTearDown: options.RunTearDown, + runTestsOnly: options.RunTestsOnly, + }, nil +} + +// SetUp sets up the service and returns any relevant information. 
+func (d *DockerComposeAgentDeployer) SetUp(ctx context.Context, agentInfo AgentInfo) (DeployedAgent, error) { + logger.Debug("setting up agent using Docker Compose agent deployer") + d.agentRunID = agentInfo.Test.RunID + + appConfig, err := install.Configuration() + if err != nil { + return nil, fmt.Errorf("can't read application configuration: %w", err) + } + + caCertPath, err := stack.FindCACertificate(d.profile) + if err != nil { + return nil, fmt.Errorf("can't locate CA certificate: %w", err) + } + + env := append( + appConfig.StackImageRefs(d.stackVersion).AsEnv(), + fmt.Sprintf("%s=%s", serviceLogsDirEnv, agentInfo.Logs.Folder.Local), + fmt.Sprintf("%s=%s", localCACertEnv, caCertPath), + fmt.Sprintf("%s=%s", fleetPolicyEnv, d.policyName), + fmt.Sprintf("%s=%s", agentHostnameEnv, d.agentHostname()), + ) + + configDir, err := d.installDockerfile() + if err != nil { + return nil, fmt.Errorf("could not create resources for custom agent: %w", err) + } + + ymlPaths := []string{ + filepath.Join(configDir, dockerTestAgentDockerCompose), + } + if d.dockerComposeFile != "" { + ymlPaths = []string{ + d.dockerComposeFile, + filepath.Join(configDir, dockerTestAgentDockerCompose), + } + } + + composeProjectName := fmt.Sprintf("elastic-package-agent-%s", d.agentName()) + + agent := dockerComposeDeployedAgent{ + ymlPaths: ymlPaths, + project: composeProjectName, + variant: d.variant, + env: env, + } + + agentInfo.ConfigDir = configDir + agentInfo.NetworkName = fmt.Sprintf("%s_default", composeProjectName) + + p, err := compose.NewProject(agent.project, agent.ymlPaths...) + if err != nil { + return nil, fmt.Errorf("could not create Docker Compose project for agent: %w", err) + } + + // Verify the Elastic stack network + err = stack.EnsureStackNetworkUp(d.profile) + if err != nil { + return nil, fmt.Errorf("stack network is not ready: %w", err) + } + + // Clean service logs + if d.runTestsOnly { + // service logs folder must no be deleted to avoid breaking log files written + // by the service. If this is required, those files should be rotated or truncated + // so the service can still write to them. 
+		logger.Debugf("Skipping removing service logs folder %s", agentInfo.Logs.Folder.Local)
+	} else {
+		err = files.RemoveContent(agentInfo.Logs.Folder.Local)
+		if err != nil {
+			return nil, fmt.Errorf("removing service logs failed: %w", err)
+		}
+	}
+
+	// Service name defined in the docker-compose file
+	agentInfo.Name = dockerTestAgentNamePrefix
+	agentName := agentInfo.Name
+
+	if d.variant.active() {
+		logger.Infof("Using variant: %s", d.variant.String())
+	}
+
+	opts := compose.CommandOptions{
+		Env:       env,
+		ExtraArgs: []string{"--build", "-d"},
+	}
+
+	if d.runTestsOnly || d.runTearDown {
+		logger.Debug("Skipping bringing up the docker-compose project and connecting the container to the network (non-setup steps)")
+	} else {
+		err = p.Up(ctx, opts)
+		if err != nil {
+			return nil, fmt.Errorf("could not boot up agent using Docker Compose: %w", err)
+		}
+		// Connect the agent container to the stack network (for the purpose of metrics collection)
+		err = docker.ConnectToNetwork(p.ContainerName(agentName), stack.Network(d.profile))
+		if err != nil {
+			return nil, fmt.Errorf("can't attach agent container to the stack network: %w", err)
+		}
+	}
+
+	// requires the agent to be connected to the stack network
+	err = p.WaitForHealthy(ctx, opts)
+	if err != nil {
+		processAgentContainerLogs(ctx, p, compose.CommandOptions{
+			Env: opts.Env,
+		}, agentName)
+		return nil, fmt.Errorf("agent is unhealthy: %w", err)
+	}
+
+	// Build agent container name
+	// For those packages whose tests need to make requests to agent ports (e.g. ti_anomali),
+	// using the ContainerName of the agent (p.ContainerName(agentName)) as in servicedeployer does not work,
+	// probably because it is in another compose project in the case of ti_anomali.
+	agentInfo.Hostname = d.agentHostname()
+
+	logger.Debugf("adding agent container %s internal ports to context", p.ContainerName(agentName))
+	serviceComposeConfig, err := p.Config(ctx, compose.CommandOptions{Env: env})
+	if err != nil {
+		return nil, fmt.Errorf("could not get Docker Compose configuration for agent: %w", err)
+	}
+
+	s := serviceComposeConfig.Services[agentName]
+	agentInfo.Ports = make([]int, len(s.Ports))
+	for idx, port := range s.Ports {
+		agentInfo.Ports[idx] = port.InternalPort
+	}
+
+	// Shortcut to first port for convenience
+	if len(agentInfo.Ports) > 0 {
+		agentInfo.Port = agentInfo.Ports[0]
+	}
+
+	agentInfo.Agent.Host.NamePrefix = agentInfo.Name
+	agent.agentInfo = agentInfo
+	return &agent, nil
+}
+
+func (d *DockerComposeAgentDeployer) agentHostname() string {
+	return fmt.Sprintf("%s-%s-%s", dockerTestAgentNamePrefix, d.agentName(), d.agentRunID)
+}
+
+func (d *DockerComposeAgentDeployer) agentName() string {
+	name := d.packageName
+	if d.variant.Name != "" {
+		name = fmt.Sprintf("%s-%s", name, d.variant.Name)
+	}
+	if d.dataStream != "" && d.dataStream != "." {
+		name = fmt.Sprintf("%s-%s", name, d.dataStream)
+	}
+	return name
+}
+
+// installDockerfile creates the files needed to run the custom elastic agent and returns
+// the directory with these files.
+func (d *DockerComposeAgentDeployer) installDockerfile() (string, error) { + customAgentDir := filepath.Join(d.profile.ProfilePath, fmt.Sprintf("agent-%s", d.agentName())) + err := os.MkdirAll(customAgentDir, 0755) + if err != nil { + return "", fmt.Errorf("failed to create directory for custom agent files: %w", err) + } + + customAgentDockerfile := filepath.Join(customAgentDir, dockerTestAgentDockerCompose) + err = os.WriteFile(customAgentDockerfile, dockerAgentDockerComposeContent, 0644) + if err != nil { + return "", fmt.Errorf("failed to create docker compose file for custom agent: %w", err) + } + + return customAgentDir, nil +} + +func CreateServiceLogsDir(elasticPackagePath *locations.LocationManager, name string) (string, error) { + dirPath := elasticPackagePath.ServiceLogDirPerAgent(name) + err := os.MkdirAll(dirPath, 0755) + if err != nil { + return "", fmt.Errorf("mkdir failed (path: %s): %w", dirPath, err) + } + return dirPath, nil +} + +// ExitCode returns true if the agent is exited and its exit code. +func (s *dockerComposeDeployedAgent) ExitCode(ctx context.Context) (bool, int, error) { + p, err := compose.NewProject(s.project, s.ymlPaths...) + if err != nil { + return false, -1, fmt.Errorf("could not create Docker Compose project for agent: %w", err) + } + + opts := compose.CommandOptions{ + Env: append( + s.env, + s.variant.Env..., + ), + } + + return p.ServiceExitCode(ctx, s.agentInfo.Name, opts) +} + +// Logs returns the logs from the agent starting at the given time +func (s *dockerComposeDeployedAgent) Logs(ctx context.Context, t time.Time) ([]byte, error) { + p, err := compose.NewProject(s.project, s.ymlPaths...) + if err != nil { + return nil, fmt.Errorf("could not create Docker Compose project for agent: %w", err) + } + + opts := compose.CommandOptions{ + Env: append( + s.env, + s.variant.Env..., + ), + } + + return p.Logs(ctx, opts) +} + +// TearDown tears down the agent. +func (s *dockerComposeDeployedAgent) TearDown(ctx context.Context) error { + logger.Debugf("tearing down agent using Docker Compose runner") + defer func() { + err := files.RemoveContent(s.agentInfo.Logs.Folder.Local) + if err != nil { + logger.Errorf("could not remove the agent logs (path: %s)", s.agentInfo.Logs.Folder.Local) + } + + // Remove the configuration dir (e.g. compose scenario files) + if err = os.RemoveAll(s.agentInfo.ConfigDir); err != nil { + logger.Errorf("could not remove the agent configuration directory %w", err) + } + }() + + p, err := compose.NewProject(s.project, s.ymlPaths...) + if err != nil { + return fmt.Errorf("could not create Docker Compose project for service: %w", err) + } + + opts := compose.CommandOptions{ + Env: append( + s.env, + s.variant.Env..., + ), + } + processAgentContainerLogs(ctx, p, opts, s.agentInfo.Name) + + if err := p.Down(ctx, compose.CommandOptions{ + Env: opts.Env, + ExtraArgs: []string{"--volumes"}, // Remove associated volumes. + }); err != nil { + return fmt.Errorf("could not shut down agent using Docker Compose: %w", err) + } + return nil +} + +// Info returns the current context for the agent. +func (s *dockerComposeDeployedAgent) Info() AgentInfo { + return s.agentInfo +} + +// SetInfo sets the current context for the agent. 
+func (s *dockerComposeDeployedAgent) SetInfo(info AgentInfo) { + s.agentInfo = info +} diff --git a/internal/agentdeployer/deployed_agent.go b/internal/agentdeployer/deployed_agent.go new file mode 100644 index 0000000000..6c49aef23b --- /dev/null +++ b/internal/agentdeployer/deployed_agent.go @@ -0,0 +1,31 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package agentdeployer + +import ( + "context" + "errors" + "time" +) + +var ErrNotSupported error = errors.New("not supported") + +// DeployedAgent defines the interface for interacting with an agent that has been deployed. +type DeployedAgent interface { + // TearDown implements the logic for tearing down an agent. + TearDown(ctx context.Context) error + + // Info returns the current information from the agent. + Info() AgentInfo + + // SetInfo sets the current information about the agent. + SetInfo(AgentInfo) + + // ExitCode returns true if the agent is exited and its exit code. + ExitCode(ctx context.Context) (bool, int, error) + + // Logs returns the logs from the agent starting at the given time + Logs(ctx context.Context, t time.Time) ([]byte, error) +} diff --git a/internal/agentdeployer/deployer.go b/internal/agentdeployer/deployer.go new file mode 100644 index 0000000000..daa6b37a7f --- /dev/null +++ b/internal/agentdeployer/deployer.go @@ -0,0 +1,15 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package agentdeployer + +import "context" + +// AgentDeployer defines the interface for deploying an agent. It defines methods for +// controlling the lifecycle of an agent. +type AgentDeployer interface { + // SetUp implements the logic for setting up an agent. It takes a context and returns a + // AgentHandler. + SetUp(context.Context, AgentInfo) (DeployedAgent, error) +} diff --git a/internal/agentdeployer/factory.go b/internal/agentdeployer/factory.go new file mode 100644 index 0000000000..1ef47d16fe --- /dev/null +++ b/internal/agentdeployer/factory.go @@ -0,0 +1,164 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package agentdeployer + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/elastic/elastic-package/internal/logger" + "github.com/elastic/elastic-package/internal/profile" +) + +const ( + TypeTest = "test" + TypeBench = "bench" +) + +// FactoryOptions defines options used to create an instance of a service deployer. +type FactoryOptions struct { + Profile *profile.Profile + + PackageRootPath string + DataStreamRootPath string + DevDeployDir string + Type string + StackVersion string + PolicyName string + + PackageName string + DataStream string + + Variant string + + RunTearDown bool + RunTestsOnly bool + RunSetup bool +} + +// Factory chooses the appropriate service runner for the given data stream, depending +// on service configuration files defined in the package or data stream. 
+func Factory(options FactoryOptions) (AgentDeployer, error) { + devDeployPath, err := FindDevDeployPath(options) + if err != nil { + return nil, fmt.Errorf("can't find \"%s\" directory: %w", options.DevDeployDir, err) + } + + agentDeployerName, err := findAgentDeployer(devDeployPath) + if err != nil { + logger.Debugf("Not found any agent deployer, using default one") + agentDeployerName = "default" + } + // if package defines `_dev/deploy/docker` folder to start their services, it should be + // using the default agent deployer` + if agentDeployerName == "docker" || agentDeployerName == "tf" { + agentDeployerName = "default" + } + + agentDeployerPath := filepath.Join(devDeployPath, agentDeployerName) + + switch agentDeployerName { + case "default": + if options.Type != TypeTest { + return nil, fmt.Errorf("agent deployer is not supported for type %s", options.Type) + } + variant, err := useAgentVariant(devDeployPath, options.Variant) + if err != nil { + return nil, fmt.Errorf("can't use service variant: %w", err) + } + opts := DockerComposeAgentDeployerOptions{ + Profile: options.Profile, + DockerComposeFile: "", // TODO: Allow other docker-compose files to apply overrides + Variant: variant, + StackVersion: options.StackVersion, + PackageName: options.PackageName, + PolicyName: options.PolicyName, + DataStream: options.DataStream, + RunTearDown: options.RunTearDown, + RunTestsOnly: options.RunTestsOnly, + } + return NewCustomAgentDeployer(opts) + case "agent": + // FIXME: should this be just carried out by service deployer? + // FIXME: this docker-compose scenario contains both agent and service + return nil, nil + // if options.Type != TypeTest { + // return nil, fmt.Errorf("agent deployer is not supported for type %s", options.Type) + // } + // customAgentCfgYMLPath := filepath.Join(agentDeployerPath, "custom-agent.yml") + // if _, err := os.Stat(customAgentCfgYMLPath); err != nil { + // return nil, fmt.Errorf("can't find expected file custom-agent.yml: %w", err) + // } + // sv, err := useAgentVariant(devDeployPath, options.Variant) + // if err != nil { + // return nil, fmt.Errorf("can't use service variant: %w", err) + // } + // opts := CustomAgentDeployerOptions{ + // Profile: options.Profile, + // DockerComposeFile: customAgentCfgYMLPath, + // StackVersion: options.StackVersion, + // Variant: sv, + // PackageName: options.PackageName, + // DataStream: options.DataStream, + // RunTearDown: options.RunTearDown, + // RunTestsOnly: options.RunTestsOnly, + // } + // return NewCustomAgentDeployer(opts) + case "k8s": + if _, err := os.Stat(agentDeployerPath); err == nil { + opts := KubernetesAgentDeployerOptions{ + Profile: options.Profile, + DefinitionsDir: agentDeployerPath, + StackVersion: options.StackVersion, + PolicyName: options.PolicyName, + RunSetup: options.RunSetup, + RunTestsOnly: options.RunTestsOnly, + RunTearDown: options.RunTearDown, + } + return NewKubernetesAgentDeployer(opts) + } + } + return nil, fmt.Errorf("unsupported agent deployer (name: %s)", agentDeployerName) +} + +// FindDevDeployPath function returns a path reference to the "_dev/deploy" directory. 
+func FindDevDeployPath(options FactoryOptions) (string, error) { + dataStreamDevDeployPath := filepath.Join(options.DataStreamRootPath, options.DevDeployDir) + if _, err := os.Stat(dataStreamDevDeployPath); err == nil { + return dataStreamDevDeployPath, nil + } else if !errors.Is(err, os.ErrNotExist) { + return "", fmt.Errorf("stat failed for data stream (path: %s): %w", dataStreamDevDeployPath, err) + } + + packageDevDeployPath := filepath.Join(options.PackageRootPath, options.DevDeployDir) + if _, err := os.Stat(packageDevDeployPath); err == nil { + return packageDevDeployPath, nil + } else if !errors.Is(err, os.ErrNotExist) { + return "", fmt.Errorf("stat failed for package (path: %s): %w", packageDevDeployPath, err) + } + + return "", fmt.Errorf("\"%s\" directory doesn't exist", options.DevDeployDir) +} + +func findAgentDeployer(devDeployPath string) (string, error) { + fis, err := os.ReadDir(devDeployPath) + if err != nil { + return "", fmt.Errorf("can't read directory (path: %s): %w", devDeployPath, err) + } + + var folders []os.DirEntry + for _, fi := range fis { + if fi.IsDir() { + folders = append(folders, fi) + } + } + + if len(folders) != 1 { + return "", fmt.Errorf("expected to find only one agent deployer in \"%s\"", devDeployPath) + } + return folders[0].Name(), nil +} diff --git a/internal/agentdeployer/info.go b/internal/agentdeployer/info.go new file mode 100644 index 0000000000..4529286338 --- /dev/null +++ b/internal/agentdeployer/info.go @@ -0,0 +1,81 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package agentdeployer + +const ( + localCACertEnv = "LOCAL_CA_CERT" + serviceLogsDirEnv = "SERVICE_LOGS_DIR" + testRunIDEnv = "TEST_RUN_ID" + fleetPolicyEnv = "FLEET_TOKEN_POLICY_NAME" + agentHostnameEnv = "AGENT_HOSTNAME" + elasticAgentTagsEnv = "ELASTIC_AGENT_TAGS" +) + +// AgentInfo encapsulates context that is both available to a AgentDeployer and +// populated by a DeployedAgent. The fields in AgentInfo may be used in handlebars +// templates in system test configuration files, for example: {{ Hostname }}. +type AgentInfo struct { + // Name is the name of the service. + Name string + + // Hostname is the host name of the service, as addressable from + // the Agent container. + Hostname string + + // NetworkName is the name of the docker network created for the agent, + // required to connect the Service with the agent. + NetworkName string + + // Agent Policy related properties + Policy struct { + // Name is the name of the test Agent Policy created for the given agent + Name string + // ID is the name of the test Agent Policy created for the given agent + ID string + } + + // Ports is a list of ports that the service listens on, as addressable + // from the Agent container. + Ports []int + + // Port points to the first port in the list of ports. It's provided as + // a convenient shortcut as most services tend to listen on a single port. + Port int + + // Logs contains folder paths for log files produced by the service. + Logs struct { + Folder struct { + // Local contains the folder path where log files produced by + // the service are stored on the local filesystem, i.e. where + // elastic-package is running. + Local string + + // Agent contains the folder path where log files produced by + // the service are stored on the Agent container's filesystem. 
+ Agent string + } + } + + // Test related properties. + Test struct { + // RunID identifies the current test run. + RunID string + } + + // Agent related properties. + Agent struct { + // Host describes the machine which is running the agent. + Host struct { + // Name prefix for the host's name + NamePrefix string + } + } + + // CustomProperties store additional data used to boot up the service, e.g. AWS credentials. + CustomProperties map[string]interface{} + + // Directory to store agent configuration files + ConfigDir string +} diff --git a/internal/agentdeployer/kubernetes.go b/internal/agentdeployer/kubernetes.go new file mode 100644 index 0000000000..01aa7052be --- /dev/null +++ b/internal/agentdeployer/kubernetes.go @@ -0,0 +1,214 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package agentdeployer + +import ( + "bytes" + "context" + _ "embed" + "encoding/base64" + "fmt" + "os" + "strings" + "text/template" + "time" + + "github.com/elastic/elastic-package/internal/install" + "github.com/elastic/elastic-package/internal/kind" + "github.com/elastic/elastic-package/internal/kubectl" + "github.com/elastic/elastic-package/internal/logger" + "github.com/elastic/elastic-package/internal/profile" + "github.com/elastic/elastic-package/internal/stack" +) + +// KubernetesAgentDeployer is responsible for deploying resources in the Kubernetes cluster. +type KubernetesAgentDeployer struct { + profile *profile.Profile + definitionsDir string + stackVersion string + policyName string + + runSetup bool + runTestsOnly bool + runTearDown bool +} + +type KubernetesAgentDeployerOptions struct { + Profile *profile.Profile + DefinitionsDir string + StackVersion string + PolicyName string + + RunSetup bool + RunTestsOnly bool + RunTearDown bool +} + +type kubernetesDeployedAgent struct { + agentInfo AgentInfo + profile *profile.Profile + stackVersion string + + definitionsDir string +} + +func (s kubernetesDeployedAgent) TearDown(ctx context.Context) error { + elasticAgentManagedYaml, err := getElasticAgentYAML(s.profile, s.stackVersion, s.agentInfo.Policy.Name) + if err != nil { + return fmt.Errorf("can't retrieve Kubernetes file for Elastic Agent: %w", err) + } + err = kubectl.DeleteStdin(ctx, elasticAgentManagedYaml) + if err != nil { + return fmt.Errorf("can't uninstall Kubernetes resources (path: %s): %w", s.definitionsDir, err) + } + return nil +} + +func (s kubernetesDeployedAgent) ExitCode(ctx context.Context) (bool, int, error) { + return false, -1, ErrNotSupported +} + +func (s kubernetesDeployedAgent) Info() AgentInfo { + return s.agentInfo +} + +func (s *kubernetesDeployedAgent) SetInfo(info AgentInfo) { + s.agentInfo = info +} + +// Logs returns the logs from the agent starting at the given time +func (s *kubernetesDeployedAgent) Logs(ctx context.Context, t time.Time) ([]byte, error) { + return nil, nil +} + +var _ DeployedAgent = new(kubernetesDeployedAgent) + +// NewKubernetesAgentDeployer function creates a new instance of KubernetesAgentDeployer. 
+func NewKubernetesAgentDeployer(opts KubernetesAgentDeployerOptions) (*KubernetesAgentDeployer, error) { + return &KubernetesAgentDeployer{ + profile: opts.Profile, + definitionsDir: opts.DefinitionsDir, + stackVersion: opts.StackVersion, + policyName: opts.PolicyName, + runSetup: opts.RunSetup, + runTestsOnly: opts.RunTestsOnly, + runTearDown: opts.RunTearDown, + }, nil +} + +// SetUp function links the kind container with elastic-package-stack network, installs Elastic-Agent and optionally +// custom YAML definitions. +func (ksd KubernetesAgentDeployer) SetUp(ctx context.Context, agentInfo AgentInfo) (DeployedAgent, error) { + err := kind.VerifyContext(ctx) + if err != nil { + return nil, fmt.Errorf("kind context verification failed: %w", err) + } + + if ksd.runTearDown || ksd.runTestsOnly { + logger.Debug("Skip connect kind to Elastic stack network") + } else { + err = kind.ConnectToElasticStackNetwork(ksd.profile) + if err != nil { + return nil, fmt.Errorf("can't connect control plane to Elastic stack network: %w", err) + } + } + + if ksd.runTearDown || ksd.runTestsOnly { + logger.Debug("Skip install Elastic Agent in cluster") + } else { + err = installElasticAgentInCluster(ctx, ksd.profile, ksd.stackVersion, agentInfo.Policy.Name) + if err != nil { + return nil, fmt.Errorf("can't install Elastic-Agent in the Kubernetes cluster: %w", err) + } + } + + agentInfo.Name = kind.ControlPlaneContainerName + agentInfo.Hostname = kind.ControlPlaneContainerName + // kind-control-plane is the name of the kind host where Pod is running since we use hostNetwork setting + // to deploy Agent Pod. Because of this, hostname inside pod will be equal to the name of the k8s host. + agentInfo.Agent.Host.NamePrefix = "kind-control-plane" + return &kubernetesDeployedAgent{ + agentInfo: agentInfo, + definitionsDir: ksd.definitionsDir, + profile: ksd.profile, + stackVersion: ksd.stackVersion, + }, nil +} + +var _ AgentDeployer = new(KubernetesAgentDeployer) + +func installElasticAgentInCluster(ctx context.Context, profile *profile.Profile, stackVersion, policyName string) error { + logger.Debug("install Elastic Agent in the Kubernetes cluster") + + elasticAgentManagedYaml, err := getElasticAgentYAML(profile, stackVersion, policyName) + if err != nil { + return fmt.Errorf("can't retrieve Kubernetes file for Elastic Agent: %w", err) + } + + err = kubectl.ApplyStdin(ctx, elasticAgentManagedYaml) + if err != nil { + return fmt.Errorf("can't install Elastic-Agent in Kubernetes cluster: %w", err) + } + + // DEBUG DaemonSet is not ready: kube-system/elastic-agent. 
0 out of 1 expected pods have been scheduled + + return nil +} + +//go:embed _static/elastic-agent-managed.yaml.tmpl +var elasticAgentManagedYamlTmpl string + +func getElasticAgentYAML(profile *profile.Profile, stackVersion, policyName string) ([]byte, error) { + logger.Debugf("Prepare YAML definition for Elastic Agent running in stack v%s", stackVersion) + + appConfig, err := install.Configuration() + if err != nil { + return nil, fmt.Errorf("can't read application configuration: %w", err) + } + + caCert, err := readCACertBase64(profile) + if err != nil { + return nil, fmt.Errorf("can't read certificate authority file: %w", err) + } + + tmpl := template.Must(template.New("elastic-agent.yml").Parse(elasticAgentManagedYamlTmpl)) + + var elasticAgentYaml bytes.Buffer + err = tmpl.Execute(&elasticAgentYaml, map[string]string{ + "fleetURL": "https://fleet-server:8220", + "kibanaURL": "https://kibana:5601", + "caCertPem": caCert, + "elasticAgentImage": appConfig.StackImageRefs(stackVersion).ElasticAgent, + "elasticAgentTokenPolicyName": getTokenPolicyName(stackVersion, policyName), + }) + if err != nil { + return nil, fmt.Errorf("can't generate elastic agent manifest: %w", err) + } + + return elasticAgentYaml.Bytes(), nil +} + +func readCACertBase64(profile *profile.Profile) (string, error) { + caCertPath, err := stack.FindCACertificate(profile) + if err != nil { + return "", fmt.Errorf("can't locate CA certificate: %w", err) + } + + d, err := os.ReadFile(caCertPath) + if err != nil { + return "", err + } + + return base64.StdEncoding.EncodeToString(d), nil +} + +// getTokenPolicyName function returns the policy name for the 8.x Elastic stack. The agent's policy +// is predefined in the Kibana configuration file. The logic is not present in older stacks. +func getTokenPolicyName(stackVersion, policyName string) string { + if strings.HasPrefix(stackVersion, "8.") { + return policyName + } + return "" +} diff --git a/internal/agentdeployer/logs.go b/internal/agentdeployer/logs.go new file mode 100644 index 0000000000..3d23f6219f --- /dev/null +++ b/internal/agentdeployer/logs.go @@ -0,0 +1,55 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+package agentdeployer
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/elastic/elastic-package/internal/builder"
+	"github.com/elastic/elastic-package/internal/compose"
+	"github.com/elastic/elastic-package/internal/logger"
+)
+
+func processAgentContainerLogs(ctx context.Context, p *compose.Project, opts compose.CommandOptions, agentName string) {
+	content, err := p.Logs(ctx, opts)
+	if err != nil {
+		logger.Errorf("can't export agent logs: %v", err)
+		return
+	}
+
+	if len(content) == 0 {
+		logger.Info("agent container hasn't written any logs.")
+		return
+	}
+
+	err = writeAgentContainerLogs(agentName, content)
+	if err != nil {
+		logger.Errorf("can't write agent container logs: %v", err)
+	}
+}
+
+func writeAgentContainerLogs(agentName string, content []byte) error {
+	buildDir, err := builder.BuildDirectory()
+	if err != nil {
+		return fmt.Errorf("locating build directory failed: %w", err)
+	}
+
+	containerLogsDir := filepath.Join(buildDir, "container-logs")
+	err = os.MkdirAll(containerLogsDir, 0o755)
+	if err != nil {
+		return fmt.Errorf("can't create directory for agent container logs (path: %s): %w", containerLogsDir, err)
+	}
+
+	containerLogsFilepath := filepath.Join(containerLogsDir, fmt.Sprintf("%s-%d.log", agentName, time.Now().UnixNano()))
+	logger.Infof("Write container logs to file: %s", containerLogsFilepath)
+	err = os.WriteFile(containerLogsFilepath, content, 0o644)
+	if err != nil {
+		return fmt.Errorf("can't write container logs to file (path: %s): %w", containerLogsFilepath, err)
+	}
+	return nil
+}
diff --git a/internal/agentdeployer/variants.go b/internal/agentdeployer/variants.go
new file mode 100644
index 0000000000..3326c7e8b7
--- /dev/null
+++ b/internal/agentdeployer/variants.go
@@ -0,0 +1,98 @@
+// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+// or more contributor license agreements. Licensed under the Elastic License;
+// you may not use this file except in compliance with the Elastic License.
+
+package agentdeployer
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+// VariantsFile describes different variants of the service under test.
+type VariantsFile struct {
+	Default  string `yaml:"default"`
+	Variants map[string]Environment
+}
+
+// Environment is a key-value map storing environment variables.
+type Environment map[string]string
+
+// AgentVariant describes a variant of the service using Environment variables.
+type AgentVariant struct {
+	Name string
+	Env  []string // Environment variables in format of pairs: key=value
+}
+
+// String method returns a string representation of the agent variant.
+func (sv *AgentVariant) String() string {
+	return fmt.Sprintf("AgentVariant{Name: %s, Env: %s}", sv.Name, strings.Join(sv.Env, ","))
+}
+
+func (sv *AgentVariant) active() bool {
+	return sv.Name != ""
+}
+
+// ReadVariantsFile function reads available service variants.
+func ReadVariantsFile(devDeployPath string) (*VariantsFile, error) { + variantsYmlPath := filepath.Join(devDeployPath, "variants.yml") + _, err := os.Stat(variantsYmlPath) + if errors.Is(err, os.ErrNotExist) { + return nil, os.ErrNotExist + } + if err != nil { + return nil, fmt.Errorf("can't stat variants file: %w", err) + } + + content, err := os.ReadFile(variantsYmlPath) + if err != nil { + return nil, fmt.Errorf("can't read variants file: %w", err) + } + + var f VariantsFile + err = yaml.Unmarshal(content, &f) + if err != nil { + return nil, fmt.Errorf("can't unmarshal variants file: %w", err) + } + return &f, nil +} + +func useAgentVariant(devDeployPath, selected string) (AgentVariant, error) { + f, err := ReadVariantsFile(devDeployPath) + if errors.Is(err, os.ErrNotExist) { + return AgentVariant{}, nil // no "variants.yml" present + } else if err != nil { + return AgentVariant{}, err + } + + if selected == "" { + selected = f.Default + } + + if f.Default == "" { + return AgentVariant{}, errors.New("default variant is undefined") + } + + env, ok := f.Variants[selected] + if !ok { + return AgentVariant{}, fmt.Errorf(`variant "%s" is missing`, selected) + } + + return AgentVariant{ + Name: selected, + Env: asEnvVarPairs(env), + }, nil +} + +func asEnvVarPairs(env Environment) []string { + var pairs []string + for k, v := range env { + pairs = append(pairs, fmt.Sprintf("%s=%s", k, v)) + } + return pairs +} diff --git a/internal/benchrunner/runners/system/runner.go b/internal/benchrunner/runners/system/runner.go index 723979da58..206cfdf22a 100644 --- a/internal/benchrunner/runners/system/runner.go +++ b/internal/benchrunner/runners/system/runner.go @@ -240,12 +240,13 @@ func (r *runner) run(ctx context.Context) (report reporters.Reportable, err erro logger.Debug("setting up service...") devDeployDir := filepath.Clean(filepath.Join(r.options.BenchPath, "deploy")) opts := servicedeployer.FactoryOptions{ - PackageRootPath: r.options.PackageRootPath, - DevDeployDir: devDeployDir, - Variant: r.options.Variant, - Profile: r.options.Profile, - Type: servicedeployer.TypeBench, - StackVersion: stackVersion.Version(), + PackageRootPath: r.options.PackageRootPath, + DevDeployDir: devDeployDir, + Variant: r.options.Variant, + Profile: r.options.Profile, + Type: servicedeployer.TypeBench, + StackVersion: stackVersion.Version(), + DeployIndependentAgent: false, } serviceDeployer, err := servicedeployer.Factory(opts) diff --git a/internal/configuration/locations/locations.go b/internal/configuration/locations/locations.go index 3f4fba0fbd..bd93d4bb4c 100644 --- a/internal/configuration/locations/locations.go +++ b/internal/configuration/locations/locations.go @@ -98,6 +98,11 @@ func (loc LocationManager) ServiceLogDir() string { return filepath.Join(loc.stackPath, serviceLogsDir) } +// ServiceLogDirPerAgent returns the log directory assigned to an specific agent +func (loc LocationManager) ServiceLogDirPerAgent(name string) string { + return filepath.Join(loc.stackPath, serviceLogsDir, name) +} + // ServiceOutputDir returns the output directory func (loc LocationManager) ServiceOutputDir() string { return filepath.Join(loc.stackPath, serviceOutputDir) diff --git a/internal/kibana/agents.go b/internal/kibana/agents.go index 07b949563d..5014a852fa 100644 --- a/internal/kibana/agents.go +++ b/internal/kibana/agents.go @@ -35,6 +35,7 @@ type Agent struct { } `json:"agent"` } `json:"elastic"` } `json:"local_metadata"` + Status string `json:"status"` } // String method returns string representation of an 
agent.
@@ -89,6 +90,23 @@ func (c *Client) AssignPolicyToAgent(ctx context.Context, a Agent, p Policy) err
 	return nil
 }
 
+// RemoveAgent unenrolls the given agent
+func (c *Client) RemoveAgent(ctx context.Context, a Agent) error {
+	reqBody := `{ "revoke": true, "force": true }`
+
+	path := fmt.Sprintf("%s/agents/%s/unenroll", FleetAPI, a.ID)
+	statusCode, respBody, err := c.post(ctx, path, []byte(reqBody))
+	if err != nil {
+		return fmt.Errorf("could not unenroll agent: %w", err)
+	}
+
+	if statusCode != http.StatusOK {
+		return fmt.Errorf("could not unenroll agent; API status code = %d; response body = %s", statusCode, respBody)
+	}
+
+	return nil
+}
+
 func (c *Client) waitUntilPolicyAssigned(ctx context.Context, a Agent, p Policy) error {
 	ctx, cancel := context.WithTimeout(ctx, waitForPolicyAssignedTimeout)
 	defer cancel()
diff --git a/internal/kubectl/kubectl.go b/internal/kubectl/kubectl.go
index 7d224148f3..52682c0e87 100644
--- a/internal/kubectl/kubectl.go
+++ b/internal/kubectl/kubectl.go
@@ -74,3 +74,21 @@ func applyKubernetesResourcesStdin(ctx context.Context, input []byte) ([]byte, e
 	}
 	return output, nil
 }
+
+// deleteKubernetesResourcesStdin deletes a Kubernetes manifest provided as stdin.
+// It returns the resources deleted as output and an error
+func deleteKubernetesResourcesStdin(ctx context.Context, input []byte) ([]byte, error) {
+	// create kubectl delete command
+	kubectlCmd := exec.CommandContext(ctx, "kubectl", "delete", "-f", "-")
+	// Stdin of kubectl command is the manifest provided
+	kubectlCmd.Stdin = bytes.NewReader(input)
+	errOutput := new(bytes.Buffer)
+	kubectlCmd.Stderr = errOutput
+
+	logger.Debugf("run command: %s", kubectlCmd)
+	output, err := kubectlCmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("kubectl delete failed (stderr=%q): %w", errOutput.String(), err)
+	}
+	return output, nil
+}
diff --git a/internal/kubectl/kubectl_delete.go b/internal/kubectl/kubectl_delete.go
index 6f3c8ea72f..a10c65142f 100644
--- a/internal/kubectl/kubectl_delete.go
+++ b/internal/kubectl/kubectl_delete.go
@@ -4,10 +4,21 @@
 
 package kubectl
 
-import "context"
+import (
+	"context"
+
+	"github.com/elastic/elastic-package/internal/logger"
+)
 
 // Delete function removes resources from the Kubernetes cluster based on provided definitions.
 func Delete(ctx context.Context, definitionsPath []string) error {
 	_, err := modifyKubernetesResources(ctx, "delete", definitionsPath)
 	return err
 }
+
+// DeleteStdin function removes resources from the Kubernetes cluster based on provided definitions.
+func DeleteStdin(ctx context.Context, out []byte) error { + logger.Debugf("Delete Kubernetes stdin") + _, err := deleteKubernetesResourcesStdin(ctx, out) + return err +} diff --git a/internal/service/boot.go b/internal/service/boot.go index 17ac0c439f..bea9542653 100644 --- a/internal/service/boot.go +++ b/internal/service/boot.go @@ -36,12 +36,13 @@ type Options struct { func BootUp(ctx context.Context, options Options) error { logger.Debugf("Create new instance of the service deployer") serviceDeployer, err := servicedeployer.Factory(servicedeployer.FactoryOptions{ - Profile: options.Profile, - PackageRootPath: options.DataStreamRootPath, - DataStreamRootPath: options.DataStreamRootPath, - DevDeployDir: options.DevDeployDir, - Variant: options.Variant, - StackVersion: options.StackVersion, + Profile: options.Profile, + PackageRootPath: options.DataStreamRootPath, + DataStreamRootPath: options.DataStreamRootPath, + DevDeployDir: options.DevDeployDir, + Variant: options.Variant, + StackVersion: options.StackVersion, + DeployIndependentAgent: false, }) if err != nil { return fmt.Errorf("can't create the service deployer instance: %w", err) diff --git a/internal/servicedeployer/_static/docker-custom-agent-base.yml b/internal/servicedeployer/_static/docker-custom-agent-base.yml index 610ef273df..84a1fcefac 100644 --- a/internal/servicedeployer/_static/docker-custom-agent-base.yml +++ b/internal/servicedeployer/_static/docker-custom-agent-base.yml @@ -11,6 +11,7 @@ services: - FLEET_ENROLL=1 - FLEET_URL=https://fleet-server:8220 - KIBANA_HOST=https://kibana:5601 + - FLEET_TOKEN_POLICY_NAME=${FLEET_TOKEN_POLICY_NAME} volumes: - ${SERVICE_LOGS_DIR}:/tmp/service_logs/ - ${LOCAL_CA_CERT}:/etc/ssl/certs/elastic-package.pem diff --git a/internal/servicedeployer/elastic-agent-managed.yaml.tmpl b/internal/servicedeployer/_static/elastic-agent-managed.yaml.tmpl similarity index 100% rename from internal/servicedeployer/elastic-agent-managed.yaml.tmpl rename to internal/servicedeployer/_static/elastic-agent-managed.yaml.tmpl diff --git a/internal/servicedeployer/compose.go b/internal/servicedeployer/compose.go index ca9b0e5426..1e11b0fcee 100644 --- a/internal/servicedeployer/compose.go +++ b/internal/servicedeployer/compose.go @@ -28,6 +28,8 @@ type DockerComposeServiceDeployer struct { ymlPaths []string variant ServiceVariant + deployIndependentAgent bool + runTearDown bool runTestsOnly bool } @@ -37,6 +39,8 @@ type DockerComposeServiceDeployerOptions struct { YmlPaths []string Variant ServiceVariant + DeployIndependentAgent bool + RunTearDown bool RunTestsOnly bool } @@ -52,14 +56,17 @@ type dockerComposeDeployedService struct { env []string } +var _ ServiceDeployer = new(DockerComposeServiceDeployer) + // NewDockerComposeServiceDeployer returns a new instance of a DockerComposeServiceDeployer. 
func NewDockerComposeServiceDeployer(options DockerComposeServiceDeployerOptions) (*DockerComposeServiceDeployer, error) { return &DockerComposeServiceDeployer{ - profile: options.Profile, - ymlPaths: options.YmlPaths, - variant: options.Variant, - runTearDown: options.RunTearDown, - runTestsOnly: options.RunTestsOnly, + profile: options.Profile, + ymlPaths: options.YmlPaths, + variant: options.Variant, + runTearDown: options.RunTearDown, + runTestsOnly: options.RunTestsOnly, + deployIndependentAgent: options.DeployIndependentAgent, }, nil } @@ -70,7 +77,9 @@ func (d *DockerComposeServiceDeployer) SetUp(ctx context.Context, svcInfo Servic ymlPaths: d.ymlPaths, project: "elastic-package-service", variant: d.variant, - env: []string{fmt.Sprintf("%s=%s", serviceLogsDirEnv, svcInfo.Logs.Folder.Local)}, + env: []string{ + fmt.Sprintf("%s=%s", serviceLogsDirEnv, svcInfo.Logs.Folder.Local), + }, } p, err := compose.NewProject(service.project, service.ymlPaths...) @@ -130,10 +139,18 @@ func (d *DockerComposeServiceDeployer) SetUp(ctx context.Context, svcInfo Servic if d.runTearDown || d.runTestsOnly { logger.Debug("Skipping connect container to network (non setup steps)") } else { - // Connect service network with stack network (for the purpose of metrics collection) - err = docker.ConnectToNetwork(p.ContainerName(serviceName), stack.Network(d.profile)) - if err != nil { - return nil, fmt.Errorf("can't attach service container to the stack network: %w", err) + if d.deployIndependentAgent { + // Connect service network with agent network + err = docker.ConnectToNetwork(p.ContainerName(serviceName), svcInfo.AgentNetworkName) + if err != nil { + return nil, fmt.Errorf("can't attach service container to the agent network: %w", err) + } + } else { + // Connect service network with stack network (for the purpose of metrics collection) + err = docker.ConnectToNetwork(p.ContainerName(serviceName), stack.Network(d.profile)) + if err != nil { + return nil, fmt.Errorf("can't attach service container to the stack network: %w", err) + } } } @@ -142,7 +159,7 @@ func (d *DockerComposeServiceDeployer) SetUp(ctx context.Context, svcInfo Servic logger.Debugf("adding service container %s internal ports to context", p.ContainerName(serviceName)) serviceComposeConfig, err := p.Config(ctx, compose.CommandOptions{ - Env: []string{fmt.Sprintf("%s=%s", serviceLogsDirEnv, svcInfo.Logs.Folder.Local)}, + Env: service.env, }) if err != nil { return nil, fmt.Errorf("could not get Docker Compose configuration for service: %w", err) diff --git a/internal/servicedeployer/custom_agent.go b/internal/servicedeployer/custom_agent.go index c33638b3e7..f6cc5e2594 100644 --- a/internal/servicedeployer/custom_agent.go +++ b/internal/servicedeployer/custom_agent.go @@ -36,6 +36,7 @@ type CustomAgentDeployer struct { profile *profile.Profile dockerComposeFile string stackVersion string + policyName string runTearDown bool runTestsOnly bool @@ -45,17 +46,21 @@ type CustomAgentDeployerOptions struct { Profile *profile.Profile DockerComposeFile string StackVersion string + PolicyName string RunTearDown bool RunTestsOnly bool } +var _ ServiceDeployer = new(CustomAgentDeployer) + // NewCustomAgentDeployer returns a new instance of a deployedCustomAgent. 
func NewCustomAgentDeployer(options CustomAgentDeployerOptions) (*CustomAgentDeployer, error) { return &CustomAgentDeployer{ profile: options.Profile, dockerComposeFile: options.DockerComposeFile, stackVersion: options.StackVersion, + policyName: options.PolicyName, runTearDown: options.RunTearDown, runTestsOnly: options.RunTestsOnly, }, nil @@ -75,10 +80,18 @@ func (d *CustomAgentDeployer) SetUp(ctx context.Context, svcInfo ServiceInfo) (D return nil, fmt.Errorf("can't locate CA certificate: %w", err) } + // Build service container name + // FIXME: Currently, this service deployer starts a new agent on its own and + // it cannot directly use the `svcInfo.AgentHostname` value + + // Set alias for custom agent + svcInfo.Hostname = dockerCustomAgentName + env := append( appConfig.StackImageRefs(d.stackVersion).AsEnv(), fmt.Sprintf("%s=%s", serviceLogsDirEnv, svcInfo.Logs.Folder.Local), fmt.Sprintf("%s=%s", localCACertEnv, caCertPath), + fmt.Sprintf("%s=%s", fleetPolicyEnv, d.policyName), ) configDir, err := d.installDockerfile() @@ -139,6 +152,10 @@ func (d *CustomAgentDeployer) SetUp(ctx context.Context, svcInfo ServiceInfo) (D if err != nil { return nil, fmt.Errorf("could not boot up service using Docker Compose: %w", err) } + + // TODO: if this agent is moved to "agentdeployer", this container should be connected + // to the network of the agent as done in servicedeployer/compose.go + // Connect service network with stack network (for the purpose of metrics collection) err = docker.ConnectToNetwork(p.ContainerName(serviceName), stack.Network(d.profile)) if err != nil { @@ -155,9 +172,6 @@ func (d *CustomAgentDeployer) SetUp(ctx context.Context, svcInfo ServiceInfo) (D return nil, fmt.Errorf("service is unhealthy: %w", err) } - // Build service container name - svcInfo.Hostname = p.ContainerName(serviceName) - logger.Debugf("adding service container %s internal ports to context", p.ContainerName(serviceName)) serviceComposeConfig, err := p.Config(ctx, compose.CommandOptions{Env: env}) if err != nil { diff --git a/internal/servicedeployer/factory.go b/internal/servicedeployer/factory.go index 4fe8680901..a25c54cb4c 100644 --- a/internal/servicedeployer/factory.go +++ b/internal/servicedeployer/factory.go @@ -22,11 +22,17 @@ const ( type FactoryOptions struct { Profile *profile.Profile - PackageRootPath string - DataStreamRootPath string - DevDeployDir string - Type string - StackVersion string + PackageRootPath string + DataStreamRootPath string + DevDeployDir string + Type string + StackVersion string + DeployIndependentAgent bool + + PolicyName string + + PackageName string + DataStream string Variant string @@ -54,12 +60,13 @@ func Factory(options FactoryOptions) (ServiceDeployer, error) { case "k8s": if _, err := os.Stat(serviceDeployerPath); err == nil { opts := KubernetesServiceDeployerOptions{ - Profile: options.Profile, - DefinitionsDir: serviceDeployerPath, - StackVersion: options.StackVersion, - RunSetup: options.RunSetup, - RunTestsOnly: options.RunTestsOnly, - RunTearDown: options.RunTearDown, + Profile: options.Profile, + DefinitionsDir: serviceDeployerPath, + StackVersion: options.StackVersion, + RunSetup: options.RunSetup, + RunTestsOnly: options.RunTestsOnly, + RunTearDown: options.RunTearDown, + DeployIndependentAgent: options.DeployIndependentAgent, } return NewKubernetesServiceDeployer(opts) } @@ -71,15 +78,17 @@ func Factory(options FactoryOptions) (ServiceDeployer, error) { return nil, fmt.Errorf("can't use service variant: %w", err) } opts :=
DockerComposeServiceDeployerOptions{ - Profile: options.Profile, - YmlPaths: []string{dockerComposeYMLPath}, - Variant: sv, - RunTearDown: options.RunTearDown, - RunTestsOnly: options.RunTestsOnly, + Profile: options.Profile, + YmlPaths: []string{dockerComposeYMLPath}, + Variant: sv, + RunTearDown: options.RunTearDown, + RunTestsOnly: options.RunTestsOnly, + DeployIndependentAgent: options.DeployIndependentAgent, } return NewDockerComposeServiceDeployer(opts) } case "agent": + // FIXME: This docker-compose scenario also contains the definition of the elastic-agent container if options.Type != TypeTest { return nil, fmt.Errorf("agent deployer is not supported for type %s", options.Type) } @@ -87,12 +96,16 @@ func Factory(options FactoryOptions) (ServiceDeployer, error) { if _, err := os.Stat(customAgentCfgYMLPath); err != nil { return nil, fmt.Errorf("can't find expected file custom-agent.yml: %w", err) } + policyName := getTokenPolicyName(options.StackVersion, options.PolicyName) + opts := CustomAgentDeployerOptions{ Profile: options.Profile, DockerComposeFile: customAgentCfgYMLPath, StackVersion: options.StackVersion, - RunTearDown: options.RunTearDown, - RunTestsOnly: options.RunTestsOnly, + PolicyName: policyName, + + RunTearDown: options.RunTearDown, + RunTestsOnly: options.RunTestsOnly, } return NewCustomAgentDeployer(opts) case "tf": @@ -100,7 +113,10 @@ func Factory(options FactoryOptions) (ServiceDeployer, error) { return nil, errors.New("terraform service deployer not supported to run by steps") } if _, err := os.Stat(serviceDeployerPath); err == nil { - return NewTerraformServiceDeployer(serviceDeployerPath) + opts := TerraformServiceDeployerOptions{ + DefinitionsDir: serviceDeployerPath, + } + return NewTerraformServiceDeployer(opts) } } return nil, fmt.Errorf("unsupported service deployer (name: %s)", serviceDeployerName) diff --git a/internal/servicedeployer/info.go b/internal/servicedeployer/info.go index d8a6dd1be4..252e743798 100644 --- a/internal/servicedeployer/info.go +++ b/internal/servicedeployer/info.go @@ -5,9 +5,13 @@ package servicedeployer const ( - localCACertEnv = "LOCAL_CA_CERT" - serviceLogsDirEnv = "SERVICE_LOGS_DIR" - testRunIDEnv = "TEST_RUN_ID" + localCACertEnv = "LOCAL_CA_CERT" + serviceLogsDirEnv = "SERVICE_LOGS_DIR" + testRunIDEnv = "TEST_RUN_ID" + elasticAgentTagsEnv = "ELASTIC_AGENT_TAGS" + fleetPolicyEnv = "FLEET_TOKEN_POLICY_NAME" + + defaulFleetTokenPolicyName = "Elastic-Agent (elastic-package)" ) // ServiceInfo encapsulates context that is both available to a ServiceDeployer and @@ -21,6 +25,9 @@ type ServiceInfo struct { // the Agent container. Hostname string + // AgentNetworkName is the network name where the agent is running. + AgentNetworkName string + // Ports is a list of ports that the service listens on, as addressable // from the Agent container.
Ports []int diff --git a/internal/servicedeployer/kubernetes.go b/internal/servicedeployer/kubernetes.go index 543044efee..537836e098 100644 --- a/internal/servicedeployer/kubernetes.go +++ b/internal/servicedeployer/kubernetes.go @@ -29,6 +29,8 @@ type KubernetesServiceDeployer struct { definitionsDir string stackVersion string + deployIndependentAgent bool + runSetup bool runTestsOnly bool runTearDown bool @@ -39,6 +41,8 @@ type KubernetesServiceDeployerOptions struct { DefinitionsDir string StackVersion string + DeployIndependentAgent bool + RunSetup bool RunTestsOnly bool RunTearDown bool @@ -92,12 +96,13 @@ var _ DeployedService = new(kubernetesDeployedService) // NewKubernetesServiceDeployer function creates a new instance of KubernetesServiceDeployer. func NewKubernetesServiceDeployer(opts KubernetesServiceDeployerOptions) (*KubernetesServiceDeployer, error) { return &KubernetesServiceDeployer{ - profile: opts.Profile, - definitionsDir: opts.DefinitionsDir, - stackVersion: opts.StackVersion, - runSetup: opts.RunSetup, - runTestsOnly: opts.RunTestsOnly, - runTearDown: opts.RunTearDown, + profile: opts.Profile, + definitionsDir: opts.DefinitionsDir, + stackVersion: opts.StackVersion, + runSetup: opts.RunSetup, + runTestsOnly: opts.RunTestsOnly, + runTearDown: opts.RunTearDown, + deployIndependentAgent: opts.DeployIndependentAgent, }, nil } @@ -118,7 +123,7 @@ func (ksd KubernetesServiceDeployer) SetUp(ctx context.Context, svcInfo ServiceI } } - if ksd.runTearDown || ksd.runTestsOnly { + if ksd.runTearDown || ksd.runTestsOnly || ksd.deployIndependentAgent { logger.Debug("Skip install Elastic Agent in cluster") } else { err = installElasticAgentInCluster(ctx, ksd.profile, ksd.stackVersion) @@ -193,7 +198,7 @@ func installElasticAgentInCluster(ctx context.Context, profile *profile.Profile, return nil } -//go:embed elastic-agent-managed.yaml.tmpl +//go:embed _static/elastic-agent-managed.yaml.tmpl var elasticAgentManagedYamlTmpl string func getElasticAgentYAML(profile *profile.Profile, stackVersion string) ([]byte, error) { @@ -217,7 +222,7 @@ func getElasticAgentYAML(profile *profile.Profile, stackVersion string) ([]byte, "kibanaURL": "https://kibana:5601", "caCertPem": caCert, "elasticAgentImage": appConfig.StackImageRefs(stackVersion).ElasticAgent, - "elasticAgentTokenPolicyName": getTokenPolicyName(stackVersion), + "elasticAgentTokenPolicyName": getTokenPolicyName(stackVersion, defaulFleetTokenPolicyName), }) if err != nil { return nil, fmt.Errorf("can't generate elastic agent manifest: %w", err) @@ -242,9 +247,12 @@ func readCACertBase64(profile *profile.Profile) (string, error) { // getTokenPolicyName function returns the policy name for the 8.x Elastic stack. The agent's policy // is predefined in the Kibana configuration file. The logic is not present in older stacks. 
-func getTokenPolicyName(stackVersion string) string { +func getTokenPolicyName(stackVersion, policyName string) string { + if policyName == "" { + policyName = defaulFleetTokenPolicyName + } if strings.HasPrefix(stackVersion, "8.") { - return "Elastic-Agent (elastic-package)" + return policyName } return "" } diff --git a/internal/servicedeployer/terraform.go b/internal/servicedeployer/terraform.go index 9c0be72e1f..ef309e0d3e 100644 --- a/internal/servicedeployer/terraform.go +++ b/internal/servicedeployer/terraform.go @@ -28,7 +28,7 @@ const ( terraformDeployerDockerfile = "Dockerfile" terraformDeployerRun = "run.sh" terraformOutputPrefix = "TF_OUTPUT_" - terraformOutputJsonFile = "tfOutputValues.json" + terraformOutputJSONFile = "tfOutputValues.json" ) //go:embed _static/terraform_deployer.yml @@ -45,12 +45,16 @@ type TerraformServiceDeployer struct { definitionsDir string } +type TerraformServiceDeployerOptions struct { + DefinitionsDir string +} + // addTerraformOutputs method reads the terraform outputs generated in the json format and // adds them to the custom properties of ServiceInfo and can be used in the handlebars template // like `{{TF_OUTPUT_queue_url}}` where `queue_url` is the output configured func addTerraformOutputs(svcInfo *ServiceInfo) error { // Read the `output.json` file where terraform outputs are generated - outputFile := filepath.Join(svcInfo.OutputDir, terraformOutputJsonFile) + outputFile := filepath.Join(svcInfo.OutputDir, terraformOutputJSONFile) content, err := os.ReadFile(outputFile) if err != nil { return fmt.Errorf("failed to read terraform output file: %w", err) @@ -83,9 +87,9 @@ func addTerraformOutputs(svcInfo *ServiceInfo) error { } // NewTerraformServiceDeployer creates an instance of TerraformServiceDeployer. -func NewTerraformServiceDeployer(definitionsDir string) (*TerraformServiceDeployer, error) { +func NewTerraformServiceDeployer(opts TerraformServiceDeployerOptions) (*TerraformServiceDeployer, error) { return &TerraformServiceDeployer{ - definitionsDir: definitionsDir, + definitionsDir: opts.DefinitionsDir, }, nil } @@ -212,8 +216,8 @@ func (tsd TerraformServiceDeployer) installDockerfile() (string, error) { return tfDir, nil } -func CreateOutputDir(locationManager *locations.LocationManager, runId string) (string, error) { - outputDir := filepath.Join(locationManager.ServiceOutputDir(), runId) +func CreateOutputDir(locationManager *locations.LocationManager, runID string) (string, error) { + outputDir := filepath.Join(locationManager.ServiceOutputDir(), runID) if err := os.MkdirAll(outputDir, 0755); err != nil { return "", fmt.Errorf("failed to create output directory: %w", err) } diff --git a/internal/testrunner/runners/system/runner.go b/internal/testrunner/runners/system/runner.go index 4938b88ab9..5c08b13ce9 100644 --- a/internal/testrunner/runners/system/runner.go +++ b/internal/testrunner/runners/system/runner.go @@ -21,6 +21,7 @@ import ( "github.com/Masterminds/semver/v3" "gopkg.in/yaml.v3" + "github.com/elastic/elastic-package/internal/agentdeployer" "github.com/elastic/elastic-package/internal/common" "github.com/elastic/elastic-package/internal/configuration/locations" "github.com/elastic/elastic-package/internal/elasticsearch" @@ -110,11 +111,13 @@ type runner struct { serviceStateFilePath string // Execution order of following handlers is defined in runner.TearDown() method. 
+ unenrollAgentHandler func(context.Context) error deleteTestPolicyHandler func(context.Context) error deletePackageHandler func(context.Context) error resetAgentPolicyHandler func(context.Context) error resetAgentLogLevelHandler func(context.Context) error shutdownServiceHandler func(context.Context) error + shutdownAgentHandler func(context.Context) error wipeDataStreamHandler func(context.Context) error } @@ -200,12 +203,12 @@ func (r *runner) Run(ctx context.Context, options testrunner.TestOptions) ([]tes } serviceOptions := r.createServiceOptions(variant) - serviceContext, err := r.createServiceInfo(serviceOptions) + svcInfo, err := r.createServiceInfo() if err != nil { return result.WithError(err) } - testConfig, err := newConfig(configFile, serviceContext, variant) + testConfig, err := newConfig(configFile, svcInfo, variant) if err != nil { return nil, fmt.Errorf("unable to load system test case file '%s': %w", configFile, err) } @@ -222,7 +225,7 @@ func (r *runner) Run(ctx context.Context, options testrunner.TestOptions) ([]tes } result = r.newResult(fmt.Sprintf("%s - %s", resultName, testConfig.Name())) - scenario, err := r.prepareScenario(ctx, testConfig, serviceContext, serviceOptions) + scenario, err := r.prepareScenario(ctx, testConfig, svcInfo, serviceOptions) if r.options.RunSetup && err != nil { tdErr := r.tearDownTest(ctx) if tdErr != nil { @@ -261,33 +264,82 @@ func (r *runner) Run(ctx context.Context, options testrunner.TestOptions) ([]tes return result.WithSuccess() } -func (r *runner) createServiceOptions(variantName string) servicedeployer.FactoryOptions { - return servicedeployer.FactoryOptions{ +func (r *runner) createAgentOptions(variantName, policyName string) agentdeployer.FactoryOptions { + return agentdeployer.FactoryOptions{ Profile: r.options.Profile, PackageRootPath: r.options.PackageRootPath, DataStreamRootPath: r.dataStreamPath, DevDeployDir: DevDeployDir, - Variant: variantName, - Type: servicedeployer.TypeTest, + Type: agentdeployer.TypeTest, StackVersion: r.stackVersion.Version(), + PackageName: r.options.TestFolder.Package, + DataStream: r.options.TestFolder.DataStream, + PolicyName: policyName, + Variant: variantName, RunTearDown: r.options.RunTearDown, RunTestsOnly: r.options.RunTestsOnly, RunSetup: r.options.RunSetup, } } -func (r *runner) createServiceInfo(serviceOptions servicedeployer.FactoryOptions) (servicedeployer.ServiceInfo, error) { +func (r *runner) createServiceOptions(variantName string) servicedeployer.FactoryOptions { + return servicedeployer.FactoryOptions{ + Profile: r.options.Profile, + PackageRootPath: r.options.PackageRootPath, + DataStreamRootPath: r.dataStreamPath, + DevDeployDir: DevDeployDir, + Variant: variantName, + Type: servicedeployer.TypeTest, + StackVersion: r.stackVersion.Version(), + PackageName: r.options.TestFolder.Package, + DataStream: r.options.TestFolder.DataStream, + RunTearDown: r.options.RunTearDown, + RunTestsOnly: r.options.RunTestsOnly, + RunSetup: r.options.RunSetup, + DeployIndependentAgent: r.options.RunIndependentElasticAgent, + } +} + +func (r *runner) createAgentInfo(policy *kibana.Policy) (agentdeployer.AgentInfo, error) { + var info agentdeployer.AgentInfo + + info.Name = r.options.TestFolder.Package + info.Logs.Folder.Agent = ServiceLogsAgentDir + info.Test.RunID = createTestRunID() + + folderName := fmt.Sprintf("agent-%s", r.options.TestFolder.Package) + if r.options.TestFolder.DataStream != "" { + folderName = fmt.Sprintf("%s-%s", folderName, r.options.TestFolder.DataStream) + } + + dirPath, err 
:= agentdeployer.CreateServiceLogsDir(r.locationManager, folderName) + if err != nil { + return agentdeployer.AgentInfo{}, fmt.Errorf("failed to create service logs dir: %w", err) + } + info.Logs.Folder.Local = dirPath + + info.Policy.Name = policy.Name + info.Policy.ID = policy.ID + + return info, nil +} + +func (r *runner) createServiceInfo() (servicedeployer.ServiceInfo, error) { var svcInfo servicedeployer.ServiceInfo svcInfo.Name = r.options.TestFolder.Package svcInfo.Logs.Folder.Local = r.locationManager.ServiceLogDir() svcInfo.Logs.Folder.Agent = ServiceLogsAgentDir svcInfo.Test.RunID = createTestRunID() - outputDir, err := servicedeployer.CreateOutputDir(r.locationManager, svcInfo.Test.RunID) - if err != nil { - return servicedeployer.ServiceInfo{}, fmt.Errorf("could not create output dir for terraform deployer %w", err) + if r.options.RunTearDown || r.options.RunTestsOnly { + logger.Debug("Skip creating output directory") + } else { + outputDir, err := servicedeployer.CreateOutputDir(r.locationManager, svcInfo.Test.RunID) + if err != nil { + return servicedeployer.ServiceInfo{}, fmt.Errorf("could not create output dir for terraform deployer %w", err) + } + svcInfo.OutputDir = outputDir } - svcInfo.OutputDir = outputDir return svcInfo, nil } @@ -323,6 +375,13 @@ func (r *runner) tearDownTest(ctx context.Context) error { r.resetAgentLogLevelHandler = nil } + if r.unenrollAgentHandler != nil { + if err := r.unenrollAgentHandler(cleanupCtx); err != nil { + return err + } + r.unenrollAgentHandler = nil + } + if r.deleteTestPolicyHandler != nil { if err := r.deleteTestPolicyHandler(cleanupCtx); err != nil { return err @@ -344,6 +403,13 @@ func (r *runner) tearDownTest(ctx context.Context) error { r.shutdownServiceHandler = nil } + if r.shutdownAgentHandler != nil { + if err := r.shutdownAgentHandler(cleanupCtx); err != nil { + return err + } + r.shutdownAgentHandler = nil + } + if r.wipeDataStreamHandler != nil { if err := r.wipeDataStreamHandler(cleanupCtx); err != nil { return err @@ -478,19 +544,19 @@ func (r *runner) run(ctx context.Context) (results []testrunner.TestResult, err func (r *runner) runTestPerVariant(ctx context.Context, result *testrunner.ResultComposer, cfgFile, variantName string) ([]testrunner.TestResult, error) { serviceOptions := r.createServiceOptions(variantName) - serviceContext, err := r.createServiceInfo(serviceOptions) + svcInfo, err := r.createServiceInfo() if err != nil { return result.WithError(err) } configFile := filepath.Join(r.options.TestFolder.Path, cfgFile) - testConfig, err := newConfig(configFile, serviceContext, variantName) + testConfig, err := newConfig(configFile, svcInfo, variantName) if err != nil { return nil, fmt.Errorf("unable to load system test case file '%s': %w", configFile, err) } logger.Debugf("Using config: %q", testConfig.Name()) - partial, err := r.runTest(ctx, testConfig, serviceContext, serviceOptions) + partial, err := r.runTest(ctx, testConfig, svcInfo, serviceOptions) tdErr := r.tearDownTest(ctx) if err != nil { @@ -648,9 +714,11 @@ type scenarioTest struct { kibanaDataStream kibana.PackageDataStream syntheticEnabled bool docs []common.MapStr + agent agentdeployer.DeployedAgent + enrollingTime time.Time } -func (r *runner) prepareScenario(ctx context.Context, config *testConfig, serviceContext servicedeployer.ServiceInfo, serviceOptions servicedeployer.FactoryOptions) (*scenarioTest, error) { +func (r *runner) prepareScenario(ctx context.Context, config *testConfig, svcInfo servicedeployer.ServiceInfo, serviceOptions 
servicedeployer.FactoryOptions) (*scenarioTest, error) { var err error var serviceStateData ServiceState if r.options.RunSetup { @@ -692,32 +760,57 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, servic return nil, fmt.Errorf("failed to find the selected policy_template: %w", err) } - // Setup service. - logger.Debug("setting up service...") - serviceDeployer, err := servicedeployer.Factory(serviceOptions) - if err != nil { - return nil, fmt.Errorf("could not create service runner: %w", err) + // Configure package (single data stream) via Fleet APIs. + var policy *kibana.Policy + if r.options.RunTearDown || r.options.RunTestsOnly { + policy = &serviceStateData.CurrentPolicy + logger.Debugf("Got policy from file: %q - %q", policy.Name, policy.ID) + } else { + logger.Debug("creating test policy...") + testTime := time.Now().Format("20060102T15:04:05Z") + + p := kibana.Policy{ + Name: fmt.Sprintf("ep-test-system-%s-%s-%s", r.options.TestFolder.Package, r.options.TestFolder.DataStream, testTime), + Description: fmt.Sprintf("test policy created by elastic-package test system for data stream %s/%s", r.options.TestFolder.Package, r.options.TestFolder.DataStream), + Namespace: "ep", + } + // Assign the data_output_id to the agent policy to configure the output to logstash. The value is inferred from stack/_static/kibana.yml.tmpl + if r.options.Profile.Config("stack.logstash_enabled", "false") == "true" { + p.DataOutputID = "fleet-logstash-output" + } + policy, err = r.options.KibanaClient.CreatePolicy(ctx, p) + if err != nil { + return nil, fmt.Errorf("could not create test policy: %w", err) + } + } + r.deleteTestPolicyHandler = func(ctx context.Context) error { + logger.Debug("deleting test policy...") + if err := r.options.KibanaClient.DeletePolicy(ctx, *policy); err != nil { + return fmt.Errorf("error cleaning up test policy: %w", err) + } + return nil } - if config.Service != "" { - serviceContext.Name = config.Service + enrollingTime := time.Now() + if r.options.RunTearDown || r.options.RunTestsOnly { + enrollingTime = serviceStateData.EnrollingAgentTime } - service, err := serviceDeployer.SetUp(ctx, serviceContext) + + agentDeployed, agentInfo, err := r.setupAgent(ctx, serviceOptions.Variant, serviceStateData, policy) if err != nil { - return nil, fmt.Errorf("could not setup service: %w", err) + return nil, err } - serviceContext = service.Info() - r.shutdownServiceHandler = func(ctx context.Context) error { - logger.Debug("tearing down service...") - if err := service.TearDown(ctx); err != nil { - return fmt.Errorf("error tearing down service: %w", err) - } - return nil + scenario.enrollingTime = enrollingTime + scenario.agent = agentDeployed + + service, svcInfo, err := r.setupService(ctx, config, serviceOptions, svcInfo, agentInfo, agentDeployed, serviceStateData) + if err != nil { + return nil, err } // Reload test config with ctx variable substitution. - config, err = newConfig(config.Path, serviceContext, serviceOptions.Variant) + config, err = newConfig(config.Path, svcInfo, serviceOptions.Variant) if err != nil { return nil, fmt.Errorf("unable to reload system test case configuration: %w", err) } @@ -777,37 +870,6 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, servic } r.deletePackageHandler = deletePackageHandler - // Configure package (single data stream) via Fleet APIs. 
- var policy *kibana.Policy - if r.options.RunTearDown || r.options.RunTestsOnly { - policy = &serviceStateData.CurrentPolicy - logger.Debugf("Got policy from file: %q - %q", policy.Name, policy.ID) - } else { - logger.Debug("creating test policy...") - testTime := time.Now().Format("20060102T15:04:05Z") - - p := kibana.Policy{ - Name: fmt.Sprintf("ep-test-system-%s-%s-%s", r.options.TestFolder.Package, r.options.TestFolder.DataStream, testTime), - Description: fmt.Sprintf("test policy created by elastic-package test system for data stream %s/%s", r.options.TestFolder.Package, r.options.TestFolder.DataStream), - Namespace: "ep", - } - // Assign the data_output_id to the agent policy to configure the output to logstash. The value is inferred from stack/_static/kibana.yml.tmpl - if r.options.Profile.Config("stack.logstash_enabled", "false") == "true" { - p.DataOutputID = "fleet-logstash-output" - } - policy, err = r.options.KibanaClient.CreatePolicy(ctx, p) - if err != nil { - return nil, fmt.Errorf("could not create test policy: %w", err) - } - } - r.deleteTestPolicyHandler = func(ctx context.Context) error { - logger.Debug("deleting test policy...") - if err := r.options.KibanaClient.DeletePolicy(ctx, *policy); err != nil { - return fmt.Errorf("error cleaning up test policy: %w", err) - } - return nil - } - logger.Debug("adding package data stream to test policy...") ds := createPackageDatastream(*policy, *scenario.pkgManifest, policyTemplate, *scenario.dataStreamManifest, *config) if r.options.RunTearDown || r.options.RunTestsOnly { @@ -859,17 +921,32 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, servic } } + // FIXME: running per stages does not work when multiple agents are created var origPolicy kibana.Policy - agents, err := checkEnrolledAgents(ctx, r.options.KibanaClient, serviceContext) + agents, err := checkEnrolledAgents(ctx, r.options.KibanaClient, agentInfo, svcInfo, r.options.RunIndependentElasticAgent) if err != nil { return nil, fmt.Errorf("can't check enrolled agents: %w", err) } agent := agents[0] + logger.Debugf("Selected enrolled agent %q", agent.ID) + + r.unenrollAgentHandler = func(ctx context.Context) error { + if !r.options.RunIndependentElasticAgent { + return nil + } + logger.Debug("unenrolling agent...") + err := r.options.KibanaClient.RemoveAgent(ctx, agent) + if err != nil { + return fmt.Errorf("failed to unenroll agent %q: %w", agent.ID, err) + } + return nil + } if r.options.RunTearDown || r.options.RunTestsOnly { origPolicy = serviceStateData.OrigPolicy logger.Debugf("Got orig policy from file: %q - %q", origPolicy.Name, origPolicy.ID) } else { + // Store previous agent policy assigned to the agent origPolicy = kibana.Policy{ ID: agent.PolicyID, Revision: agent.PolicyRevision, @@ -877,6 +954,9 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, servic } // Assign policy to agent r.resetAgentPolicyHandler = func(ctx context.Context) error { + if r.options.RunIndependentElasticAgent { + return nil + } logger.Debug("reassigning original policy back to agent...") if err := r.options.KibanaClient.AssignPolicyToAgent(ctx, agent, origPolicy); err != nil { return fmt.Errorf("error reassigning original policy to agent: %w", err) @@ -886,12 +966,10 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, servic origAgent := agent origLogLevel := "" - switch { - case r.options.RunTearDown: - logger.Debug("Skip assiging log level debut to agent") - logger.Debugf("Got agent from file: %q - %q", 
serviceStateData.Agent.ID, serviceStateData.Agent.LocalMetadata.Elastic.Agent.LogLevel) + if r.options.RunTearDown { + logger.Debug("Skip assigning log level debug to agent") origLogLevel = serviceStateData.Agent.LocalMetadata.Elastic.Agent.LogLevel - default: + } else { logger.Debug("Set Debug log level to agent") origLogLevel = agent.LocalMetadata.Elastic.Agent.LogLevel err = r.options.KibanaClient.SetAgentLogLevel(ctx, agent.ID, "debug") @@ -908,10 +986,9 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, servic return nil } - switch { - case r.options.RunTearDown || r.options.RunTestsOnly: + if r.options.RunTearDown || r.options.RunTestsOnly { logger.Debug("Skip assiging package data stream to agent") - default: + } else { policyWithDataStream, err := r.options.KibanaClient.GetPolicy(ctx, policy.ID) if err != nil { return nil, fmt.Errorf("could not read the policy with data stream: %w", err) } @@ -996,7 +1073,16 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, servic scenario.docs = hits.getDocs(scenario.syntheticEnabled) if r.options.RunSetup { - err = r.writeScenarioState(policy, &origPolicy, config, origAgent) + opts := scenarioStateOpts{ + origPolicy: &origPolicy, + currentPolicy: policy, + config: config, + agent: origAgent, + enrollingTime: enrollingTime, + agentInfo: agentInfo, + svcInfo: svcInfo, + } + err = r.writeScenarioState(opts) if err != nil { return nil, err } @@ -1005,6 +1091,94 @@ func (r *runner) prepareScenario(ctx context.Context, config *testConfig, servic return &scenario, nil } +func (r *runner) setupService(ctx context.Context, config *testConfig, serviceOptions servicedeployer.FactoryOptions, svcInfo servicedeployer.ServiceInfo, agentInfo agentdeployer.AgentInfo, agentDeployed agentdeployer.DeployedAgent, state ServiceState) (servicedeployer.DeployedService, servicedeployer.ServiceInfo, error) { + logger.Debug("setting up service...") + if r.options.RunTearDown || r.options.RunTestsOnly { + svcInfo.Test.RunID = state.ServiceRunID + svcInfo.OutputDir = state.ServiceOutputDir + } + + // By default, use the agent running in the Elastic stack + svcInfo.AgentNetworkName = stack.Network(r.options.Profile) + if agentDeployed != nil { + svcInfo.AgentNetworkName = agentInfo.NetworkName + } + + // Set the right folder for logs except for custom agents that are still deployed using "servicedeployer" + if r.options.RunIndependentElasticAgent && agentDeployed != nil { + svcInfo.Logs.Folder.Local = agentInfo.Logs.Folder.Local + } + + // In case of custom agents (servicedeployer) with independent agents enabled, update serviceOptions to include the test policy too + if r.options.RunIndependentElasticAgent { + serviceOptions.PolicyName = agentInfo.Policy.Name + } + + if config.Service != "" { + svcInfo.Name = config.Service + } + + serviceDeployer, err := servicedeployer.Factory(serviceOptions) + if err != nil { + return nil, svcInfo, fmt.Errorf("could not create service runner: %w", err) + } + + service, err := serviceDeployer.SetUp(ctx, svcInfo) + if err != nil { + return nil, svcInfo, fmt.Errorf("could not setup service: %w", err) + } + + r.shutdownServiceHandler = func(ctx context.Context) error { + logger.Debug("tearing down service...") + if err := service.TearDown(ctx); err != nil { + return fmt.Errorf("error tearing down service: %w", err) + } + + return nil + } + return service, service.Info(), nil +} + +func (r *runner) setupAgent(ctx context.Context, variant string, state ServiceState, policy *kibana.Policy)
(agentdeployer.DeployedAgent, agentdeployer.AgentInfo, error) { + if !r.options.RunIndependentElasticAgent { + return nil, agentdeployer.AgentInfo{}, nil + } + logger.Warn("setting up agent (technical preview)...") + agentInfo, err := r.createAgentInfo(policy) + if err != nil { + return nil, agentdeployer.AgentInfo{}, err + } + if r.options.RunTearDown || r.options.RunTestsOnly { + agentInfo.Test.RunID = state.AgentRunID + } + + agentOptions := r.createAgentOptions(variant, agentInfo.Policy.Name) + agentDeployer, err := agentdeployer.Factory(agentOptions) + if err != nil { + return nil, agentInfo, fmt.Errorf("could not create agent runner: %w", err) + } + if agentDeployer == nil { + return nil, agentInfo, nil + } + + agentDeployed, err := agentDeployer.SetUp(ctx, agentInfo) + if err != nil { + return nil, agentInfo, fmt.Errorf("could not setup agent: %w", err) + } + r.shutdownAgentHandler = func(ctx context.Context) error { + if agentDeployer == nil { + return nil + } + logger.Debug("tearing down agent...") + if err := agentDeployed.TearDown(ctx); err != nil { + return fmt.Errorf("error tearing down agent: %w", err) + } + + return nil + } + return agentDeployed, agentDeployed.Info(), nil +} + func (r *runner) removeServiceStateFile() error { err := os.Remove(r.serviceStateFilePath) if err != nil { @@ -1037,20 +1211,38 @@ func (r *runner) readServiceStateData() (ServiceState, error) { } type ServiceState struct { - OrigPolicy kibana.Policy `json:"orig_policy"` - CurrentPolicy kibana.Policy `json:"current_policy"` - Agent kibana.Agent `json:"agent"` - ConfigFilePath string `json:"config_file_path"` - VariantName string `json:"variant_name"` + OrigPolicy kibana.Policy `json:"orig_policy"` + CurrentPolicy kibana.Policy `json:"current_policy"` + Agent kibana.Agent `json:"agent"` + ConfigFilePath string `json:"config_file_path"` + VariantName string `json:"variant_name"` + EnrollingAgentTime time.Time `json:"enrolling_agent_time"` + ServiceRunID string `json:"service_info_run_id"` + AgentRunID string `json:"agent_info_run_id"` + ServiceOutputDir string `json:"service_output_dir"` +} + +type scenarioStateOpts struct { + currentPolicy *kibana.Policy + origPolicy *kibana.Policy + config *testConfig + agent kibana.Agent + enrollingTime time.Time + agentInfo agentdeployer.AgentInfo + svcInfo servicedeployer.ServiceInfo } -func (r *runner) writeScenarioState(currentPolicy, origPolicy *kibana.Policy, config *testConfig, agent kibana.Agent) error { +func (r *runner) writeScenarioState(opts scenarioStateOpts) error { data := ServiceState{ - OrigPolicy: *origPolicy, - CurrentPolicy: *currentPolicy, - Agent: agent, - ConfigFilePath: config.Path, - VariantName: config.ServiceVariantName, + OrigPolicy: *opts.origPolicy, + CurrentPolicy: *opts.currentPolicy, + Agent: opts.agent, + ConfigFilePath: opts.config.Path, + VariantName: opts.config.ServiceVariantName, + EnrollingAgentTime: opts.enrollingTime, + ServiceRunID: opts.svcInfo.Test.RunID, + AgentRunID: opts.agentInfo.Test.RunID, + ServiceOutputDir: opts.svcInfo.OutputDir, } dataBytes, err := json.Marshal(data) if err != nil { @@ -1169,10 +1361,20 @@ func (r *runner) validateTestScenario(ctx context.Context, result *testrunner.Re return result.WithError(err) } + if scenario.agent != nil { + logResults, err := r.checkNewAgentLogs(ctx, scenario.agent, scenario.enrollingTime, errorPatterns) + if err != nil { + return result.WithError(err) + } + if len(logResults) > 0 { + return logResults, nil + } + } + return result.WithSuccess() } -func (r *runner) 
runTest(ctx context.Context, config *testConfig, serviceContext servicedeployer.ServiceInfo, serviceOptions servicedeployer.FactoryOptions) ([]testrunner.TestResult, error) { +func (r *runner) runTest(ctx context.Context, config *testConfig, svcInfo servicedeployer.ServiceInfo, serviceOptions servicedeployer.FactoryOptions) ([]testrunner.TestResult, error) { result := r.newResult(config.Name()) if config.Skip != nil { @@ -1184,7 +1386,7 @@ func (r *runner) runTest(ctx context.Context, config *testConfig, serviceContext logger.Debugf("running test with configuration '%s'", config.Name()) - scenario, err := r.prepareScenario(ctx, config, serviceContext, serviceOptions) + scenario, err := r.prepareScenario(ctx, config, svcInfo, serviceOptions) if err != nil { return result.WithError(err) } @@ -1192,15 +1394,20 @@ func (r *runner) runTest(ctx context.Context, config *testConfig, serviceContext return r.validateTestScenario(ctx, result, scenario, config, serviceOptions) } -func checkEnrolledAgents(ctx context.Context, client *kibana.Client, serviceContext servicedeployer.ServiceInfo) ([]kibana.Agent, error) { +func checkEnrolledAgents(ctx context.Context, client *kibana.Client, agentInfo agentdeployer.AgentInfo, svcInfo servicedeployer.ServiceInfo, runIndependentElasticAgent bool) ([]kibana.Agent, error) { var agents []kibana.Agent + enrolled, err := wait.UntilTrue(ctx, func(ctx context.Context) (bool, error) { allAgents, err := client.ListAgents(ctx) if err != nil { return false, fmt.Errorf("could not list agents: %w", err) } - agents = filterAgents(allAgents, serviceContext) + if runIndependentElasticAgent { + agents = filterIndependentAgents(allAgents, agentInfo) + } else { + agents = filterAgents(allAgents, svcInfo) + } logger.Debugf("found %d enrolled agent(s)", len(agents)) if len(agents) == 0 { return false, nil // selected agents are unavailable yet @@ -1614,9 +1821,32 @@ func filterAgents(allAgents []kibana.Agent, svcInfo servicedeployer.ServiceInfo) continue // For some reason Kibana doesn't always return a valid policy revision (eventually it will be present and valid) } - if svcInfo.Agent.Host.NamePrefix != "" && !strings.HasPrefix(agent.LocalMetadata.Host.Name, svcInfo.Agent.Host.NamePrefix) { + hasServicePrefix := strings.HasPrefix(agent.LocalMetadata.Host.Name, svcInfo.Agent.Host.NamePrefix) + if svcInfo.Agent.Host.NamePrefix != "" && !hasServicePrefix { + continue + } + filtered = append(filtered, agent) + } + return filtered +} + +func filterIndependentAgents(allAgents []kibana.Agent, agentInfo agentdeployer.AgentInfo) []kibana.Agent { + // filtered list of agents must contain all agents started by the stack + // they could be assigned the default policy (elastic-agent-managed-ep) or the test policy (ep-test-system-*) + var filtered []kibana.Agent + for _, agent := range allAgents { + if agent.PolicyRevision == 0 { + continue // For some reason Kibana doesn't always return a valid policy revision (eventually it will be present and valid) + } + + if agent.Status != "online" { continue } + + if agent.PolicyID != agentInfo.Policy.ID { + continue + } + filtered = append(filtered, agent) } return filtered @@ -1706,6 +1936,58 @@ func (r *runner) generateTestResult(docs []common.MapStr, specVersion semver.Ver return nil } +func (r *runner) checkNewAgentLogs(ctx context.Context, agent agentdeployer.DeployedAgent, startTesting time.Time, errorPatterns []logsByContainer) (results []testrunner.TestResult, err error) { + if agent == nil { + return nil, nil + } + + f, err := 
os.CreateTemp("", "elastic-agent.logs") + if err != nil { + return nil, fmt.Errorf("failed to create temp file for logs: %w", err) + } + defer os.Remove(f.Name()) + + for _, patternsContainer := range errorPatterns { + if patternsContainer.containerName != "elastic-agent" { + continue + } + + startTime := time.Now() + + outputBytes, err := agent.Logs(ctx, startTesting) + if err != nil { + return nil, fmt.Errorf("check log messages failed: %s", err) + } + _, err = f.Write(outputBytes) + if err != nil { + return nil, fmt.Errorf("write log messages failed: %s", err) + } + + err = r.anyErrorMessages(f.Name(), startTesting, patternsContainer.patterns) + if e, ok := err.(testrunner.ErrTestCaseFailed); ok { + tr := testrunner.TestResult{ + TestType: TestType, + Name: fmt.Sprintf("(%s logs)", patternsContainer.containerName), + Package: r.options.TestFolder.Package, + DataStream: r.options.TestFolder.DataStream, + } + tr.FailureMsg = e.Error() + tr.FailureDetails = e.Details + tr.TimeElapsed = time.Since(startTime) + results = append(results, tr) + // Just check elastic-agent + break + } + + if err != nil { + return nil, fmt.Errorf("check log messages failed: %s", err) + } + // Just check elastic-agent + break + } + return results, nil +} + func (r *runner) checkAgentLogs(dumpOptions stack.DumpOptions, startTesting time.Time, errorPatterns []logsByContainer) (results []testrunner.TestResult, err error) { for _, patternsContainer := range errorPatterns { startTime := time.Now() diff --git a/internal/testrunner/testrunner.go b/internal/testrunner/testrunner.go index cfd18af96d..8a87ec2315 100644 --- a/internal/testrunner/testrunner.go +++ b/internal/testrunner/testrunner.go @@ -37,6 +37,8 @@ type TestOptions struct { API *elasticsearch.API KibanaClient *kibana.Client + RunIndependentElasticAgent bool + DeferCleanup time.Duration ServiceVariant string WithCoverage bool diff --git a/scripts/test-check-packages.sh b/scripts/test-check-packages.sh index f0a9c88a9c..f5aaa84f75 100755 --- a/scripts/test-check-packages.sh +++ b/scripts/test-check-packages.sh @@ -6,7 +6,8 @@ cleanup() { r=$? 
# Dump stack logs - elastic-package stack dump -v --output "build/elastic-stack-dump/check-${PACKAGE_UNDER_TEST:-${PACKAGE_TEST_TYPE:-any}}" + elastic-package stack dump -v \ + --output "build/elastic-stack-dump/check-${PACKAGE_UNDER_TEST:-${PACKAGE_TEST_TYPE:-any}}" if [ "${PACKAGE_TEST_TYPE:-other}" == "with-kind" ]; then # Dump kubectl details @@ -40,6 +41,8 @@ cleanup() { trap cleanup EXIT +ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT=${ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT:-"false"} +export ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT ELASTIC_PACKAGE_LINKS_FILE_PATH="$(pwd)/scripts/links_table.yml" export ELASTIC_PACKAGE_LINKS_FILE_PATH @@ -107,7 +110,12 @@ for d in test/packages/${PACKAGE_TEST_TYPE:-other}/${PACKAGE_UNDER_TEST:-*}/; do elastic-package benchmark system --benchmark logs-benchmark -v --defer-cleanup 1s else # defer-cleanup is set to a short period to verify that the option is available - elastic-package test -v --report-format xUnit --report-output file --defer-cleanup 1s --test-coverage --coverage-format=generic + elastic-package test -v \ + --report-format xUnit \ + --report-output file \ + --defer-cleanup 1s \ + --test-coverage \ + --coverage-format=generic fi ) cd - diff --git a/scripts/test-system-test-flags.sh b/scripts/test-system-test-flags.sh index 7c5f2075e5..b4679de3e6 100755 --- a/scripts/test-system-test-flags.sh +++ b/scripts/test-system-test-flags.sh @@ -201,6 +201,8 @@ run_tests_for_package() { AGENT_CONTAINER_NAME="${DEFAULT_AGENT_CONTAINER_NAME}" fi + ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT=${ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT:-"false"} + export ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT pushd "${package_folder}" > /dev/null echo "--- [${package_name} - ${variant}] Setup service without tear-down"
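Usage note (not part of the patch): as a rough local sketch, the same toggle exported by the CI hook and by scripts/test-check-packages.sh can be set manually before invoking that script. PACKAGE_TEST_TYPE and PACKAGE_UNDER_TEST are the variables the script already reads; the "parallel" and "nginx" values below are illustrative assumptions, not values taken from this change.

    # Hypothetical local run: exercise a single package with the independent Elastic Agent enabled.
    export ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT=true
    PACKAGE_TEST_TYPE=parallel PACKAGE_UNDER_TEST=nginx ./scripts/test-check-packages.sh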