Skip to content

Commit

Permalink
Ensure socket path is < 104 characters when STATE_PATH is set. (#4909)
Browse files Browse the repository at this point in the history
* Ensure socket path when STATE_PATH is set is < 104 characters long

* Fix Windows and use "container" test group

This commit fixes the implementation on Windows and puts the
container integration tests in the container group again. The test for
long state paths will create files outside of the test directory.

It also cleans up the default folder used as state path.

* Fix corner cases and improve tests

* Add changelog

* Fix lint warning

* Only change socket path

* Update tests to new approach

* remove un-used function

* Update internal/pkg/agent/cmd/container.go

Co-authored-by: Michel Laterman <82832767+michel-laterman@users.noreply.github.com>

* fix build and PR suggestions

---------

Co-authored-by: Michel Laterman <82832767+michel-laterman@users.noreply.github.com>
Co-authored-by: Pierre HILBERT <pierre.hilbert@elastic.co>
  • Loading branch information
3 people committed Jun 21, 2024
1 parent b77f435 commit 8a30900
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 32 deletions.
35 changes: 35 additions & 0 deletions changelog/fragments/1718226176-Ensure.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: bug-fix

# Change summary; a 80ish characters long description of the change.
summary: Ensure socket can be created even when STATE_PATH is too long for container command

# Long description; in case the summary is not enough to describe the change
# this field accommodate a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: |
When running the Elastic-Agent container command and `STATE_PATH` is
set to a value that is too long the Unix socket cannot be created,
so the Elastic-Agent will use the default state path instead.
# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/4909

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present is automatically filled by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
11 changes: 10 additions & 1 deletion internal/pkg/agent/cmd/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"encoding/json"
"fmt"
"io"
"io/fs"
"net/url"
"os"
"os/exec"
Expand All @@ -34,6 +35,7 @@ import (
"github.com/elastic/elastic-agent/pkg/component"
"github.com/elastic/elastic-agent/pkg/core/logger"
"github.com/elastic/elastic-agent/pkg/core/process"
"github.com/elastic/elastic-agent/pkg/utils"
"github.com/elastic/elastic-agent/version"
)

Expand Down Expand Up @@ -774,13 +776,20 @@ func setPaths(statePath, configPath, logsPath, socketPath string, writePaths boo
if statePath == "" {
statePath = defaultStateDirectory
}

topPath := filepath.Join(statePath, "data")
configPath = envWithDefault(configPath, "CONFIG_PATH")
if configPath == "" {
configPath = statePath
}
if _, err := os.Stat(configPath); errors.Is(err, fs.ErrNotExist) {
if err := os.MkdirAll(configPath, 0755); err != nil {
return fmt.Errorf("cannot create folders for config path '%s': %w", configPath, err)
}
}

if socketPath == "" {
socketPath = fmt.Sprintf("unix://%s", filepath.Join(topPath, paths.ControlSocketName))
socketPath = utils.SocketURLWithFallback(statePath, topPath)
}
// ensure that the directory and sub-directory data exists
if err := os.MkdirAll(topPath, 0755); err != nil {
Expand Down
177 changes: 146 additions & 31 deletions testing/integration/container_cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
Expand All @@ -18,29 +20,12 @@ import (
"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent-libs/kibana"
atesting "github.com/elastic/elastic-agent/pkg/testing"
"github.com/elastic/elastic-agent/pkg/testing/define"
"github.com/elastic/elastic-agent/pkg/testing/tools/fleettools"
)

func TestContainerCMD(t *testing.T) {
info := define.Require(t, define.Requirements{
Stack: &define.Stack{},
Local: false,
Sudo: true,
OS: []define.OS{
{Type: define.Linux},
},
// This test runs the command we use when executing inside a container
// which leaves files under /usr/share/elastic-agent. Run it isolated
// to avoid interfering with other tests and better simulate a container
// environment we run it in isolation
Group: "container",
})
ctx := context.Background()

agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
require.NoError(t, err)

func createPolicy(t *testing.T, ctx context.Context, agentFixture *atesting.Fixture, info *define.Info) string {
createPolicyReq := kibana.AgentPolicy{
Name: fmt.Sprintf("test-policy-enroll-%s", uuid.New().String()),
Namespace: info.Namespace,
Expand Down Expand Up @@ -74,13 +59,10 @@ func TestContainerCMD(t *testing.T) {
t.Fatalf("unable to create enrolment API key: %s", err)
}

fleetURL, err := fleettools.DefaultURL(ctx, info.KibanaClient)
if err != nil {
t.Fatalf("could not get Fleet URL: %s", err)
}
return enrollmentToken.APIKey
}

ctx, cancel := context.WithTimeout(ctx, 1*time.Minute)
defer cancel()
func prepareContainerCMD(t *testing.T, ctx context.Context, agentFixture *atesting.Fixture, info *define.Info, env []string) *exec.Cmd {
cmd, err := agentFixture.PrepareAgentCommand(ctx, []string{"container"})
if err != nil {
t.Fatalf("could not prepare agent command: %s", err)
Expand Down Expand Up @@ -112,22 +94,52 @@ func TestContainerCMD(t *testing.T) {
agentOutput := strings.Builder{}
cmd.Stderr = &agentOutput
cmd.Stdout = &agentOutput
cmd.Env = append(os.Environ(),
cmd.Env = append(os.Environ(), env...)
return cmd
}

func TestContainerCMD(t *testing.T) {
info := define.Require(t, define.Requirements{
Stack: &define.Stack{},
Local: false,
Sudo: true,
OS: []define.OS{
{Type: define.Linux},
},
Group: "container",
})

ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()

agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
require.NoError(t, err)

fleetURL, err := fleettools.DefaultURL(ctx, info.KibanaClient)
if err != nil {
t.Fatalf("could not get Fleet URL: %s", err)
}

enrollmentToken := createPolicy(t, ctx, agentFixture, info)
env := []string{
"FLEET_ENROLL=1",
"FLEET_URL="+fleetURL,
"FLEET_ENROLLMENT_TOKEN="+enrollmentToken.APIKey,
"FLEET_URL=" + fleetURL,
"FLEET_ENROLLMENT_TOKEN=" + enrollmentToken,
// As the agent isn't built for a container, it's upgradable, triggering
// the start of the upgrade watcher. If `STATE_PATH` isn't set, the
// upgrade watcher will commence from a different path within the
// container, distinct from the current execution path.
"STATE_PATH="+agentFixture.WorkDir(),
)
"STATE_PATH=" + agentFixture.WorkDir(),
}

cmd := prepareContainerCMD(t, ctx, agentFixture, info, env)
t.Logf(">> running binary with: %v", cmd.Args)
if err := cmd.Start(); err != nil {
t.Fatalf("error running container cmd: %s", err)
}

agentOutput := cmd.Stderr.(*strings.Builder)

require.Eventuallyf(t, func() bool {
// This will return errors until it connects to the agent,
// they're mostly noise because until the agent starts running
Expand All @@ -140,6 +152,109 @@ func TestContainerCMD(t *testing.T) {
},
5*time.Minute, time.Second,
"Elastic-Agent did not report healthy. Agent status error: \"%v\", Agent logs\n%s",
err, &agentOutput,
err, agentOutput,
)
}

func TestContainerCMDWithAVeryLongStatePath(t *testing.T) {
info := define.Require(t, define.Requirements{
Stack: &define.Stack{},
Local: false,
Sudo: true,
OS: []define.OS{
{Type: define.Linux},
},
Group: "container",
})

ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
defer cancel()

fleetURL, err := fleettools.DefaultURL(ctx, info.KibanaClient)
if err != nil {
t.Fatalf("could not get Fleet URL: %s", err)
}

testCases := map[string]struct {
statePath string
expectedStatePath string
expectedSocketPath string
expectError bool
}{
"small path": { // Use the set path
statePath: filepath.Join(os.TempDir(), "foo", "bar"),
expectedStatePath: filepath.Join(os.TempDir(), "foo", "bar"),
expectedSocketPath: "/tmp/foo/bar/data/smp7BzlzcwgrLK4PUxpu7G1O5UwV4adr.sock",
},
"no path set": { // Use the default path
statePath: "",
expectedStatePath: "/usr/share/elastic-agent/state",
expectedSocketPath: "/usr/share/elastic-agent/state/data/Td8I7R-Zby36_zF_IOd9QVNlFblNEro3.sock",
},
"long path": { // Path too long to create a unix socket, it will use /tmp/elastic-agent
statePath: "/tmp/ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
expectedStatePath: "/tmp/ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
expectedSocketPath: "/tmp/elastic-agent/Xegnlbb8QDcqNLPzyf2l8PhVHjWvlQgZ.sock",
},
}

for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
agentFixture, err := define.NewFixtureFromLocalBuild(t, define.Version())
require.NoError(t, err)

enrollmentToken := createPolicy(t, ctx, agentFixture, info)
env := []string{
"FLEET_ENROLL=1",
"FLEET_URL=" + fleetURL,
"FLEET_ENROLLMENT_TOKEN=" + enrollmentToken,
"STATE_PATH=" + tc.statePath,
}

cmd := prepareContainerCMD(t, ctx, agentFixture, info, env)
t.Logf(">> running binary with: %v", cmd.Args)
if err := cmd.Start(); err != nil {
t.Fatalf("error running container cmd: %s", err)
}
agentOutput := cmd.Stderr.(*strings.Builder)

require.Eventuallyf(t, func() bool {
// This will return errors until it connects to the agent,
// they're mostly noise because until the agent starts running
// we will get connection errors. If the test fails
// the agent logs will be present in the error message
// which should help to explain why the agent was not
// healthy.
err = agentFixture.IsHealthy(ctx)
return err == nil
},
1*time.Minute, time.Second,
"Elastic-Agent did not report healthy. Agent status error: \"%v\", Agent logs\n%s",
err, agentOutput,
)

t.Cleanup(func() {
_ = os.RemoveAll(tc.expectedStatePath)
})

// Now that the Elastic-Agent is healthy, check that the control socket path
// is the expected one
if _, err := os.Stat(tc.expectedStatePath); err != nil {
t.Errorf("cannot stat expected state path ('%s'): %s", tc.expectedStatePath, err)
}
if _, err := os.Stat(tc.expectedSocketPath); err != nil {
t.Errorf("cannot stat expected socket path ('%s'): %s", tc.expectedSocketPath, err)
}

if t.Failed() {
containerPaths, err := os.ReadFile(filepath.Join(agentFixture.WorkDir(), "container-paths.yml"))
if err != nil {
t.Fatalf("could not read container-paths.yml: %s", err)
}

t.Log("contents of 'container-paths-yml'")
t.Log(string(containerPaths))
}
})
}
}

0 comments on commit 8a30900

Please sign in to comment.