Validate "ownership" of hostPort service being deleted #22587

Merged
1 commit, merged on Jul 7, 2023
12 changes: 12 additions & 0 deletions pkg/k8s/watchers/pod.go
@@ -46,6 +46,7 @@ import (
k8sTypes "github.com/cilium/cilium/pkg/k8s/types"
k8sUtils "github.com/cilium/cilium/pkg/k8s/utils"
"github.com/cilium/cilium/pkg/k8s/watchers/resources"
"github.com/cilium/cilium/pkg/k8s/watchers/utils"
"github.com/cilium/cilium/pkg/kvstore"
"github.com/cilium/cilium/pkg/labels"
"github.com/cilium/cilium/pkg/labelsfilter"
@@ -773,6 +774,17 @@ func (k *K8sWatcher) deleteHostPortMapping(pod *slim_corev1.Pod, podIPs []string
}

for _, dpSvc := range svcs {
svc, _ := k.svcManager.GetDeepCopyServiceByFrontend(dpSvc.Frontend.L3n4Addr)
// Check whether the service being deleted is in fact "owned" by the pod being deleted.
// We want to make sure that the pod being deleted is in fact the "current" backend that
// "owns" the hostPort service. Otherwise we might break hostPort connectivity for another
// pod which may have since claimed ownership of the same hostPort service that was previously
// "owned" by the pod being deleted.
// See: https://github.com/cilium/cilium/issues/22460.
if svc != nil && !utils.DeepEqualBackends(svc.Backends, dpSvc.Backends) {
continue
}

if _, err := k.svcManager.DeleteService(dpSvc.Frontend.L3n4Addr); err != nil {
logger.WithError(err).Error("Error while deleting service in LB map")
return err
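
Not part of this diff: a minimal, self-contained sketch of the decision the guard above makes, built from the same loadbalancer types used in the test file further down. The backend helper, the IP addresses, and the port are illustrative assumptions, not code from the PR; the point is only that when the backends currently installed for the frontend no longer match the backends recorded for the pod being deleted, another pod has taken over the hostPort service and the deletion is skipped.

package main

import (
	"fmt"

	cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
	"github.com/cilium/cilium/pkg/k8s/watchers/utils"
	"github.com/cilium/cilium/pkg/loadbalancer"
)

// backend builds a TCP backend on port 8081; the values are illustrative only.
func backend(ip string) *loadbalancer.Backend {
	return &loadbalancer.Backend{
		L3n4Addr: loadbalancer.L3n4Addr{
			AddrCluster: cmtypes.MustParseAddrCluster(ip),
			L4Addr:      loadbalancer.L4Addr{Protocol: loadbalancer.TCP, Port: 8081},
		},
	}
}

func main() {
	// Backends recorded in the datapath entry for the pod being deleted (hostport-1).
	stale := []*loadbalancer.Backend{backend("10.0.0.2")}
	// Backends of the service as currently installed, now owned by hostport-2.
	current := []*loadbalancer.Backend{backend("10.0.0.3")}

	if !utils.DeepEqualBackends(current, stale) {
		fmt.Println("hostPort service has been re-claimed by another pod; skipping DeleteService")
	}
}
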
29 changes: 29 additions & 0 deletions pkg/k8s/watchers/utils/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package utils

import "github.com/cilium/cilium/pkg/loadbalancer"

// DeepEqualBackends compares two slices of backends and reports whether they are deeply equal.
// The comparison is agnostic of the order in which the slices are provided.
func DeepEqualBackends(backends1 []*loadbalancer.Backend, backends2 []*loadbalancer.Backend) bool {
if len(backends1) != len(backends2) {
return false
}

l3n4AddrMap := make(map[loadbalancer.L3n4Addr]struct{})

for _, backend1 := range backends1 {
l3n4AddrMap[backend1.L3n4Addr] = struct{}{}
}

for _, backend2 := range backends2 {
if _, ok := l3n4AddrMap[backend2.L3n4Addr]; ok {
continue
}
return false
}

return true
}
159 changes: 159 additions & 0 deletions pkg/k8s/watchers/utils/utils_test.go
@@ -0,0 +1,159 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package utils

import (
"testing"

cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
"github.com/cilium/cilium/pkg/loadbalancer"
)

func TestDeepEqualBackends(t *testing.T) {
type args struct {
backends1, backends2 []*loadbalancer.Backend
}
testCases := []struct {
name string
args args
want bool
}{
{
name: "backends not equal",
args: args{
backends1: []*loadbalancer.Backend{
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.2"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.3"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
},
backends2: []*loadbalancer.Backend{
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.2"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.8"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
},
},
want: false,
},
{
name: "backend slice lengths not equal",
args: args{
backends1: []*loadbalancer.Backend{
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.2"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.3"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
},
backends2: []*loadbalancer.Backend{
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.2"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
},
},
want: false,
},
{
name: "backends equal",
args: args{
backends1: []*loadbalancer.Backend{
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.2"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.3"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
},
backends2: []*loadbalancer.Backend{
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.2"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
{
L3n4Addr: loadbalancer.L3n4Addr{
AddrCluster: cmtypes.MustParseAddrCluster("10.0.0.3"),
L4Addr: loadbalancer.L4Addr{
Protocol: loadbalancer.TCP,
Port: 8081,
},
},
},
},
},
want: true,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
if got := DeepEqualBackends(tc.args.backends1, tc.args.backends2); got != tc.want {
t.Errorf("DeepEqualBackends() = %v, want %v", got, tc.want)
}
})
}
}
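
The table-driven cases above compare slices whose elements appear in the same order. As a complement, here is a hedged sketch of the order-agnostic case the doc comment promises; the TestDeepEqualBackendsOrderAgnostic name, the be helper, and the addresses are illustrative assumptions, not part of the PR.

package utils_test

import (
	"testing"

	cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
	"github.com/cilium/cilium/pkg/k8s/watchers/utils"
	"github.com/cilium/cilium/pkg/loadbalancer"
)

// TestDeepEqualBackendsOrderAgnostic checks that the comparison ignores slice order.
func TestDeepEqualBackendsOrderAgnostic(t *testing.T) {
	be := func(ip string) *loadbalancer.Backend {
		return &loadbalancer.Backend{
			L3n4Addr: loadbalancer.L3n4Addr{
				AddrCluster: cmtypes.MustParseAddrCluster(ip),
				L4Addr:      loadbalancer.L4Addr{Protocol: loadbalancer.TCP, Port: 8081},
			},
		}
	}
	a := []*loadbalancer.Backend{be("10.0.0.2"), be("10.0.0.3")}
	b := []*loadbalancer.Backend{be("10.0.0.3"), be("10.0.0.2")} // same backends, reversed order
	if !utils.DeepEqualBackends(a, b) {
		t.Error("expected order-agnostic equality")
	}
}
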
1 change: 1 addition & 0 deletions pkg/k8s/watchers/watcher.go
@@ -140,6 +140,7 @@ type policyRepository interface {

type svcManager interface {
DeleteService(frontend loadbalancer.L3n4Addr) (bool, error)
GetDeepCopyServiceByFrontend(frontend loadbalancer.L3n4Addr) (*loadbalancer.SVC, bool)
UpsertService(*loadbalancer.SVC) (bool, loadbalancer.ID, error)
RegisterL7LBService(serviceName, resourceName loadbalancer.ServiceName, ports []string, proxyPort uint16) error
RegisterL7LBServiceBackendSync(serviceName, resourceName loadbalancer.ServiceName, ports []string) error
4 changes: 4 additions & 0 deletions pkg/k8s/watchers/watcher_test.go
@@ -113,6 +113,10 @@ func (f *fakeSvcManager) DeleteService(frontend loadbalancer.L3n4Addr) (bool, er
panic("OnDeleteService(loadbalancer.L3n4Addr) (bool, error) was called and is not set!")
}

func (f *fakeSvcManager) GetDeepCopyServiceByFrontend(frontend loadbalancer.L3n4Addr) (*loadbalancer.SVC, bool) {
return nil, false
}

func (f *fakeSvcManager) UpsertService(p *loadbalancer.SVC) (bool, loadbalancer.ID, error) {
if f.OnUpsertService != nil {
return f.OnUpsertService(p)
12 changes: 12 additions & 0 deletions pkg/service/service.go
@@ -954,6 +954,18 @@ func (s *Service) GetDeepCopyServicesByName(name, namespace string) (svcs []*lb.
return svcs
}

// GetDeepCopyServiceByFrontend returns a deep-copy of the service that matches the Frontend address.
func (s *Service) GetDeepCopyServiceByFrontend(frontend lb.L3n4Addr) (*lb.SVC, bool) {
s.RLock()
defer s.RUnlock()

if svc, found := s.svcByHash[frontend.Hash()]; found {
return svc.deepCopyToLBSVC(), true
}

return nil, false
}

// RestoreServices restores services from BPF maps.
//
// It first restores all the service entries, followed by backend entries.
1 change: 1 addition & 0 deletions test/controlplane/controlplane_test.go
@@ -9,6 +9,7 @@ import (
_ "github.com/cilium/cilium/test/controlplane/ciliumnetworkpolicies"
_ "github.com/cilium/cilium/test/controlplane/node"
_ "github.com/cilium/cilium/test/controlplane/node/ciliumnodes"
_ "github.com/cilium/cilium/test/controlplane/pod/hostport"
_ "github.com/cilium/cilium/test/controlplane/services/dualstack"
_ "github.com/cilium/cilium/test/controlplane/services/graceful-termination"
_ "github.com/cilium/cilium/test/controlplane/services/nodeport"
60 changes: 60 additions & 0 deletions test/controlplane/pod/hostport/generate.sh
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
#
# Generate the golden test files for the HostPort test.
#

set -eux

dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

. "${dir}/../../k8s_versions.sh"

export KUBECONFIG="${dir}/kubeconfig"

: Start a kind cluster
kind create cluster --config "${dir}/manifests/kind-config-1.26.yaml" --name hostport

: Wait for service account to be created
until kubectl get serviceaccount/default; do
sleep 5
done

: Preloading images
kind load --name hostport docker-image "${cilium_container_repo}/${cilium_container_image}:${cilium_version}" || true
kind load --name hostport docker-image "${cilium_container_repo}/${cilium_operator_container_image}:${cilium_version}" || true

: Install cilium
cilium install --wait

: Dump the initial state
kubectl get nodes,pods -o yaml > "${dir}/init.yaml"

: Apply manifest for hostport-1 pod
kubectl create namespace test
kubectl apply -f "${dir}/manifests/hostport-1.yaml"

: Wait for all pods
kubectl wait -n test --for=condition=ready --timeout=60s pod hostport-1

: Dump the pods
kubectl get -n test pods -o yaml > "${dir}/state1.yaml"

: Put the hostport-1 pod in "completed" state by terminating its nginx container.
kubectl -n test exec -it hostport-1 -c nginx -- /bin/sh -c "kill 1"

: Apply manifest for hostport-2 pod and wait for all pods
kubectl apply -f "${dir}/manifests/hostport-2.yaml"
kubectl wait -n test --for=condition=ready --timeout=60s pod hostport-2

: Dump the pods
kubectl get -n test pods -o yaml > "${dir}/state2.yaml"

: Delete the completed hostport-1 pod.
kubectl -n test delete pod hostport-1

: Dump the final state
kubectl get -n test pods -o yaml > "${dir}/state3.yaml"

: Tear down the cluster
kind delete clusters hostport
rm -f "${KUBECONFIG}"
58 changes: 58 additions & 0 deletions test/controlplane/pod/hostport/hostport.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package hostport

import (
"os"
"path"
"testing"

operatorOption "github.com/cilium/cilium/operator/option"
agentOption "github.com/cilium/cilium/pkg/option"
"github.com/cilium/cilium/test/controlplane"
"github.com/cilium/cilium/test/controlplane/services/helpers"
"github.com/cilium/cilium/test/controlplane/suite"
)

func init() {
suite.AddTestCase("Pod/HostPort", testHostPort)
}

func testHostPort(t *testing.T) {
cwd, err := os.Getwd()
if err != nil {
t.Fatal(err)
}

abs := func(f string) string { return path.Join(cwd, "pod", "hostport", f) }

modConfig := func(daemonCfg *agentOption.DaemonConfig, _ *operatorOption.OperatorConfig) {}

k8sVersions := controlplane.K8sVersions()
// We only need to test against the latest k8s version
test := suite.NewControlPlaneTest(t, "hostport-control-plane", k8sVersions[len(k8sVersions)-1])
defer test.StopAgent()

// Feed in initial state and start the agent.
test.
UpdateObjectsFromFile(abs("init.yaml")).
SetupEnvironment(modConfig).
StartAgent().

// Step 1: Create the first hostport pod.
// lbmap1.golden: Hostport service exists in the Datapath with hostport-1 pod as backend.
UpdateObjectsFromFile(abs("state1.yaml")).
Eventually(func() error { return helpers.ValidateLBMapGoldenFile(abs("lbmap1.golden"), test.Datapath) }).

// Step 2: Mark the first pod as "completed", and create a second hostport pod using the same port
// lbmap2.golden: Hostport service exists in the Datapath with hostport-2 pod as backend.
UpdateObjectsFromFile(abs("state2.yaml")).
Eventually(func() error { return helpers.ValidateLBMapGoldenFile(abs("lbmap2.golden"), test.Datapath) }).

// Step 3: Delete the completed pod, and verify that the hostport service doesn't get deleted.
// lbmap3.golden: Hostport service still exists in the Datapath, with hostport-2 pod as backend.
UpdateObjectsFromFile(abs("state3.yaml")).
Eventually(func() error { return helpers.ValidateLBMapGoldenFile(abs("lbmap3.golden"), test.Datapath) })
}