datapath: Fix wrong rev-NAT xlation due to stale conntrack entry #10984

Merged: 3 commits, merged Apr 21, 2020
bpf/bpf_lxc.c: 2 changes (0 additions, 2 deletions)
@@ -199,7 +199,6 @@ static __always_inline int ipv6_l3_from_lxc(struct __ctx_buff *ctx,
 	case CT_ESTABLISHED:
 		/* Did we end up at a stale non-service entry? Recreate if so. */
 		if (unlikely(ct_state.rev_nat_index != ct_state_new.rev_nat_index)) {
-			ct_delete6(get_ct_map6(tuple), tuple, ctx);
 			goto ct_recreate6;
 		}
 		break;
@@ -557,7 +556,6 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,
 	case CT_ESTABLISHED:
 		/* Did we end up at a stale non-service entry? Recreate if so. */
 		if (unlikely(ct_state.rev_nat_index != ct_state_new.rev_nat_index)) {
-			ct_delete4(get_ct_map4(&tuple), &tuple, ctx);
 			goto ct_recreate4;
 		}
 		break;
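Note on the ct_delete{4,6} removal above: skipping the explicit delete on the recreate path is safe only if the subsequent CT create overwrites the existing entry for the same tuple. A minimal runnable sketch of that assumption, using a hypothetical one-slot "map" with BPF_ANY-style insert-or-replace semantics (names and layouts are illustrative, not Cilium's actual internals):

```c
#include <stdio.h>

/* Hypothetical, simplified stand-ins for Cilium's CT types. */
struct ct_tuple { unsigned int saddr, daddr; unsigned short sport, dport; };
struct ct_entry { unsigned short rev_nat_index; };

/* One-slot "map" emulating a BPF map update with flags == 0 (BPF_ANY):
 * insert-or-replace keyed by the tuple. */
static struct ct_tuple slot_key;
static struct ct_entry slot_val;

static int map_update_elem_sketch(const struct ct_tuple *key,
				  const struct ct_entry *val)
{
	slot_key = *key;	/* overwrites any existing entry for this key */
	slot_val = *val;
	return 0;
}

int main(void)
{
	struct ct_tuple t = { 0x0a000001, 0x0a000002, 64002, 80 };
	struct ct_entry stale = { .rev_nat_index = 1 };
	struct ct_entry fresh = { .rev_nat_index = 2 };

	map_update_elem_sketch(&t, &stale);
	/* The "goto ct_recreate4" path: no delete needed, create overwrites. */
	map_update_elem_sketch(&t, &fresh);
	printf("rev_nat_index now %u\n", slot_val.rev_nat_index); /* -> 2 */
	return 0;
}
```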
bpf/lib/nodeport.h: 20 changes (16 additions, 4 deletions)
@@ -522,6 +522,7 @@ static __always_inline int nodeport_lb6(struct __ctx_buff *ctx,
 
 	switch (ret) {
 	case CT_NEW:
+redo_all:
 		ct_state_new.src_sec_id = SECLABEL;
 		ct_state_new.node_port = 1;
 		ret = ct_create6(get_ct_map6(&tuple), NULL, &tuple, ctx,
@@ -530,7 +531,7 @@
 			return ret;
 		if (backend_local) {
 			ct_flip_tuple_dir6(&tuple);
-redo:
+redo_local:
 			ct_state_new.rev_nat_index = 0;
 			ret = ct_create6(get_ct_map6(&tuple), NULL, &tuple, ctx,
 					 CT_INGRESS, &ct_state_new, false);
@@ -541,13 +542,16 @@
 
 	case CT_ESTABLISHED:
 	case CT_REPLY:
+		if (unlikely(ct_state.rev_nat_index != svc->rev_nat_index))
+			goto redo_all;
+
 		if (backend_local) {
 			ct_flip_tuple_dir6(&tuple);
 			if (!__ct_entry_keep_alive(get_ct_map6(&tuple),
 						   &tuple)) {
 				ct_state_new.src_sec_id = SECLABEL;
 				ct_state_new.node_port = 1;
-				goto redo;
+				goto redo_local;
 			}
 		}
 		break;
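The two added lines above are the heart of the fix for the IPv6 path: an established entry is trusted only if the rev-NAT index it recorded matches the service this packet was just load-balanced for; on mismatch, both CT entries are recreated via redo_all. A standalone sketch of that check, with simplified stand-in types (not the real ct_state/lb6_service layouts):

```c
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins: only the field the check needs. */
struct ct_state_sketch  { unsigned short rev_nat_index; };
struct lb_service_sketch { unsigned short rev_nat_index; };

/* A mismatch means the tuple collided with an earlier flow that targeted
 * a different service, so the stale entries must be recreated. */
static bool ct_entry_stale(const struct ct_state_sketch *cs,
			   const struct lb_service_sketch *svc)
{
	return cs->rev_nat_index != svc->rev_nat_index;
}

int main(void)
{
	struct ct_state_sketch cs = { .rev_nat_index = 1 };    /* entry made for svc1 */
	struct lb_service_sketch svc2 = { .rev_nat_index = 2 }; /* packet is for svc2 */
	printf("recreate: %s\n", ct_entry_stale(&cs, &svc2) ? "yes" : "no");
	return 0;
}
```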
@@ -1081,6 +1085,7 @@ static __always_inline int nodeport_lb4(struct __ctx_buff *ctx,
 
 	switch (ret) {
 	case CT_NEW:
+redo_all:
 		ct_state_new.src_sec_id = SECLABEL;
 		ct_state_new.node_port = 1;
 		ret = ct_create4(get_ct_map4(&tuple), NULL, &tuple, ctx,
@@ -1089,7 +1094,9 @@
 			return ret;
 		if (backend_local) {
 			ct_flip_tuple_dir4(&tuple);
-redo:
+redo_local:
+			/* Reset rev_nat_index, otherwise ipv4_policy() in
+			 * bpf_lxc will do invalid xlation */
 			ct_state_new.rev_nat_index = 0;
 			ret = ct_create4(get_ct_map4(&tuple), NULL, &tuple, ctx,
 					 CT_INGRESS, &ct_state_new, false);
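On the comment added at redo_local: the CT_INGRESS entry for a local backend must keep rev_nat_index at 0, since a nonzero index is later used as a reverse-NAT lookup key and the packet is rewritten to that service's frontend. A rough, hypothetical illustration of such a reverse translation step (structs simplified, checksum update omitted):

```c
#include <stdio.h>

/* Hypothetical, simplified reverse-NAT record looked up by rev_nat_index. */
struct lb4_reverse_nat_sketch { unsigned int address; unsigned short port; };

struct packet_sketch { unsigned int saddr; unsigned short sport; };

/* Reverse translation: rewrite the source back to the service frontend
 * recorded under the CT entry's rev_nat_index. With a stale or wrong index,
 * the packet is rewritten to the wrong frontend, and this is the "invalid
 * xlation" the reset above prevents. */
static void lb4_rev_nat_sketch(struct packet_sketch *pkt,
			       const struct lb4_reverse_nat_sketch *nat)
{
	pkt->saddr = nat->address;	/* L3 rewrite; real code also fixes csums */
	pkt->sport = nat->port;
}

int main(void)
{
	struct packet_sketch reply = { .saddr = 0x0a00000a, .sport = 8080 };
	struct lb4_reverse_nat_sketch svc1_frontend = { 0xc0a80102, 30001 };
	lb4_rev_nat_sketch(&reply, &svc1_frontend);
	printf("reply now appears from %#x:%u\n", reply.saddr, reply.sport);
	return 0;
}
```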
@@ -1100,13 +1107,18 @@
 
 	case CT_ESTABLISHED:
 	case CT_REPLY:
+		/* Recreate CT entries, as the existing one is stale and
+		 * belongs to a flow which targets a different svc */
+		if (unlikely(ct_state.rev_nat_index != svc->rev_nat_index))
+			goto redo_all;
+
 		if (backend_local) {
 			ct_flip_tuple_dir4(&tuple);
 			if (!__ct_entry_keep_alive(get_ct_map4(&tuple),
 						   &tuple)) {
 				ct_state_new.src_sec_id = SECLABEL;
 				ct_state_new.node_port = 1;
-				goto redo;
+				goto redo_local;
 			}
 		}
 		break;
test/k8sT/Services.go: 25 changes (25 additions, 0 deletions)
@@ -965,6 +965,31 @@ var _ = Describe("K8sServicesTest", func() {
 			testHostPort()
 		})
 
+		It("Tests GH#10983", func() {
+			var data v1.Service
+			_, k8s2IP := kubectl.GetNodeInfo(helpers.K8s2)
+
+			// We need two NodePort services with the same single
+			// endpoint, so we choose the "test-nodeport{-local,}-k8s2"
+			// svcs. Both svcs will be accessed via the k8s2 node,
+			// because "test-nodeport-local-k8s2" has the local
+			// external traffic policy.
+			err := kubectl.Get(helpers.DefaultNamespace, "svc test-nodeport-local-k8s2").Unmarshal(&data)
+			Expect(err).Should(BeNil(), "Can not retrieve service")
+			svc1URL := getHTTPLink(k8s2IP, data.Spec.Ports[0].NodePort)
+			err = kubectl.Get(helpers.DefaultNamespace, "svc test-nodeport-k8s2").Unmarshal(&data)
+			Expect(err).Should(BeNil(), "Can not retrieve service")
+			svc2URL := getHTTPLink(k8s2IP, data.Spec.Ports[0].NodePort)
+
+			// Send two requests from the same src IP and port to the
+			// endpoint via two different NodePort svcs to trigger the
+			// stale conntrack entry issue. Once it's fixed, the second
+			// request should not fail.
+			doRequestsFromThirdHostWithLocalPort(svc1URL, 1, false, 64002)
+			time.Sleep(120 * time.Second) // to reuse the source port
+			doRequestsFromThirdHostWithLocalPort(svc2URL, 1, false, 64002)
+		})
+
 		SkipContextIf(helpers.DoesNotExistNodeWithoutCilium, "Tests with MetalLB", func() {
 			var (
 				metalLB string
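Why this test reproduces GH#10983: after service translation, the conntrack key contains only the client and the chosen backend, not the NodePort frontend, so two requests from the same source port through two different NodePorts of the same backend collide on one CT entry, and only the stored rev_nat_index distinguishes the services. A simplified sketch of the collision (hypothetical tuple layout, addresses made up):

```c
#include <stdio.h>
#include <string.h>

/* Simplified post-xlation CT key: the NodePort frontend is gone. */
struct ct_tuple_sketch {
	unsigned int saddr, daddr;	/* client IP, backend pod IP */
	unsigned short sport, dport;	/* client port, backend port */
};

int main(void)
{
	/* Request 1: client:64002 -> k8s2:<svc1 NodePort> -> backend:80 */
	struct ct_tuple_sketch req1 = { 0xc0a80101, 0x0a00000a, 64002, 80 };
	/* Request 2: client:64002 -> k8s2:<svc2 NodePort> -> same backend:80 */
	struct ct_tuple_sketch req2 = { 0xc0a80101, 0x0a00000a, 64002, 80 };

	/* Identical keys: request 2 hits request 1's entry, whose
	 * rev_nat_index still points at svc1, hence the wrong rev-NAT
	 * on the reply, which is what the fix detects and recreates. */
	printf("same CT key: %s\n",
	       memcmp(&req1, &req2, sizeof(req1)) == 0 ? "yes" : "no");
	return 0;
}
```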