Skip to content

Commit

Permalink
[e2e] Make probe outputs more informative (#5140)
Browse files Browse the repository at this point in the history
"agnhost connect" outputs nothing when it succeeds. With the current probe
output, as shown below, it is impossible to tell which attempt succeeded,
or whether any attempt succeeded at all:

```
TIMEOUT
TIMEOUT
```

We output "CONNECTED" when a connection succeeds and prepend a sequence
number to each attempt, to make the result more informative. Example
output:

```
1: CONNECTED
2: TIMEOUT
3: TIMEOUT
```

Signed-off-by: Quan Tian <qtian@vmware.com>
  • Loading branch information
tnqn committed Jun 27, 2023
1 parent af9477c commit 5c55f17
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 23 deletions.
10 changes: 1 addition & 9 deletions multicluster/test/e2e/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,15 +305,7 @@ func (data *MCTestData) probeFromPodInCluster(
corev1.ProtocolUDP: "udp",
corev1.ProtocolSCTP: "sctp",
}
// There seems to be an issue when running Antrea in Kind where tunnel traffic is dropped at
// first. This leads to the first test being run consistently failing. To avoid this issue
// until it is resolved, we try to connect 3 times.
// See https://github.com/antrea-io/antrea/issues/467.
cmd := []string{
"/bin/sh",
"-c",
fmt.Sprintf("for i in $(seq 1 3); do /agnhost connect %s:%d --timeout=1s --protocol=%s; done;", dstAddr, port, protocolStr[protocol]),
}
cmd := antreae2e.ProbeCommand(fmt.Sprintf("%s:%d", dstAddr, port), protocolStr[protocol], "")
log.Tracef("Running: kubectl exec %s -c %s -n %s -- %s", podName, containerName, podNamespace, strings.Join(cmd, " "))
stdout, stderr, err := data.runCommandFromPod(cluster, podNamespace, podName, containerName, cmd)
// It needs to check both err and stderr because:
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/antreapolicy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3819,8 +3819,8 @@ sleep 3600
for idx, clientName := range clientNames {
log.Tracef("Probing: 1.1.1.1 -> %s:%d", nodeIP(idx), nodePort)
// Connect to NodePort in the fake external network.
cmd = fmt.Sprintf("for i in $(seq 1 3); do ip netns exec %s /agnhost connect %s:%d --timeout=1s --protocol=tcp; done;", testNetns, nodeIP(idx), nodePort)
stdout, stderr, err := data.RunCommandFromPod(data.testNamespace, clientName, agnhostContainerName, []string{"sh", "-c", cmd})
cmd := ProbeCommand(fmt.Sprintf("%s:%d", nodeIP(idx), nodePort), "tcp", fmt.Sprintf("ip netns exec %s", testNetns))
stdout, stderr, err := data.RunCommandFromPod(data.testNamespace, clientName, agnhostContainerName, cmd)
connectivity := Connected
if err != nil || stderr != "" {
// log this error as trace since may be an expected failure
Expand Down
33 changes: 23 additions & 10 deletions test/e2e/k8s_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,27 @@ func (k *KubernetesUtils) getTCPv4SourcePortRangeFromPod(podNamespace, podNameLa
return int32(startPort), int32(endPort), nil
}

// ProbeCommand builds the "/bin/sh -c <script>" argument vector used to probe the provided url
// with "agnhost connect" over the given protocol.
//
// executor is inserted verbatim in front of "/agnhost" and controls where the prober runs. For
// example, "ip netns exec NAME" runs the prober in another network namespace. Pass an empty string
// to run the prober directly.
//
// The script makes 3 connection attempts. This dates back to when the OVS netdev datapath was used
// for Kind clusters, as the first packet sent on a tunnel was always dropped
// (https://github.com/antrea-io/antrea/issues/467). It may be possible to revisit this now that the
// OVS kernel datapath is used for Kind.
//
// "agnhost connect" prints nothing on success, so the script writes "CONNECTED" to stderr in that
// case, and it prefixes every attempt with its sequence number to make the result more informative.
// Example output:
// 1: CONNECTED
// 2: TIMEOUT
// 3: TIMEOUT
func ProbeCommand(url, protocol, executor string) []string {
	// The placeholders are, in order: executor, url, protocol. Everything is redirected to stderr
	// so that the sequence prefix lands on the same stream as the "CONNECTED" marker.
	const probeLoopFormat = `for i in $(seq 1 3); do echo -n "${i}: " >&2 && %s /agnhost connect %s --timeout=1s --protocol=%s && echo "CONNECTED" >&2; done;`

	script := fmt.Sprintf(probeLoopFormat, executor, url, protocol)
	return []string{"/bin/sh", "-c", script}
}

func (k *KubernetesUtils) probe(
pod *v1.Pod,
podName string,
Expand All @@ -172,15 +193,7 @@ func (k *KubernetesUtils) probe(
utils.ProtocolUDP: "udp",
utils.ProtocolSCTP: "sctp",
}
// We try to connect 3 times. This dates back to when we were using the OVS netdev datapath
// for Kind clusters, as the first packet sent on a tunnel was always dropped
// (https://github.com/antrea-io/antrea/issues/467). We may be able to revisit this now that
// we use the OVS kernel datapath for Kind.
cmd := []string{
"/bin/sh",
"-c",
fmt.Sprintf("for i in $(seq 1 3); do /agnhost connect %s:%d --timeout=1s --protocol=%s; done;", dstAddr, port, protocolStr[protocol]),
}
cmd := ProbeCommand(fmt.Sprintf("%s:%d", dstAddr, port), protocolStr[protocol], "")
log.Tracef("Running: kubectl exec %s -c %s -n %s -- %s", pod.Name, containerName, pod.Namespace, strings.Join(cmd, " "))
stdout, stderr, err := k.RunCommandFromPod(pod.Namespace, pod.Name, containerName, cmd)
// It needs to check both err and stderr because:
Expand All @@ -205,7 +218,7 @@ func (k *KubernetesUtils) probe(

// DecideProbeResult uses the probe stderr to decide the connectivity.
func DecideProbeResult(stderr string, probeNum int) PodConnectivityMark {
countConnected := probeNum - strings.Count(stderr, "\n")
countConnected := strings.Count(stderr, "CONNECTED")
countDropped := strings.Count(stderr, "TIMEOUT")
// For our UDP rejection cases, agnhost will return:
// For IPv4: 'UNKNOWN: read udp [src]->[dst]: read: no route to host'
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ func testClusterIPCases(t *testing.T, data *TestData, url string, clients, hostN
}

func testClusterIPFromPod(t *testing.T, data *TestData, url, nodeName, podName string, hostNetwork bool, namespace string, expectedConnectivity PodConnectivityMark) {
cmd := fmt.Sprintf("for i in $(seq 1 3); do /agnhost connect %s --timeout=1s --protocol=tcp; done;", url)
stdout, stderr, err := data.RunCommandFromPod(namespace, podName, agnhostContainerName, []string{"sh", "-c", cmd})
cmd := ProbeCommand(url, "tcp", "")
stdout, stderr, err := data.RunCommandFromPod(namespace, podName, agnhostContainerName, cmd)
connectivity := Connected
if err != nil || stderr != "" {
// If err != nil and stderr == "", then it means this probe failed because of the command instead of connectivity.
Expand Down

0 comments on commit 5c55f17

Please sign in to comment.