Skip to content

Commit

Permalink
Backport: troubleshoot: fixes and updated messages (#16294) (#16309)
Browse files Browse the repository at this point in the history
  • Loading branch information
ndhanushkodi authored Feb 17, 2023
1 parent 6cd08b9 commit 94f4347
Show file tree
Hide file tree
Showing 12 changed files with 135 additions and 80 deletions.
12 changes: 7 additions & 5 deletions agent/xds/validateupstream-test/validateupstream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ListenerType], listenerName)
return ir
},
err: "no listener for upstream \"db\"",
err: "No listener for upstream \"db\"",
},
{
name: "tcp-missing-cluster",
Expand All @@ -66,7 +66,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ClusterType], sni)
return ir
},
err: "no cluster \"db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"db\"",
err: "No cluster \"db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"db\"",
},
{
name: "http-success",
Expand Down Expand Up @@ -124,7 +124,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.RouteType], "db")
return ir
},
err: "no route for upstream \"db\"",
err: "No route for upstream \"db\"",
},
{
name: "redirect",
Expand Down Expand Up @@ -170,7 +170,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ClusterType], sni)
return ir
},
err: "no cluster \"google.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"240.0.0.1\"",
err: "No cluster \"google.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"240.0.0.1\"",
},
{
name: "tproxy-http-redirect-success",
Expand Down Expand Up @@ -230,7 +230,9 @@ func TestValidateUpstreams(t *testing.T) {
var outputErrors string
for _, msgError := range messages.Errors() {
outputErrors += msgError.Message
outputErrors += msgError.PossibleActions
for _, action := range msgError.PossibleActions {
outputErrors += action
}
}
if len(tt.err) == 0 {
require.True(t, messages.Success())
Expand Down
20 changes: 13 additions & 7 deletions command/troubleshoot/proxy/troubleshoot_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,12 @@ func (c *cmd) Run(args []string) int {

t, err := troubleshoot.NewTroubleshoot(adminBindIP, adminPort)
if err != nil {
c.UI.Error("error generating troubleshoot client: " + err.Error())
c.UI.Error("Error generating troubleshoot client: " + err.Error())
return 1
}
messages, err := t.RunAllTests(c.upstreamEnvoyID, c.upstreamIP)
if err != nil {
c.UI.Error("error running the tests: " + err.Error())
c.UI.Error("Error running the tests: " + err.Error())
return 1
}

Expand All @@ -92,11 +92,16 @@ func (c *cmd) Run(args []string) int {
c.UI.SuccessOutput(o.Message)
} else {
c.UI.ErrorOutput(o.Message)
if o.PossibleActions != "" {
c.UI.UnchangedOutput(o.PossibleActions)
for _, action := range o.PossibleActions {
c.UI.UnchangedOutput("-> " + action)
}
}
}
if messages.Success() {
c.UI.UnchangedOutput("If you are still experiencing issues, you can:")
c.UI.UnchangedOutput("-> Check intentions to ensure the upstream allows traffic from this source")
c.UI.UnchangedOutput("-> If using transparent proxy, ensure DNS resolution is to the same IP you have verified here")
}
return 0
}

Expand All @@ -114,14 +119,15 @@ const (
Usage: consul troubleshoot proxy [options]
Connects to local envoy proxy and troubleshoots service mesh communication issues.
Requires an upstream service envoy identifier.
Requires an upstream service identifier. When debugging explicitly configured upstreams,
use -upstream-envoy-id, when debugging transparent proxy upstreams use -upstream-ip.
Examples:
(explicit upstreams only)
$ consul troubleshoot proxy -upstream-envoy-id foo
(transparent proxy only)
$ consul troubleshoot proxy -upstream-ip <IP>
$ consul troubleshoot proxy -upstream-ip 240.0.0.1
where 'foo' is the upstream envoy identifier which
where 'foo' is the upstream envoy identifier and '240.0.0.1' is an upstream ip which
can be obtained by running:
$ consul troubleshoot upstreams [options]
`
Expand Down
16 changes: 8 additions & 8 deletions command/troubleshoot/upstreams/troubleshoot_upstreams.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,24 +77,24 @@ func (c *cmd) Run(args []string) int {
return 1
}

c.UI.Output(fmt.Sprintf("==> Upstreams (explicit upstreams only) (%v)", len(envoyIDs)))
c.UI.HeaderOutput(fmt.Sprintf("Upstreams (explicit upstreams only) (%v)\n", len(envoyIDs)))
for _, u := range envoyIDs {
c.UI.Output(u)
c.UI.UnchangedOutput(u)
}

c.UI.Output(fmt.Sprintf("\n==> Upstream IPs (transparent proxy only) (%v)", len(upstreamIPs)))
c.UI.HeaderOutput(fmt.Sprintf("Upstream IPs (transparent proxy only) (%v)", len(upstreamIPs)))
tbl := cli.NewTable("IPs ", "Virtual ", "Cluster Names")
for _, u := range upstreamIPs {
tbl.AddRow([]string{formatIPs(u.IPs), strconv.FormatBool(u.IsVirtual), formatClusterNames(u.ClusterNames)}, []string{})
}
c.UI.Table(tbl)

c.UI.Output("\nIf you don't see your upstream address or cluster for a transparent proxy upstream:")
c.UI.Output("- Check intentions: Tproxy upstreams are configured based on intentions, make sure you " +
c.UI.UnchangedOutput("\nIf you cannot find the upstream address or cluster for a transparent proxy upstream:")
c.UI.UnchangedOutput("-> Check intentions: Transparent proxy upstreams are configured based on intentions. Make sure you " +
"have configured intentions to allow traffic to your upstream.")
c.UI.Output("- You can also check that the right cluster is being dialed by running a DNS lookup " +
"for the upstream you are dialing (i.e dig backend.svc.consul). If the address you get from that is missing " +
"from the Upstream IPs your proxy may be misconfigured.")
c.UI.UnchangedOutput("-> To check that the right cluster is being dialed, run a DNS lookup " +
"for the upstream you are dialing. For example, run `dig backend.svc.consul` to return the IP address for the `backend` service. If the address you get from that is missing " +
"from the upstream IPs, it means that your proxy may be misconfigured.")
return 0
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ package troubleshoot
import (
"context"
"fmt"
"github.com/stretchr/testify/assert"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"

"github.com/stretchr/testify/require"

libcluster "github.com/hashicorp/consul/test/integration/consul-container/libs/cluster"
Expand Down Expand Up @@ -40,11 +41,12 @@ func TestTroubleshootProxy(t *testing.T) {
"-envoy-admin-endpoint", fmt.Sprintf("localhost:%v", clientAdminPort),
"-upstream-envoy-id", libservice.StaticServerServiceName})
require.NoError(t, err)
certsValid := strings.Contains(output, "certificates are valid")
listenersExist := strings.Contains(output, fmt.Sprintf("listener for upstream \"%s\" found", libservice.StaticServerServiceName))
routesExist := strings.Contains(output, fmt.Sprintf("route for upstream \"%s\" found", libservice.StaticServerServiceName))
healthyEndpoints := strings.Contains(output, "✓ healthy endpoints for cluster")
return upstreamExists && certsValid && listenersExist && routesExist && healthyEndpoints
certsValid := strings.Contains(output, "Certificates are valid")
noRejectedConfig := strings.Contains(output, "Envoy has 0 rejected configurations")
noConnFailure := strings.Contains(output, "Envoy has detected 0 connection failure(s)")
listenersExist := strings.Contains(output, fmt.Sprintf("Listener for upstream \"%s\" found", libservice.StaticServerServiceName))
healthyEndpoints := strings.Contains(output, "Healthy endpoints for cluster")
return upstreamExists && certsValid && listenersExist && noRejectedConfig && noConnFailure && healthyEndpoints
}, 60*time.Second, 10*time.Second)
})

Expand All @@ -58,11 +60,12 @@ func TestTroubleshootProxy(t *testing.T) {
"-upstream-envoy-id", libservice.StaticServerServiceName})
require.NoError(t, err)

certsValid := strings.Contains(output, "certificates are valid")
listenersExist := strings.Contains(output, fmt.Sprintf("listener for upstream \"%s\" found", libservice.StaticServerServiceName))
routesExist := strings.Contains(output, fmt.Sprintf("route for upstream \"%s\" found", libservice.StaticServerServiceName))
endpointUnhealthy := strings.Contains(output, "no healthy endpoints for cluster")
return certsValid && listenersExist && routesExist && endpointUnhealthy
certsValid := strings.Contains(output, "Certificates are valid")
noRejectedConfig := strings.Contains(output, "Envoy has 0 rejected configurations")
noConnFailure := strings.Contains(output, "Envoy has detected 0 connection failure(s)")
listenersExist := strings.Contains(output, fmt.Sprintf("Listener for upstream \"%s\" found", libservice.StaticServerServiceName))
endpointUnhealthy := strings.Contains(output, "No healthy endpoints for cluster")
return certsValid && listenersExist && noRejectedConfig && noConnFailure && endpointUnhealthy
}, 60*time.Second, 10*time.Second)
})
}
20 changes: 16 additions & 4 deletions troubleshoot/proxy/certs.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,21 @@ func (t *Troubleshoot) validateCerts(certs *envoy_admin_v3.Certificates) validat
if certs == nil {
msg := validate.Message{
Success: false,
Message: "certificate object is nil in the proxy configuration",
Message: "Certificate object is nil in the proxy configuration",
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy and ensure XDS updates are being sent to the proxy",
},
}
return []validate.Message{msg}
}

if len(certs.GetCertificates()) == 0 {
msg := validate.Message{
Success: false,
Message: "no certificates found",
Message: "No certificates found",
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy and ensure XDS updates are being sent to the proxy",
},
}
return []validate.Message{msg}
}
Expand All @@ -36,7 +42,10 @@ func (t *Troubleshoot) validateCerts(certs *envoy_admin_v3.Certificates) validat
if now.After(cacert.GetExpirationTime().AsTime()) {
msg := validate.Message{
Success: false,
Message: "ca certificate is expired",
Message: "CA certificate is expired",
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy and ensure XDS updates are being sent to the proxy",
},
}
certMessages = append(certMessages, msg)
}
Expand All @@ -46,7 +55,10 @@ func (t *Troubleshoot) validateCerts(certs *envoy_admin_v3.Certificates) validat
if now.After(cc.GetExpirationTime().AsTime()) {
msg := validate.Message{
Success: false,
Message: "certificate chain is expired",
Message: "Certificate chain is expired",
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy and ensure XDS updates are being sent to the proxy",
},
}
certMessages = append(certMessages, msg)
}
Expand Down
12 changes: 7 additions & 5 deletions troubleshoot/proxy/certs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ func TestValidateCerts(t *testing.T) {
}{
"cert is nil": {
certs: nil,
expectedError: "certificate object is nil in the proxy configuration",
expectedError: "Certificate object is nil in the proxy configuration",
},
"no certificates": {
certs: &envoy_admin_v3.Certificates{
Certificates: []*envoy_admin_v3.Certificate{},
},
expectedError: "no certificates found",
expectedError: "No certificates found",
},
"ca expired": {
certs: &envoy_admin_v3.Certificates{
Expand All @@ -41,7 +41,7 @@ func TestValidateCerts(t *testing.T) {
},
},
},
expectedError: "ca certificate is expired",
expectedError: "CA certificate is expired",
},
"cert expired": {
certs: &envoy_admin_v3.Certificates{
Expand All @@ -55,7 +55,7 @@ func TestValidateCerts(t *testing.T) {
},
},
},
expectedError: "certificate chain is expired",
expectedError: "Certificate chain is expired",
},
}

Expand All @@ -67,7 +67,9 @@ func TestValidateCerts(t *testing.T) {
var outputErrors string
for _, msgError := range messages.Errors() {
outputErrors += msgError.Message
outputErrors += msgError.PossibleActions
for _, action := range msgError.PossibleActions {
outputErrors += action
}
}
if tc.expectedError == "" {
require.True(t, messages.Success())
Expand Down
20 changes: 15 additions & 5 deletions troubleshoot/proxy/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package troubleshoot
import (
"encoding/json"
"fmt"

envoy_admin_v3 "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
"github.com/hashicorp/consul/troubleshoot/validate"
)
Expand Down Expand Up @@ -32,10 +33,19 @@ func (t *Troubleshoot) troubleshootStats() (validate.Messages, error) {
}
}

statMessages = append(statMessages, validate.Message{
Success: true,
Message: fmt.Sprintf("Envoy has %v rejected configurations", totalConfigRejections),
})
if totalConfigRejections > 0 {
statMessages = append(statMessages, validate.Message{
Message: fmt.Sprintf("Envoy has %v rejected configurations", totalConfigRejections),
PossibleActions: []string{
"Check the logs of the Consul agent configuring the local proxy to see why Envoy rejected this configuration",
},
})
} else {
statMessages = append(statMessages, validate.Message{
Success: true,
Message: fmt.Sprintf("Envoy has %v rejected configurations", totalConfigRejections),
})
}

connectionFailureStats, err := t.getEnvoyStats("upstream_cx_connect_fail")
if err != nil {
Expand All @@ -50,7 +60,7 @@ func (t *Troubleshoot) troubleshootStats() (validate.Messages, error) {
}
statMessages = append(statMessages, validate.Message{
Success: true,
Message: fmt.Sprintf("Envoy has detected %v connection failure(s).", totalCxFailures),
Message: fmt.Sprintf("Envoy has detected %v connection failure(s)", totalCxFailures),
})
return statMessages, nil
}
Expand Down
13 changes: 2 additions & 11 deletions troubleshoot/proxy/troubleshoot_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,6 @@ import (
"github.com/hashicorp/consul/troubleshoot/validate"
)

const (
listeners string = "type.googleapis.com/envoy.admin.v3.ListenersConfigDump"
clusters string = "type.googleapis.com/envoy.admin.v3.ClustersConfigDump"
routes string = "type.googleapis.com/envoy.admin.v3.RoutesConfigDump"
endpoints string = "type.googleapis.com/envoy.admin.v3.EndpointsConfigDump"
bootstrap string = "type.googleapis.com/envoy.admin.v3.BootstrapConfigDump"
httpConnManager string = "type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager"
)

type Troubleshoot struct {
client *api.Client
envoyAddr net.IPAddr
Expand Down Expand Up @@ -79,7 +70,7 @@ func (t *Troubleshoot) RunAllTests(upstreamEnvoyID, upstreamIP string) (validate
if errors := messages.Errors(); len(errors) == 0 {
msg := validate.Message{
Success: true,
Message: "certificates are valid",
Message: "Certificates are valid",
}
allTestMessages = append(allTestMessages, msg)
}
Expand All @@ -97,7 +88,7 @@ func (t *Troubleshoot) RunAllTests(upstreamEnvoyID, upstreamIP string) (validate
if errors := messages.Errors(); len(errors) == 0 {
msg := validate.Message{
Success: true,
Message: "upstream resources are valid",
Message: "Upstream resources are valid",
}
allTestMessages = append(allTestMessages, msg)
}
Expand Down
5 changes: 3 additions & 2 deletions troubleshoot/proxy/upstreams.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package troubleshoot

import (
"fmt"

envoy_admin_v3 "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
envoy_listener_v3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
envoy_route_v3 "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
Expand Down Expand Up @@ -29,7 +30,7 @@ func (t *Troubleshoot) GetUpstreams() ([]string, []UpstreamIP, error) {

for _, cfg := range t.envoyConfigDump.Configs {
switch cfg.TypeUrl {
case listeners:
case listenersType:
lcd := &envoy_admin_v3.ListenersConfigDump{}

err := proto.Unmarshal(cfg.GetValue(), lcd)
Expand Down Expand Up @@ -135,7 +136,7 @@ func getClustersFromRoutes(routeName string, cfgDump *envoy_admin_v3.ConfigDump)

for _, cfg := range cfgDump.Configs {
switch cfg.TypeUrl {
case routes:
case routesType:
rcd := &envoy_admin_v3.RoutesConfigDump{}

err := proto.Unmarshal(cfg.GetValue(), rcd)
Expand Down
Loading

0 comments on commit 94f4347

Please sign in to comment.