Skip to content

Commit

Permalink
clustermesh status: improve error reporting when not ready
Browse files Browse the repository at this point in the history
Let's leverage the additional status flags introduced in
cilium/cilium#26788 to provide more detailed information
concerning why a node cannot connect to a remote cluster,
and simplify troubleshooting. In particular, it might be
due to an etcd connection problem, to the cluster config
being required but not found, or to the synchronization
process being still in progress.

Signed-off-by: Marco Iorio <marco.iorio@isovalent.com>
  • Loading branch information
giorio94 committed Jul 31, 2023
1 parent 047830f commit d9904f1
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 1 deletion.
32 changes: 31 additions & 1 deletion clustermesh/clustermesh.go
Original file line number Diff line number Diff line change
Expand Up @@ -1220,6 +1220,36 @@ func (c *ConnectivityStatus) addError(pod, cluster string, err error) {
m[cluster].Errors = append(m[cluster].Errors, err)
}

func remoteClusterStatusToError(status *models.RemoteCluster) error {
switch {
case status == nil:
return errors.New("unknown status")
case !status.Connected:
return errors.New(status.Status)
case status.Config == nil:
return errors.New("remote cluster configuration retrieval status unknown")
case status.Config.Required && !status.Config.Retrieved:
return errors.New("remote cluster configuration required but not found")
case status.Synced == nil:
return errors.New("synchronization status unknown")
case !(status.Synced.Nodes && status.Synced.Endpoints && status.Synced.Identities && status.Synced.Services):
var toSync []string
appendNotSynced := func(name string, synced bool) {
if !synced {
toSync = append(toSync, name)
}
}
appendNotSynced("endpoints", status.Synced.Endpoints)
appendNotSynced("identities", status.Synced.Identities)
appendNotSynced("nodes", status.Synced.Nodes)
appendNotSynced("services", status.Synced.Services)

return fmt.Errorf("synchronization in progress for %s", strings.Join(toSync, ", "))
default:
return errors.New("not ready")
}
}

func (c *ConnectivityStatus) parseAgentStatus(name string, expected []string, s *status.ClusterMeshAgentConnectivityStatus) {
if c.GlobalServices.Min < 0 || c.GlobalServices.Min > s.GlobalServices {
c.GlobalServices.Min = s.GlobalServices
Expand All @@ -1246,7 +1276,7 @@ func (c *ConnectivityStatus) parseAgentStatus(name string, expected []string, s
ready++
stats.Connected++
} else {
c.addError(name, cluster.Name, fmt.Errorf("cluster is not ready: %s", cluster.Status))
c.addError(name, cluster.Name, remoteClusterStatusToError(cluster))
}
}

Expand Down
69 changes: 69 additions & 0 deletions clustermesh/clustermesh_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"testing"

"github.com/cilium/cilium/api/v1/models"
"github.com/stretchr/testify/assert"
"helm.sh/helm/v3/pkg/chart"
"helm.sh/helm/v3/pkg/release"
Expand Down Expand Up @@ -206,6 +207,74 @@ func TestRemoveFromClustermeshConfig(t *testing.T) {
}
}

func TestRemoteClusterStatusToError(t *testing.T) {
tests := []struct {
name string
status *models.RemoteCluster
expected string
}{
{
name: "nil status",
expected: "unknown status",
},
{
name: "not connected",
status: &models.RemoteCluster{Status: "foo"},
expected: "foo",
},
{
name: "connected, unknown config",
status: &models.RemoteCluster{Connected: true},
expected: "remote cluster configuration retrieval status unknown",
},
{
name: "connected, config not found",
status: &models.RemoteCluster{
Connected: true, Config: &models.RemoteClusterConfig{Required: true}},
expected: "remote cluster configuration required but not found",
},
{
name: "connected, config not required, sync status unknown",
status: &models.RemoteCluster{
Connected: true, Config: &models.RemoteClusterConfig{}},
expected: "synchronization status unknown",
},
{
name: "connected, config found, no resource type synced",
status: &models.RemoteCluster{
Connected: true,
Config: &models.RemoteClusterConfig{Required: true, Retrieved: true},
Synced: &models.RemoteClusterSynced{},
},
expected: "synchronization in progress for endpoints, identities, nodes, services",
},
{
name: "connected, config found, some resource type not synced",
status: &models.RemoteCluster{
Connected: true,
Config: &models.RemoteClusterConfig{Required: true, Retrieved: true},
Synced: &models.RemoteClusterSynced{Endpoints: true, Nodes: true, Services: true},
},
expected: "synchronization in progress for identities",
},
{
name: "none of the above",
status: &models.RemoteCluster{
Connected: true,
Config: &models.RemoteClusterConfig{Required: true, Retrieved: true},
Synced: &models.RemoteClusterSynced{Endpoints: true, Identities: true, Nodes: true, Services: true},
},
expected: "not ready",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.expected, remoteClusterStatusToError(tt.status).Error())
})
}
}

// Helpers

type noOptWriter struct{}
Expand Down

0 comments on commit d9904f1

Please sign in to comment.