Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sysdump: collect 'clustermesh-apiserver' + improvements #513

Merged
merged 5 commits into from
Sep 2, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions internal/cli/cmd/sysdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ func newCmdSysdump() *cobra.Command {
cmd.Flags().StringVar(&sysdumpOptions.CiliumOperatorNamespace,
"cilium-operator-namespace", sysdump.DefaultCiliumOperatorNamespace,
"The namespace Cilium operator is running in")
cmd.Flags().StringVar(&sysdumpOptions.ClustermeshApiserverLabelSelector,
"clustermesh-apiserver-label-selector", sysdump.DefaultClustermeshApiserverLabelSelector,
"The labels used to target 'clustermesh-apiserver' pods")
// Namespace in which to look for the 'clustermesh-apiserver' deployment and pods.
cmd.Flags().StringVar(&sysdumpOptions.ClustermeshApiserverNamespace,
	"clustermesh-apiserver-namespace", sysdump.DefaultClustermeshApiserverNamespace,
	"The namespace 'clustermesh-apiserver' is running in")
cmd.Flags().BoolVar(&sysdumpOptions.Debug,
"debug", sysdump.DefaultDebug,
"Whether to enable debug logging")
Expand Down
28 changes: 15 additions & 13 deletions sysdump/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,20 @@ import (
)

const (
awsNodeDaemonSetName = "aws-node"
awsNodeDaemonSetNamespace = "kube-system"
ciliumAgentContainerName = "cilium-agent"
ciliumConfigConfigMapName = "cilium-config"
ciliumDaemonSetName = "cilium"
ciliumEtcdSecretsSecretName = "cilium-etcd-secrets"
ciliumOperatorDeploymentName = "cilium-operator"
hubbleContainerName = "hubble"
hubbleDaemonSetName = "hubble"
hubbleRelayContainerName = "hubble-relay"
hubbleRelayDeploymentName = "hubble-relay"
hubbleUIDeploymentName = "hubble-ui"
redacted = "XXXXXX"
awsNodeDaemonSetName = "aws-node"
awsNodeDaemonSetNamespace = "kube-system"
ciliumAgentContainerName = "cilium-agent"
ciliumConfigConfigMapName = "cilium-config"
ciliumDaemonSetName = "cilium"
ciliumEtcdSecretsSecretName = "cilium-etcd-secrets"
ciliumOperatorDeploymentName = "cilium-operator"
clustermeshApiserverDeploymentName = "clustermesh-apiserver"
tklauser marked this conversation as resolved.
Show resolved Hide resolved
hubbleContainerName = "hubble"
hubbleDaemonSetName = "hubble"
hubbleRelayContainerName = "hubble-relay"
hubbleRelayDeploymentName = "hubble-relay"
hubbleUIDeploymentName = "hubble-ui"
redacted = "XXXXXX"
)

const (
Expand All @@ -39,6 +40,7 @@ const (
ciliumNetworkPoliciesFileName = "ciliumnetworkpolicies-<ts>.yaml"
ciliumNodesFileName = "ciliumnodes-<ts>.yaml"
ciliumOperatorDeploymentFileName = "cilium-operator-deployment-<ts>.yaml"
clustermeshApiserverDeploymentFileName = "clustermesh-apiserver-deployment-<ts>.yaml"
eniconfigsFileName = "aws-eniconfigs-<ts>.yaml"
gopsFileName = "gops-%s-%s-<ts>-%s.txt"
hubbleDaemonsetFileName = "hubble-daemonset-<ts>.yaml"
Expand Down
42 changes: 22 additions & 20 deletions sysdump/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,28 @@ const (
)

const (
DefaultCiliumLabelSelector = labelPrefix + "cilium"
DefaultCiliumNamespace = "kube-system"
DefaultCiliumOperatorLabelSelector = "io.cilium/app=operator"
DefaultCiliumOperatorNamespace = DefaultCiliumNamespace
DefaultDebug = false
DefaultHubbleLabelSelector = labelPrefix + "hubble"
DefaultHubbleNamespace = DefaultCiliumNamespace
DefaultHubbleFlowsCount = 10000
DefaultHubbleFlowsTimeout = 5 * time.Second
DefaultHubbleRelayLabelSelector = labelPrefix + "hubble-relay"
DefaultHubbleRelayNamespace = DefaultCiliumNamespace
DefaultHubbleUILabelSelector = labelPrefix + "hubble-ui"
DefaultHubbleUINamespace = DefaultCiliumNamespace
DefaultLargeSysdumpAbortTimeout = 5 * time.Second
DefaultLargeSysdumpThreshold = 20
DefaultLogsSinceTime = 8760 * time.Hour // 1y
DefaultLogsLimitBytes = 1073741824 // 1GiB
DefaultNodeList = ""
DefaultQuick = false
DefaultOutputFileName = "cilium-sysdump-<ts>" // "<ts>" will be replaced with the timestamp
DefaultCiliumLabelSelector = labelPrefix + "cilium"
DefaultCiliumNamespace = "kube-system"
DefaultCiliumOperatorLabelSelector = "io.cilium/app=operator"
DefaultCiliumOperatorNamespace = DefaultCiliumNamespace
// Default label selector and namespace used to locate 'clustermesh-apiserver'.
// The namespace default follows the other components and reuses the Cilium namespace.
DefaultClustermeshApiserverLabelSelector = "k8s-app=clustermesh-apiserver"
DefaultClustermeshApiserverNamespace     = DefaultCiliumNamespace
DefaultDebug = false
DefaultHubbleLabelSelector = labelPrefix + "hubble"
DefaultHubbleNamespace = DefaultCiliumNamespace
DefaultHubbleFlowsCount = 10000
DefaultHubbleFlowsTimeout = 5 * time.Second
DefaultHubbleRelayLabelSelector = labelPrefix + "hubble-relay"
DefaultHubbleRelayNamespace = DefaultCiliumNamespace
DefaultHubbleUILabelSelector = labelPrefix + "hubble-ui"
DefaultHubbleUINamespace = DefaultCiliumNamespace
DefaultLargeSysdumpAbortTimeout = 5 * time.Second
DefaultLargeSysdumpThreshold = 20
DefaultLogsSinceTime = 8760 * time.Hour // 1y
DefaultLogsLimitBytes = 1073741824 // 1GiB
DefaultNodeList = ""
DefaultQuick = false
DefaultOutputFileName = "cilium-sysdump-<ts>" // "<ts>" will be replaced with the timestamp
)

var (
Expand Down
41 changes: 40 additions & 1 deletion sysdump/sysdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ type Options struct {
CiliumOperatorLabelSelector string
// The namespace Cilium operator is running in.
CiliumOperatorNamespace string
// The labels used to target 'clustermesh-apiserver' pods.
ClustermeshApiserverLabelSelector string
// The namespace 'clustermesh-apiserver' is running in.
ClustermeshApiserverNamespace string
// Whether to enable debug logging.
Debug bool
// The labels used to target Hubble pods.
Expand Down Expand Up @@ -448,7 +452,7 @@ func (c *Collector) Run() error {
Description: "Collecting the Cilium operator deployment",
Quick: true,
Task: func(ctx context.Context) error {
v, err := c.client.GetDeployment(ctx, c.options.CiliumNamespace, ciliumOperatorDeploymentName, metav1.GetOptions{})
v, err := c.client.GetDeployment(ctx, c.options.CiliumOperatorNamespace, ciliumOperatorDeploymentName, metav1.GetOptions{})
if err != nil {
return fmt.Errorf("failed to collect the Cilium operator deployment: %w", err)
}
Expand All @@ -458,6 +462,24 @@ func (c *Collector) Run() error {
return nil
},
},
{
	// Dumps the 'clustermesh-apiserver' deployment as YAML. The component is
	// optional, so a missing deployment is logged as a warning instead of
	// failing the task.
	Description: "Collecting the 'clustermesh-apiserver' deployment",
	Quick:       true,
	Task: func(ctx context.Context) error {
		namespace := c.options.ClustermeshApiserverNamespace
		deployment, getErr := c.client.GetDeployment(ctx, namespace, clustermeshApiserverDeploymentName, metav1.GetOptions{})
		if getErr != nil {
			if !errors.IsNotFound(getErr) {
				return fmt.Errorf("failed to collect the 'clustermesh-apiserver' deployment: %w", getErr)
			}
			c.logWarn("deployment %q not found in namespace %q - this is expected if 'clustermesh-apiserver' isn't enabled", clustermeshApiserverDeploymentName, namespace)
			return nil
		}
		outputPath := absoluteTempPath(clustermeshApiserverDeploymentFileName)
		if writeErr := writeYaml(outputPath, deployment); writeErr != nil {
			return fmt.Errorf("failed to collect the 'clustermesh-apiserver' deployment: %w", writeErr)
		}
		return nil
	},
},
{
CreatesSubtasks: true,
Description: "Collecting gops stats from Cilium pods",
Expand Down Expand Up @@ -560,6 +582,23 @@ func (c *Collector) Run() error {
return nil
},
},
{
	// Lists the 'clustermesh-apiserver' pods and submits one log-collection
	// subtask per pod that passes the node filter.
	CreatesSubtasks: true,
	Description:     "Collecting logs from 'clustermesh-apiserver' pods",
	Quick:           false,
	Task: func(ctx context.Context) error {
		// Look in the namespace configured for 'clustermesh-apiserver' (the same
		// one used to fetch its deployment), not the Cilium agent namespace.
		p, err := c.client.ListPods(ctx, c.options.ClustermeshApiserverNamespace, metav1.ListOptions{
			LabelSelector: c.options.ClustermeshApiserverLabelSelector,
		})
		if err != nil {
			// Wrap the cause so the reason for the failure is not lost.
			return fmt.Errorf("failed to get logs from 'clustermesh-apiserver' pods: %w", err)
		}
		if err := c.submitLogsTasks(ctx, filterPods(p, nodeList), c.options.LogsSinceTime, c.options.LogsLimitBytes, absoluteTempPath); err != nil {
			return fmt.Errorf("failed to collect logs from 'clustermesh-apiserver' pods: %w", err)
		}
		return nil
	},
},
{
CreatesSubtasks: true,
Description: "Collecting logs from Hubble pods",
Expand Down