Skip to content

Commit

Permalink
Add etcd s3 config secret implementation
Browse files Browse the repository at this point in the history
* Move snapshot structs and functions into pkg/etcd/snapshot
* Move s3 client code and functions into pkg/etcd/s3
* Refactor pkg/etcd to track snapshot and s3 moves
* Add support for reading s3 client config from secret
* Add minio client cache, since S3 client configuration can now be
  changed at runtime by modifying the secret, and don't want to have to
  create a new minio client every time we read config.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
  • Loading branch information
brandond committed Jun 14, 2024
1 parent b4d4ed8 commit 3d7cbd8
Show file tree
Hide file tree
Showing 19 changed files with 1,597 additions and 946 deletions.
12 changes: 10 additions & 2 deletions docs/adrs/etcd-s3-secret.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Support etcd Snapshot Configuration via Kubernetes Secret

Date: 2024-02-06
Revised: 2024-06-10

## Status

Expand Down Expand Up @@ -34,8 +35,8 @@ avoids embedding the credentials directly in the system configuration, chart val
* We will add a `--etcd-s3-proxy` flag that can be used to set the proxy used by the S3 client. This will override the
settings that golang's default HTTP client reads from the `HTTP_PROXY/HTTPS_PROXY/NO_PROXY` environment varibles.
* We will add support for reading etcd snapshot S3 configuration from a Secret. The secret name will be specified via a new
`--etcd-s3-secret` flag, which accepts the name of the Secret in the `kube-system` namespace.
* Presence of the `--etcd-s3-secret` flag does not imply `--etcd-s3`. If S3 is not enabled by use of the `--etcd-s3` flag,
`--etcd-s3-config-secret` flag, which accepts the name of the Secret in the `kube-system` namespace.
* Presence of the `--etcd-s3-config-secret` flag does not imply `--etcd-s3`. If S3 is not enabled by use of the `--etcd-s3` flag,
the Secret will not be used.
* The Secret does not need to exist when K3s starts; it will be checked for every time a snapshot operation is performed.
* Secret and CLI/config values will NOT be merged. The Secret will provide values to be used in absence of other
Expand Down Expand Up @@ -64,6 +65,7 @@ stringData:
etcd-s3-access-key: "AWS_ACCESS_KEY_ID"
etcd-s3-secret-key: "AWS_SECRET_ACCESS_KEY"
etcd-s3-bucket: "bucket"
etcd-s3-folder: "folder"
etcd-s3-region: "us-east-1"
etcd-s3-insecure: "false"
etcd-s3-timeout: "5m"
Expand All @@ -73,3 +75,9 @@ stringData:
## Consequences

This will require additional documentation, tests, and QA work to validate use of secrets for s3 snapshot configuration.

## Revisions

#### 2024-06-10:
* Changed flag to `etcd-s3-config-secret` to avoid confusion with `etcd-s3-secret-key`.
* Added `etcd-s3-folder` to example Secret.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,9 @@ require (
github.com/vishvananda/netlink v1.2.1-beta.2
github.com/yl2chen/cidranger v1.0.2
go.etcd.io/etcd/api/v3 v3.5.13
go.etcd.io/etcd/client/pkg/v3 v3.5.13
go.etcd.io/etcd/client/v3 v3.5.13
go.etcd.io/etcd/etcdutl/v3 v3.5.9
go.etcd.io/etcd/server/v3 v3.5.13
go.uber.org/zap v1.27.0
golang.org/x/crypto v0.22.0
golang.org/x/net v0.24.0
golang.org/x/sync v0.7.0
Expand Down Expand Up @@ -416,6 +414,7 @@ require (
github.com/xlab/treeprint v1.2.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
go.etcd.io/bbolt v1.3.9 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.13 // indirect
go.etcd.io/etcd/client/v2 v2.305.13 // indirect
go.etcd.io/etcd/pkg/v3 v3.5.13 // indirect
go.etcd.io/etcd/raft/v3 v3.5.13 // indirect
Expand All @@ -436,6 +435,7 @@ require (
go.uber.org/fx v1.20.1 // indirect
go.uber.org/mock v0.4.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect
golang.org/x/mod v0.17.0 // indirect
golang.org/x/oauth2 v0.17.0 // indirect
Expand Down
10 changes: 10 additions & 0 deletions pkg/cli/cmds/etcd_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,16 @@ var EtcdSnapshotFlags = []cli.Flag{
Usage: "(db) S3 folder",
Destination: &ServerConfig.EtcdS3Folder,
},
&cli.StringFlag{
Name: "s3-proxy,etcd-s3-proxy",
Usage: "(db) Proxy server to use when connecting to S3, overriding any proxy-releated environment variables",
Destination: &ServerConfig.EtcdS3Proxy,
},
&cli.StringFlag{
Name: "s3-config-secret,etcd-s3-config-secret",
Usage: "(db) Name of secret in the kube-system namespace used to configure S3, if etcd-s3 is enabled and no other etcd-s3 options are set",
Destination: &ServerConfig.EtcdS3ConfigSecret,
},
&cli.BoolFlag{
Name: "s3-insecure,etcd-s3-insecure",
Usage: "(db) Disables S3 over HTTPS",
Expand Down
12 changes: 12 additions & 0 deletions pkg/cli/cmds/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ type Server struct {
EtcdS3BucketName string
EtcdS3Region string
EtcdS3Folder string
EtcdS3Proxy string
EtcdS3ConfigSecret string
EtcdS3Timeout time.Duration
EtcdS3Insecure bool
ServiceLBNamespace string
Expand Down Expand Up @@ -430,6 +432,16 @@ var ServerFlags = []cli.Flag{
Usage: "(db) S3 folder",
Destination: &ServerConfig.EtcdS3Folder,
},
&cli.StringFlag{
Name: "etcd-s3-proxy",
Usage: "(db) Proxy server to use when connecting to S3, overriding any proxy-releated environment variables",
Destination: &ServerConfig.EtcdS3Proxy,
},
&cli.StringFlag{
Name: "etcd-s3-config-secret",
Usage: "(db) Name of secret in the kube-system namespace used to configure S3, if etcd-s3 is enabled and no other etcd-s3 options are set",
Destination: &ServerConfig.EtcdS3ConfigSecret,
},
&cli.BoolFlag{
Name: "etcd-s3-insecure",
Usage: "(db) Disables S3 over HTTPS",
Expand Down
26 changes: 15 additions & 11 deletions pkg/cli/etcdsnapshot/etcd_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/k3s-io/k3s/pkg/cli/cmds"
"github.com/k3s-io/k3s/pkg/clientaccess"
"github.com/k3s-io/k3s/pkg/cluster/managed"
"github.com/k3s-io/k3s/pkg/daemons/config"
"github.com/k3s-io/k3s/pkg/etcd"
"github.com/k3s-io/k3s/pkg/proctitle"
"github.com/k3s-io/k3s/pkg/server"
Expand Down Expand Up @@ -50,17 +51,20 @@ func commandSetup(app *cli.Context, cfg *cmds.Server) (*etcd.SnapshotRequest, *c
}

if cfg.EtcdS3 {
sr.S3 = &etcd.SnapshotRequestS3{}
sr.S3.AccessKey = cfg.EtcdS3AccessKey
sr.S3.Bucket = cfg.EtcdS3BucketName
sr.S3.Endpoint = cfg.EtcdS3Endpoint
sr.S3.EndpointCA = cfg.EtcdS3EndpointCA
sr.S3.Folder = cfg.EtcdS3Folder
sr.S3.Insecure = cfg.EtcdS3Insecure
sr.S3.Region = cfg.EtcdS3Region
sr.S3.SecretKey = cfg.EtcdS3SecretKey
sr.S3.SkipSSLVerify = cfg.EtcdS3SkipSSLVerify
sr.S3.Timeout = metav1.Duration{Duration: cfg.EtcdS3Timeout}
sr.S3 = &config.EtcdS3{
AccessKey: cfg.EtcdS3AccessKey,
Bucket: cfg.EtcdS3BucketName,
ConfigSecret: cfg.EtcdS3ConfigSecret,
Endpoint: cfg.EtcdS3Endpoint,
EndpointCA: cfg.EtcdS3EndpointCA,
Folder: cfg.EtcdS3Folder,
Insecure: cfg.EtcdS3Insecure,
Proxy: cfg.EtcdS3Proxy,
Region: cfg.EtcdS3Region,
SecretKey: cfg.EtcdS3SecretKey,
SkipSSLVerify: cfg.EtcdS3SkipSSLVerify,
Timeout: metav1.Duration{Duration: cfg.EtcdS3Timeout},
}
// extend request timeout to allow the S3 operation to complete
timeout += cfg.EtcdS3Timeout
}
Expand Down
28 changes: 17 additions & 11 deletions pkg/cli/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/rancher/wrangler/v3/pkg/signals"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilnet "k8s.io/apimachinery/pkg/util/net"
kubeapiserverflag "k8s.io/component-base/cli/flag"
"k8s.io/kubernetes/pkg/controlplane/apiserver/options"
Expand Down Expand Up @@ -186,17 +187,22 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
serverConfig.ControlConfig.EtcdSnapshotCron = cfg.EtcdSnapshotCron
serverConfig.ControlConfig.EtcdSnapshotDir = cfg.EtcdSnapshotDir
serverConfig.ControlConfig.EtcdSnapshotRetention = cfg.EtcdSnapshotRetention
serverConfig.ControlConfig.EtcdS3 = cfg.EtcdS3
serverConfig.ControlConfig.EtcdS3Endpoint = cfg.EtcdS3Endpoint
serverConfig.ControlConfig.EtcdS3EndpointCA = cfg.EtcdS3EndpointCA
serverConfig.ControlConfig.EtcdS3SkipSSLVerify = cfg.EtcdS3SkipSSLVerify
serverConfig.ControlConfig.EtcdS3AccessKey = cfg.EtcdS3AccessKey
serverConfig.ControlConfig.EtcdS3SecretKey = cfg.EtcdS3SecretKey
serverConfig.ControlConfig.EtcdS3BucketName = cfg.EtcdS3BucketName
serverConfig.ControlConfig.EtcdS3Region = cfg.EtcdS3Region
serverConfig.ControlConfig.EtcdS3Folder = cfg.EtcdS3Folder
serverConfig.ControlConfig.EtcdS3Insecure = cfg.EtcdS3Insecure
serverConfig.ControlConfig.EtcdS3Timeout = cfg.EtcdS3Timeout
if cfg.EtcdS3 {
serverConfig.ControlConfig.EtcdS3 = &config.EtcdS3{
AccessKey: cfg.EtcdS3AccessKey,
Bucket: cfg.EtcdS3BucketName,
ConfigSecret: cfg.EtcdS3ConfigSecret,
Endpoint: cfg.EtcdS3Endpoint,
EndpointCA: cfg.EtcdS3EndpointCA,
Folder: cfg.EtcdS3Folder,
Insecure: cfg.EtcdS3Insecure,
Proxy: cfg.EtcdS3Proxy,
Region: cfg.EtcdS3Region,
SecretKey: cfg.EtcdS3SecretKey,
SkipSSLVerify: cfg.EtcdS3SkipSSLVerify,
Timeout: metav1.Duration{Duration: cfg.EtcdS3Timeout},
}
}
} else {
logrus.Info("ETCD snapshots are disabled")
}
Expand Down
49 changes: 27 additions & 22 deletions pkg/daemons/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ import (
"sort"
"strings"
"sync"
"time"

"github.com/k3s-io/k3s/pkg/generated/controllers/k3s.cattle.io"
"github.com/k3s-io/kine/pkg/endpoint"
"github.com/rancher/wharfie/pkg/registries"
"github.com/rancher/wrangler/v3/pkg/generated/controllers/core"
"github.com/rancher/wrangler/v3/pkg/leader"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apiserver/pkg/authentication/authenticator"
"k8s.io/client-go/tools/record"
Expand Down Expand Up @@ -62,6 +62,21 @@ type Node struct {
DefaultRuntime string
}

type EtcdS3 struct {
AccessKey string `json:"accessKey,omitempty"`
Bucket string `json:"bucket,omitempty"`
ConfigSecret string `json:"configSecret,omitempty"`
Endpoint string `json:"endpoint,omitempty"`
EndpointCA string `json:"endpointCA,omitempty"`
Folder string `json:"folder,omitempty"`
Proxy string `json:"proxy,omitempty"`
Region string `json:"region,omitempty"`
SecretKey string `json:"secretKey,omitempty"`
Insecure bool `json:"insecure,omitempty"`
SkipSSLVerify bool `json:"skipSSLVerify,omitempty"`
Timeout metav1.Duration `json:"timeout,omitempty"`
}

type Containerd struct {
Address string
Log string
Expand Down Expand Up @@ -216,27 +231,17 @@ type Control struct {
EncryptSkip bool
MinTLSVersion string
CipherSuites []string
TLSMinVersion uint16 `json:"-"`
TLSCipherSuites []uint16 `json:"-"`
EtcdSnapshotName string `json:"-"`
EtcdDisableSnapshots bool `json:"-"`
EtcdExposeMetrics bool `json:"-"`
EtcdSnapshotDir string `json:"-"`
EtcdSnapshotCron string `json:"-"`
EtcdSnapshotRetention int `json:"-"`
EtcdSnapshotCompress bool `json:"-"`
EtcdListFormat string `json:"-"`
EtcdS3 bool `json:"-"`
EtcdS3Endpoint string `json:"-"`
EtcdS3EndpointCA string `json:"-"`
EtcdS3SkipSSLVerify bool `json:"-"`
EtcdS3AccessKey string `json:"-"`
EtcdS3SecretKey string `json:"-"`
EtcdS3BucketName string `json:"-"`
EtcdS3Region string `json:"-"`
EtcdS3Folder string `json:"-"`
EtcdS3Timeout time.Duration `json:"-"`
EtcdS3Insecure bool `json:"-"`
TLSMinVersion uint16 `json:"-"`
TLSCipherSuites []uint16 `json:"-"`
EtcdSnapshotName string `json:"-"`
EtcdDisableSnapshots bool `json:"-"`
EtcdExposeMetrics bool `json:"-"`
EtcdSnapshotDir string `json:"-"`
EtcdSnapshotCron string `json:"-"`
EtcdSnapshotRetention int `json:"-"`
EtcdSnapshotCompress bool `json:"-"`
EtcdListFormat string `json:"-"`
EtcdS3 *EtcdS3 `json:"-"`
ServerNodeName string
VLevel int
VModule string
Expand Down
44 changes: 20 additions & 24 deletions pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"net/url"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
Expand All @@ -25,6 +24,8 @@ import (
"github.com/k3s-io/k3s/pkg/daemons/config"
"github.com/k3s-io/k3s/pkg/daemons/control/deps"
"github.com/k3s-io/k3s/pkg/daemons/executor"
"github.com/k3s-io/k3s/pkg/etcd/s3"
"github.com/k3s-io/k3s/pkg/etcd/snapshot"
"github.com/k3s-io/k3s/pkg/server/auth"
"github.com/k3s-io/k3s/pkg/util"
"github.com/k3s-io/k3s/pkg/version"
Expand All @@ -39,10 +40,8 @@ import (
"github.com/sirupsen/logrus"
"go.etcd.io/etcd/api/v3/etcdserverpb"
"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
"go.etcd.io/etcd/client/pkg/v3/logutil"
clientv3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/etcdutl/v3/snapshot"
"go.uber.org/zap"
snapshotv3 "go.etcd.io/etcd/etcdutl/v3/snapshot"
"golang.org/x/sync/semaphore"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -93,8 +92,6 @@ var (
ErrAddressNotSet = errors.New("apiserver addresses not yet set")
ErrNotMember = errNotMember()
ErrMemberListFailed = errMemberListFailed()

invalidKeyChars = regexp.MustCompile(`[^-._a-zA-Z0-9]`)
)

type NodeControllerGetter func() controllerv1.NodeController
Expand All @@ -110,8 +107,8 @@ type ETCD struct {
name string
address string
cron *cron.Cron
s3 *S3
cancel context.CancelFunc
s3 *s3.Controller
snapshotSem *semaphore.Weighted
}

Expand Down Expand Up @@ -165,8 +162,6 @@ func (e *MemberListError) Is(target error) bool {

func errMemberListFailed() error { return &MemberListError{} }

// NewETCD creates a new value of type
// ETCD with an initialized cron value.
func NewETCD() *ETCD {
return &ETCD{
cron: cron.New(cron.WithLogger(cronLogger)),
Expand Down Expand Up @@ -390,14 +385,21 @@ func (e *ETCD) Reset(ctx context.Context, rebootstrap func() error) error {

// If asked to restore from a snapshot, do so
if e.config.ClusterResetRestorePath != "" {
if e.config.EtcdS3 {
if e.config.EtcdS3 != nil {
logrus.Infof("Retrieving etcd snapshot %s from S3", e.config.ClusterResetRestorePath)
if err := e.initS3IfNil(ctx); err != nil {
return err
s3client, err := e.initS3IfNil(ctx)
if err != nil {
return errors.Wrap(err, "failed to initialize S3 client")
}
if err := e.s3.Download(ctx); err != nil {
return err
dir, err := snapshotDir(e.config, true)
if err != nil {
return errors.Wrap(err, "failed to get the snapshot dir")
}
path, err := s3client.Download(ctx, e.config.ClusterResetRestorePath, dir)
if err != nil {
return errors.Wrap(err, "failed to download snapshot from S3")
}
e.config.ClusterResetRestorePath = path
logrus.Infof("S3 download complete for %s", e.config.ClusterResetRestorePath)
}

Expand Down Expand Up @@ -1462,13 +1464,13 @@ func (e *ETCD) Restore(ctx context.Context) error {
}

var restorePath string
if strings.HasSuffix(e.config.ClusterResetRestorePath, compressedExtension) {
snapshotDir, err := snapshotDir(e.config, true)
if strings.HasSuffix(e.config.ClusterResetRestorePath, snapshot.CompressedExtension) {
dir, err := snapshotDir(e.config, true)
if err != nil {
return errors.Wrap(err, "failed to get the snapshot dir")
}

decompressSnapshot, err := e.decompressSnapshot(snapshotDir, e.config.ClusterResetRestorePath)
decompressSnapshot, err := e.decompressSnapshot(dir, e.config.ClusterResetRestorePath)
if err != nil {
return err
}
Expand All @@ -1484,13 +1486,7 @@ func (e *ETCD) Restore(ctx context.Context) error {
}

logrus.Infof("Pre-restore etcd database moved to %s", oldDataDir)

lg, err := logutil.CreateDefaultZapLogger(zap.InfoLevel)
if err != nil {
return err
}

return snapshot.NewV3(lg).Restore(snapshot.RestoreConfig{
return snapshotv3.NewV3(e.client.GetLogger()).Restore(snapshotv3.RestoreConfig{
SnapshotPath: restorePath,
Name: e.name,
OutputDataDir: dbDir(e.config),
Expand Down
Loading

0 comments on commit 3d7cbd8

Please sign in to comment.