Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrated Storage Cloud Auto-Join #10095

Merged
merged 15 commits into from
Oct 13, 2020
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ Vagrantfile

dist/*

tags
# ignore ctags
./tags

# Editor backups
*~
Expand Down Expand Up @@ -105,3 +106,6 @@ website/.cache
website/assets/node_modules
website/assets/public
website/components/node_modules

.buildcache/
.releaser/
1 change: 1 addition & 0 deletions api/sys_raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ type RaftJoinResponse struct {

// RaftJoinRequest represents the parameters consumed by the raft join API
type RaftJoinRequest struct {
AutoJoin string `json:"auto_join"`
LeaderAPIAddr string `json:"leader_api_addr"`
LeaderCACert string `json:"leader_ca_cert"`
LeaderClientCert string `json:"leader_client_cert"`
Expand Down
24 changes: 18 additions & 6 deletions command/operator_raft_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,18 @@ func (c *OperatorRaftJoinCommand) Synopsis() string {

func (c *OperatorRaftJoinCommand) Help() string {
helpText := `
Usage: vault operator raft join [options] <leader-api-addr>
Usage: vault operator raft join [options] <leader-api-addr|auto-join-configuration>

Join the current node as a peer to the Raft cluster by providing the address
of the Raft leader node.

$ vault operator raft join "http://127.0.0.2:8200"

Join the current node as a peer to the Raft cluster by providing cloud auto-join
configuration.

$ vault operator raft join "provider=aws region=eu-west-1 ..."

TLS certificate data can also be consumed from a file on disk by prefixing with
the "@" symbol. For example:

Expand Down Expand Up @@ -106,14 +111,14 @@ func (c *OperatorRaftJoinCommand) Run(args []string) int {
return 1
}

leaderAPIAddr := ""
leaderInfo := ""

args = f.Args()
switch len(args) {
case 0:
// No-op: This is acceptable if we're using raft for HA-only
case 1:
leaderAPIAddr = strings.TrimSpace(args[0])
leaderInfo = strings.TrimSpace(args[0])
default:
c.UI.Error(fmt.Sprintf("Too many arguments (expected 0-1, got %d)", len(args)))
return 1
Expand Down Expand Up @@ -143,14 +148,21 @@ func (c *OperatorRaftJoinCommand) Run(args []string) int {
return 2
}

resp, err := client.Sys().RaftJoin(&api.RaftJoinRequest{
LeaderAPIAddr: leaderAPIAddr,
joinReq := &api.RaftJoinRequest{
LeaderCACert: leaderCACert,
LeaderClientCert: leaderClientCert,
LeaderClientKey: leaderClientKey,
Retry: c.flagRetry,
NonVoter: c.flagNonVoter,
})
}

if strings.HasPrefix(leaderInfo, "provider=") {
alexanderbez marked this conversation as resolved.
Show resolved Hide resolved
joinReq.AutoJoin = leaderInfo
} else {
joinReq.LeaderAPIAddr = leaderInfo
}

resp, err := client.Sys().RaftJoin(joinReq)
if err != nil {
c.UI.Error(fmt.Sprintf("Error joining the node to the Raft cluster: %s", err))
return 2
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
cloud.google.com/go/spanner v1.5.1
cloud.google.com/go/storage v1.6.0
github.com/Azure/azure-storage-blob-go v0.10.0
github.com/Azure/go-autorest/autorest v0.10.1
github.com/Azure/go-autorest/autorest v0.11.0
github.com/DataDog/zstd v1.4.5 // indirect
github.com/NYTimes/gziphandler v1.1.1
github.com/SAP/go-hdb v0.14.1
Expand Down Expand Up @@ -55,6 +55,7 @@ require (
github.com/hashicorp/errwrap v1.0.0
github.com/hashicorp/go-bindata v3.0.8-0.20180209072458-bf7910af8997+incompatible
github.com/hashicorp/go-cleanhttp v0.5.1
github.com/hashicorp/go-discover v0.0.0-20200812215701-c4b85f6ed31f
github.com/hashicorp/go-gcp-common v0.6.0
github.com/hashicorp/go-hclog v0.14.1
github.com/hashicorp/go-kms-wrapping v0.5.16
Expand Down Expand Up @@ -147,7 +148,7 @@ require (
go.etcd.io/etcd v0.5.0-alpha.5.0.20200425165423-262c93980547
go.mongodb.org/mongo-driver v1.2.1
go.uber.org/atomic v1.6.0
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
golang.org/x/net v0.0.0-20200602114024-627f9648deb9
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae // indirect
Expand Down
117 changes: 115 additions & 2 deletions go.sum

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions http/sys_raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func handleSysRaftJoinPost(core *vault.Core, w http.ResponseWriter, r *http.Requ

leaderInfos := []*raft.LeaderJoinInfo{
{
AutoJoin: req.AutoJoin,
LeaderAPIAddr: req.LeaderAPIAddr,
TLSConfig: tlsConfig,
Retry: req.Retry,
Expand All @@ -90,6 +91,7 @@ type JoinResponse struct {
}

type JoinRequest struct {
AutoJoin string `json:"auto_join"`
LeaderAPIAddr string `json:"leader_api_addr"`
LeaderCACert string `json:"leader_ca_cert"`
LeaderClientCert string `json:"leader_client_cert"`
Expand Down
13 changes: 11 additions & 2 deletions physical/raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ type RaftBackend struct {
// LeaderJoinInfo contains information required by a node to join itself as a
// follower to an existing raft cluster
type LeaderJoinInfo struct {
// AutoJoin defines any cloud auto-join metadata. If supplied, Vault will
// attempt to automatically discover peers in addition to what can be provided
// via 'leader_api_addr'.
AutoJoin string `json:"auto_join"`

// LeaderAPIAddr is the address of the leader node to connect to
LeaderAPIAddr string `json:"leader_api_addr"`

Expand Down Expand Up @@ -178,11 +183,15 @@ func (b *RaftBackend) JoinConfig() ([]*LeaderJoinInfo, error) {
return nil, errors.New("invalid retry_join config")
}

for _, info := range leaderInfos {
for i, info := range leaderInfos {
if len(info.AutoJoin) != 0 && len(info.LeaderAPIAddr) != 0 {
return nil, errors.New("cannot provide both a leader_api_addr and auto_join")
}

info.Retry = true
info.TLSConfig, err = parseTLSInfo(info)
if err != nil {
return nil, errwrap.Wrapf(fmt.Sprintf("failed to create tls config to communicate with leader node %q: {{err}}", info.LeaderAPIAddr), err)
return nil, errwrap.Wrapf(fmt.Sprintf("failed to create tls config to communicate with leader node (retry_join index: %d): {{err}}", i), err)
}
}

Expand Down
51 changes: 51 additions & 0 deletions vault/external_tests/raft/raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/vault"
"github.com/stretchr/testify/require"
"golang.org/x/net/http2"
)

Expand All @@ -40,6 +41,56 @@ func raftCluster(t testing.TB) *vault.TestCluster {
return cluster
}

func TestRaft_RetryAutoJoin(t *testing.T) {
t.Parallel()

var (
conf vault.CoreConfig

opts = vault.TestClusterOptions{HandlerFunc: vaulthttp.Handler}
)

teststorage.RaftBackendSetup(&conf, &opts)

opts.SetupFunc = nil
cluster := vault.NewTestCluster(t, &conf, &opts)

cluster.Start()
defer cluster.Cleanup()

addressProvider := &testhelpers.TestRaftServerAddressProvider{Cluster: cluster}
leaderCore := cluster.Cores[0]
atomic.StoreUint32(&vault.TestingUpdateClusterAddr, 1)

{
testhelpers.EnsureCoreSealed(t, leaderCore)
leaderCore.UnderlyingRawStorage.(*raft.RaftBackend).SetServerAddressProvider(addressProvider)
cluster.UnsealCore(t, leaderCore)
vault.TestWaitActive(t, leaderCore.Core)
}

leaderInfos := []*raft.LeaderJoinInfo{
{
AutoJoin: "provider=aws region=eu-west-1 tag_key=consul tag_value=tag access_key_id=a secret_access_key=a",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a set of params that the user has to specify to use auto join? Like, provider and region make sense, but are there other fields that can be used instead of the fields here that specify, as I understand, an IAM user? If so, would it be helpful to have a list of fields that autoJoin can use to join a cluster?

I might also just have this question because I'm new to the raft library :).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. It's entirely ambiguous and dependent on the cloud provider and operator infrastructure. See go-discover.

TLSConfig: leaderCore.TLSConfig,
Retry: true,
},
}

{
// expected to pass but not join as we're not actually discovering leader addresses
core := cluster.Cores[1]
core.UnderlyingRawStorage.(*raft.RaftBackend).SetServerAddressProvider(addressProvider)

_, err := core.JoinRaftCluster(namespace.RootContext(context.Background()), leaderInfos, false)
require.NoError(t, err)
}

testhelpers.VerifyRaftPeers(t, cluster.Cores[0].Client, map[string]bool{
"core-0": true,
})
}

func TestRaft_Retry_Join(t *testing.T) {
t.Parallel()
var conf vault.CoreConfig
Expand Down
68 changes: 57 additions & 11 deletions vault/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/golang/protobuf/proto"
"github.com/hashicorp/errwrap"
cleanhttp "github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/go-discover"
"github.com/hashicorp/go-hclog"
wrapping "github.com/hashicorp/go-kms-wrapping"
uuid "github.com/hashicorp/go-uuid"
Expand Down Expand Up @@ -739,14 +740,14 @@ func (c *Core) JoinRaftCluster(ctx context.Context, leaderInfos []*raft.LeaderJo
return false, errors.New("node must be unsealed before joining")
}

// Disallow leader API address to be provided if we're using raft for HA-only
// Disallow leader API address to be provided if we're using raft for HA-only.
// The leader API address is obtained directly through storage. This serves
// as a form of verification that this node is sharing the same physical
// storage as the leader node.
if isRaftHAOnly {
for _, info := range leaderInfos {
if info.LeaderAPIAddr != "" {
return false, errors.New("leader API address must be unset when raft is used exclusively for HA")
if info.LeaderAPIAddr != "" || info.AutoJoin != "" {
return false, errors.New("leader API address or auto-join metadata must be unset when raft is used exclusively for HA")
alexanderbez marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down Expand Up @@ -778,12 +779,17 @@ func (c *Core) JoinRaftCluster(ctx context.Context, leaderInfos []*raft.LeaderJo
leaderInfos[0].LeaderAPIAddr = adv.RedirectAddr
}

disco, err := newDiscover()
if err != nil {
return false, errwrap.Wrapf("failed to create auto-join discovery: {{err}}", err)
}

join := func(retry bool) error {
joinLeader := func(leaderInfo *raft.LeaderJoinInfo) error {
joinLeader := func(leaderInfo *raft.LeaderJoinInfo, leaderAddr string) error {
if leaderInfo == nil {
return errors.New("raft leader information is nil")
}
if len(leaderInfo.LeaderAPIAddr) == 0 {
if len(leaderAddr) == 0 {
return errors.New("raft leader address not provided")
}

Expand All @@ -797,7 +803,7 @@ func (c *Core) JoinRaftCluster(ctx context.Context, leaderInfos []*raft.LeaderJo
return nil
}

c.logger.Info("attempting to join possible raft leader node", "leader_addr", leaderInfo.LeaderAPIAddr)
c.logger.Info("attempting to join possible raft leader node", "leader_addr", leaderAddr)

// Create an API client to interact with the leader node
transport := cleanhttp.DefaultPooledTransport()
Expand All @@ -819,13 +825,16 @@ func (c *Core) JoinRaftCluster(ctx context.Context, leaderInfos []*raft.LeaderJo
client := &http.Client{
Transport: transport,
}

config := api.DefaultConfig()
if config.Error != nil {
return errwrap.Wrapf("failed to create api client: {{err}}", config.Error)
}
config.Address = leaderInfo.LeaderAPIAddr

config.Address = leaderAddr
config.HttpClient = client
config.MaxRetries = 0

apiClient, err := api.NewClient(config)
if err != nil {
return errwrap.Wrapf("failed to create api client: {{err}}", err)
Expand Down Expand Up @@ -865,6 +874,7 @@ func (c *Core) JoinRaftCluster(ctx context.Context, leaderInfos []*raft.LeaderJo
if err := proto.Unmarshal(challengeRaw, eBlob); err != nil {
return errwrap.Wrapf("error decoding raft bootstrap challenge: {{err}}", err)
}

raftInfo := &raftInformation{
challenge: eBlob,
leaderClient: apiClient,
Expand Down Expand Up @@ -911,11 +921,36 @@ func (c *Core) JoinRaftCluster(ctx context.Context, leaderInfos []*raft.LeaderJo
// Each join try goes through all the possible leader nodes and attempts to join
// them, until one of the attempt succeeds.
for _, leaderInfo := range leaderInfos {
err = joinLeader(leaderInfo)
if err == nil {
return nil
switch {
case leaderInfo.LeaderAPIAddr != "" && leaderInfo.AutoJoin != "":
c.logger.Info("join attempt failed", "error", errors.New("cannot provide both leader address and auto-join metadata"))

case leaderInfo.LeaderAPIAddr != "":
if err := joinLeader(leaderInfo, leaderInfo.LeaderAPIAddr); err != nil {
c.logger.Info("join attempt failed", "error", err)
} else {
// successfully joined leader
return nil
}

case leaderInfo.AutoJoin != "":
addrs, err := disco.Addrs(leaderInfo.AutoJoin, c.logger.StandardLogger(nil))
if err != nil {
c.logger.Info("failed to parse addresses from auto-join metadata", "error", err)
}

for _, addr := range addrs {
if err := joinLeader(leaderInfo, addr); err != nil {
c.logger.Info("join attempt failed", "error", err)
} else {
// successfully joined leader
return nil
}
}

default:
c.logger.Info("join attempt failed", "error", errors.New("must provide leader address or auto-join metadata"))
}
c.logger.Info("join attempt failed", "error", err)
}

return errors.New("failed to join any raft leader node")
Expand Down Expand Up @@ -1116,3 +1151,14 @@ type answerResp struct {
Peers []raft.Peer `json:"peers"`
TLSKeyring *raft.TLSKeyring `json:"tls_keyring"`
}

func newDiscover() (*discover.Discover, error) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this helper still needed now that we're not adding k8s?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah we can safely remove this 👍

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But on second thought, just in case the go-discover API changes, I figure it's safe to be explicit. So I'm leaning towards keeping this.

providers := make(map[string]discover.Provider)
for k, v := range discover.Providers {
providers[k] = v
}

return discover.New(
discover.WithProviders(providers),
)
}
2 changes: 1 addition & 1 deletion vendor/github.com/Azure/azure-sdk-for-go/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.