Skip to content

Commit

Permalink
Add DNS recursor strategy option (#10611)
Browse files Browse the repository at this point in the history
This change adds a new `dns_config.recursor_strategy` option which
controls how Consul queries DNS resolvers listed in the `recursors`
config option. The supported options are `sequential` (default), and
`random`.

Closes #8807

Co-authored-by: Blake Covarrubias <blake@covarrubi.as>
Co-authored-by: Priyanka Sengupta <psengupta@flatiron.com>
  • Loading branch information
blake and munali committed Jul 19, 2021
1 parent 832896e commit a0cd3dd
Show file tree
Hide file tree
Showing 11 changed files with 127 additions and 19 deletions.
3 changes: 3 additions & 0 deletions .changelog/10611.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
config: add `dns_config.recursor_strategy` flag to control the order which DNS recursors are queried
```
15 changes: 15 additions & 0 deletions agent/config/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) {
DNSNodeTTL: b.durationVal("dns_config.node_ttl", c.DNS.NodeTTL),
DNSOnlyPassing: boolVal(c.DNS.OnlyPassing),
DNSPort: dnsPort,
DNSRecursorStrategy: b.dnsRecursorStrategyVal(stringVal(c.DNS.RecursorStrategy)),
DNSRecursorTimeout: b.durationVal("recursor_timeout", c.DNS.RecursorTimeout),
DNSRecursors: dnsRecursors,
DNSServiceTTL: dnsServiceTTL,
Expand Down Expand Up @@ -1745,6 +1746,20 @@ func (b *builder) meshGatewayConfVal(mgConf *MeshGatewayConfig) structs.MeshGate
return cfg
}

func (b *builder) dnsRecursorStrategyVal(v string) dns.RecursorStrategy {
var out dns.RecursorStrategy

switch dns.RecursorStrategy(v) {
case dns.RecursorStrategyRandom:
out = dns.RecursorStrategyRandom
case dns.RecursorStrategySequential, "":
out = dns.RecursorStrategySequential
default:
b.err = multierror.Append(b.err, fmt.Errorf("dns_config.recursor_strategy: invalid strategy: %q", v))
}
return out
}

func (b *builder) exposeConfVal(v *ExposeConfig) structs.ExposeConfig {
var out structs.ExposeConfig
if v == nil {
Expand Down
1 change: 1 addition & 0 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,7 @@ type DNS struct {
MaxStale *string `mapstructure:"max_stale"`
NodeTTL *string `mapstructure:"node_ttl"`
OnlyPassing *bool `mapstructure:"only_passing"`
RecursorStrategy *string `mapstructure:"recursor_strategy"`
RecursorTimeout *string `mapstructure:"recursor_timeout"`
ServiceTTL map[string]string `mapstructure:"service_ttl"`
UDPAnswerLimit *int `mapstructure:"udp_answer_limit"`
Expand Down
10 changes: 10 additions & 0 deletions agent/config/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/agent/dns"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
Expand Down Expand Up @@ -270,6 +271,15 @@ type RuntimeConfig struct {
// hcl: dns_config { only_passing = (true|false) }
DNSOnlyPassing bool

// DNSRecursorStrategy controls the order in which DNS recursors are queried.
// 'sequential' queries recursors in the order they are listed under `recursors`.
// 'random' causes random selection of recursors which has the effect of
// spreading the query load among all listed servers, rather than having
// client agents try the first server in the list every time.
//
// hcl: dns_config { recursor_strategy = "(random|sequential)" }
DNSRecursorStrategy dns.RecursorStrategy

// DNSRecursorTimeout specifies the timeout in seconds
// for Consul's internal dns client used for recursion.
// This value is used for the connection, read and write timeout.
Expand Down
1 change: 1 addition & 0 deletions agent/config/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5425,6 +5425,7 @@ func TestLoad_FullConfig(t *testing.T) {
DNSNodeTTL: 7084 * time.Second,
DNSOnlyPassing: true,
DNSPort: 7001,
DNSRecursorStrategy: "sequential",
DNSRecursorTimeout: 4427 * time.Second,
DNSRecursors: []string{"63.38.39.58", "92.49.18.18"},
DNSSOA: RuntimeSOAConfig{Refresh: 3600, Retry: 600, Expire: 86400, Minttl: 0},
Expand Down
1 change: 1 addition & 0 deletions agent/config/testdata/TestRuntimeConfig_Sanitize.golden
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@
"DNSNodeTTL": "0s",
"DNSOnlyPassing": false,
"DNSPort": 0,
"DNSRecursorStrategy": "",
"DNSRecursorTimeout": "0s",
"DNSRecursors": [],
"DNSSOA": {
Expand Down
40 changes: 22 additions & 18 deletions agent/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,23 @@ type dnsSOAConfig struct {
}

type dnsConfig struct {
AllowStale bool
Datacenter string
EnableTruncate bool
MaxStale time.Duration
UseCache bool
CacheMaxAge time.Duration
NodeName string
NodeTTL time.Duration
OnlyPassing bool
RecursorTimeout time.Duration
Recursors []string
SegmentName string
UDPAnswerLimit int
ARecordLimit int
NodeMetaTXT bool
SOAConfig dnsSOAConfig
AllowStale bool
Datacenter string
EnableTruncate bool
MaxStale time.Duration
UseCache bool
CacheMaxAge time.Duration
NodeName string
NodeTTL time.Duration
OnlyPassing bool
RecursorStrategy agentdns.RecursorStrategy
RecursorTimeout time.Duration
Recursors []string
SegmentName string
UDPAnswerLimit int
ARecordLimit int
NodeMetaTXT bool
SOAConfig dnsSOAConfig
// TTLRadix sets service TTLs by prefix, eg: "database-*"
TTLRadix *radix.Tree
// TTLStict sets TTLs to service by full name match. It Has higher priority than TTLRadix
Expand Down Expand Up @@ -154,6 +155,7 @@ func GetDNSConfig(conf *config.RuntimeConfig) (*dnsConfig, error) {
NodeName: conf.NodeName,
NodeTTL: conf.DNSNodeTTL,
OnlyPassing: conf.DNSOnlyPassing,
RecursorStrategy: conf.DNSRecursorStrategy,
RecursorTimeout: conf.DNSRecursorTimeout,
SegmentName: conf.SegmentName,
UDPAnswerLimit: conf.DNSUDPAnswerLimit,
Expand Down Expand Up @@ -1851,7 +1853,8 @@ func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) {
var r *dns.Msg
var rtt time.Duration
var err error
for _, recursor := range cfg.Recursors {
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(req, recursor)
// Check if the response is valid and has the desired Response code
if r != nil && (r.Rcode != dns.RcodeSuccess && r.Rcode != dns.RcodeNameError) {
Expand Down Expand Up @@ -1936,7 +1939,8 @@ func (d *DNSServer) resolveCNAME(cfg *dnsConfig, name string, maxRecursionLevel
var r *dns.Msg
var rtt time.Duration
var err error
for _, recursor := range cfg.Recursors {
for _, idx := range cfg.RecursorStrategy.Indexes(len(cfg.Recursors)) {
recursor := cfg.Recursors[idx]
r, rtt, err = c.Exchange(m, recursor)
if err == nil {
d.logger.Debug("cname recurse RTT for name",
Expand Down
26 changes: 25 additions & 1 deletion agent/dns/dns.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,34 @@
package dns

import "regexp"
import (
"math/rand"
"regexp"
)

// MaxLabelLength is the maximum length for a name that can be used in DNS.
const MaxLabelLength = 63

// InvalidNameRe is a regex that matches characters which can not be included in
// a DNS name.
var InvalidNameRe = regexp.MustCompile(`[^A-Za-z0-9\\-]+`)

type RecursorStrategy string

const (
RecursorStrategySequential RecursorStrategy = "sequential"
RecursorStrategyRandom RecursorStrategy = "random"
)

func (s RecursorStrategy) Indexes(max int) []int {
switch s {
case RecursorStrategyRandom:
return rand.Perm(max)
default:
idxs := make([]int, max)
for i := range idxs {
idxs[i] = i
}
return idxs

}
}
40 changes: 40 additions & 0 deletions agent/dns/dns_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package dns

import (
"testing"

"github.com/hashicorp/consul/sdk/testutil/retry"
"github.com/stretchr/testify/require"
)

func TestDNS_Recursor_StrategyRandom(t *testing.T) {
configuredRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
recursorStrategy := RecursorStrategy("random")

retry.RunWith(&retry.Counter{Count: 5}, t, func(r *retry.R) {
recursorsToQuery := make([]string, 0)
for _, idx := range recursorStrategy.Indexes(len(configuredRecursors)) {
recursorsToQuery = append(recursorsToQuery, configuredRecursors[idx])
}

// Ensure the slices contain the same elements
require.ElementsMatch(t, configuredRecursors, recursorsToQuery)

// Ensure the elements are not in the same order
require.NotEqual(r, configuredRecursors, recursorsToQuery)
})
}

func TestDNS_Recursor_StrategySequential(t *testing.T) {
expectedRecursors := []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"}
recursorStrategy := RecursorStrategy("sequential")

recursorsToQuery := make([]string, 0)
for _, idx := range recursorStrategy.Indexes(len(expectedRecursors)) {
recursorsToQuery = append(recursorsToQuery, expectedRecursors[idx])
}

// The list of recursors should match the order in which they were defined
// in the configuration
require.Equal(t, recursorsToQuery, expectedRecursors)
}
4 changes: 4 additions & 0 deletions agent/dns_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7610,6 +7610,7 @@ func TestDNS_ConfigReload(t *testing.T) {
}
enable_truncate = false
only_passing = false
recursor_strategy = "sequential"
recursor_timeout = "15s"
disable_compression = false
a_record_limit = 1
Expand All @@ -7628,6 +7629,7 @@ func TestDNS_ConfigReload(t *testing.T) {
for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"8.8.8.8:53"}, cfg.Recursors)
require.Equal(t, agentdns.RecursorStrategy("sequential"), cfg.RecursorStrategy)
require.False(t, cfg.AllowStale)
require.Equal(t, 20*time.Second, cfg.MaxStale)
require.Equal(t, 10*time.Second, cfg.NodeTTL)
Expand Down Expand Up @@ -7658,6 +7660,7 @@ func TestDNS_ConfigReload(t *testing.T) {
}
newCfg.DNSEnableTruncate = true
newCfg.DNSOnlyPassing = true
newCfg.DNSRecursorStrategy = "random"
newCfg.DNSRecursorTimeout = 16 * time.Second
newCfg.DNSDisableCompression = true
newCfg.DNSARecordLimit = 2
Expand All @@ -7673,6 +7676,7 @@ func TestDNS_ConfigReload(t *testing.T) {
for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"1.1.1.1:53"}, cfg.Recursors)
require.Equal(t, agentdns.RecursorStrategy("random"), cfg.RecursorStrategy)
require.True(t, cfg.AllowStale)
require.Equal(t, 21*time.Second, cfg.MaxStale)
require.Equal(t, 11*time.Second, cfg.NodeTTL)
Expand Down
5 changes: 5 additions & 0 deletions website/content/docs/agent/options.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -1357,6 +1357,11 @@ bind_addr = "{{ GetPrivateInterfaces | include \"network\" \"10.0.0.0/8\" | attr
then all services on that node will be excluded because they are also considered
critical.

- `recursor_strategy` - If set to `sequential`, Consul will query recursors in the
order listed in the [`recursors`](#recursors) option. If set to `random`,
Consul will query an upstream DNS resolvers in a random order. Defaults to
`sequential`.

- `recursor_timeout` - Timeout used by Consul when
recursively querying an upstream DNS server. See [`recursors`](#recursors) for more details. Default is 2s. This is available in Consul 0.7 and later.

Expand Down

0 comments on commit a0cd3dd

Please sign in to comment.