Skip to content

Commit

Permalink
Security fixes (#7182)
Browse files Browse the repository at this point in the history
* Mitigate HTTP/RPC Services Allow Unbounded Resource Usage

Fixes #7159.

Co-authored-by: Matt Keeler <mkeeler@users.noreply.github.com>
Co-authored-by: Paul Banks <banks@banksco.de>
  • Loading branch information
3 people committed Jan 31, 2020
1 parent d5f9268 commit 5531678
Show file tree
Hide file tree
Showing 33 changed files with 1,165 additions and 90 deletions.
59 changes: 53 additions & 6 deletions agent/agent.go
Expand Up @@ -18,6 +18,7 @@ import (
"sync"
"time"

"github.com/hashicorp/go-connlimit"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"

Expand Down Expand Up @@ -305,6 +306,10 @@ type Agent struct {
// store within the data directory. This will prevent loading while writing as
// well as multiple concurrent writes.
persistedTokensLock sync.RWMutex

// httpConnLimiter is used to limit connections to the HTTP server by client
// IP.
httpConnLimiter connlimit.Limiter
}

// New verifies the configuration given has a Datacenter and DataDir
Expand Down Expand Up @@ -524,6 +529,11 @@ func (a *Agent) Start() error {
return err
}

// Configure the http connection limiter.
a.httpConnLimiter.SetConfig(connlimit.Config{
MaxConnsPerClientIP: a.config.HTTPMaxConnsPerClient,
})

// Create listeners and unstarted servers; see comment on listenHTTP why
// we are doing this.
servers, err := a.listenHTTP()
Expand Down Expand Up @@ -866,6 +876,7 @@ func (a *Agent) listenHTTP() ([]*HTTPServer, error) {
tlscfg = a.tlsConfigurator.IncomingHTTPSConfig()
l = tls.NewListener(l, tlscfg)
}

srv := &HTTPServer{
Server: &http.Server{
Addr: l.Addr().String(),
Expand All @@ -878,13 +889,37 @@ func (a *Agent) listenHTTP() ([]*HTTPServer, error) {
}
srv.Server.Handler = srv.handler(a.config.EnableDebug)

// This will enable upgrading connections to HTTP/2 as
// part of TLS negotiation.
// Load the connlimit helper into the server
connLimitFn := a.httpConnLimiter.HTTPConnStateFunc()

if proto == "https" {
// Enforce TLS handshake timeout
srv.Server.ConnState = func(conn net.Conn, state http.ConnState) {
switch state {
case http.StateNew:
// Set deadline to prevent slow send before TLS handshake or first
// byte of request.
conn.SetReadDeadline(time.Now().Add(a.config.HTTPSHandshakeTimeout))
case http.StateActive:
// Clear read deadline. We should maybe set read timeouts more
// generally but that's a bigger task as some HTTP endpoints may
// stream large requests and responses (e.g. snapshot) so we can't
// set sensible blanket timeouts here.
conn.SetReadDeadline(time.Time{})
}
// Pass through to conn limit. This is OK because we didn't change
// state (i.e. Close conn).
connLimitFn(conn, state)
}

// This will enable upgrading connections to HTTP/2 as
// part of TLS negotiation.
err = http2.ConfigureServer(srv.Server, nil)
if err != nil {
return err
}
} else {
srv.Server.ConnState = connLimitFn
}

ln = append(ln, l)
Expand Down Expand Up @@ -1252,10 +1287,18 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
base.RPCMaxBurst = a.config.RPCMaxBurst
}

// RPC-related performance configs.
if a.config.RPCHoldTimeout > 0 {
base.RPCHoldTimeout = a.config.RPCHoldTimeout
// RPC timeouts/limits.
if a.config.RPCHandshakeTimeout > 0 {
base.RPCHandshakeTimeout = a.config.RPCHandshakeTimeout
}
if a.config.RPCMaxConnsPerClient > 0 {
base.RPCMaxConnsPerClient = a.config.RPCMaxConnsPerClient
}

// RPC-related performance configs. We allow explicit zero value to disable so
// copy it whatever the value.
base.RPCHoldTimeout = a.config.RPCHoldTimeout

if a.config.LeaveDrainTime > 0 {
base.LeaveDrainTime = a.config.LeaveDrainTime
}
Expand Down Expand Up @@ -3970,6 +4013,10 @@ func (a *Agent) ReloadConfig(newCfg *config.RuntimeConfig) error {

a.loadLimits(newCfg)

a.httpConnLimiter.SetConfig(connlimit.Config{
MaxConnsPerClientIP: newCfg.HTTPMaxConnsPerClient,
})

for _, s := range a.dnsServers {
if err := s.ReloadConfig(newCfg); err != nil {
return fmt.Errorf("Failed reloading dns config : %v", err)
Expand All @@ -3979,7 +4026,7 @@ func (a *Agent) ReloadConfig(newCfg *config.RuntimeConfig) error {
// this only gets used by the consulConfig function and since
// that is only ever done during init and reload here then
// an in place modification is safe as reloads cannot be
// concurrent due to both gaing a full lock on the stateLock
// concurrent due to both gaining a full lock on the stateLock
a.config.ConfigEntryBootstrap = newCfg.ConfigEntryBootstrap

// create the config for the rpc server/client
Expand Down
4 changes: 4 additions & 0 deletions agent/config/builder.go
Expand Up @@ -906,6 +906,8 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
EncryptVerifyOutgoing: b.boolVal(c.EncryptVerifyOutgoing),
GRPCPort: grpcPort,
GRPCAddrs: grpcAddrs,
HTTPMaxConnsPerClient: b.intVal(c.Limits.HTTPMaxConnsPerClient),
HTTPSHandshakeTimeout: b.durationVal("limits.https_handshake_timeout", c.Limits.HTTPSHandshakeTimeout),
KeyFile: b.stringVal(c.KeyFile),
KVMaxValueSize: b.uint64Val(c.Limits.KVMaxValueSize),
LeaveDrainTime: b.durationVal("performance.leave_drain_time", c.Performance.LeaveDrainTime),
Expand All @@ -925,8 +927,10 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
PrimaryDatacenter: primaryDatacenter,
RPCAdvertiseAddr: rpcAdvertiseAddr,
RPCBindAddr: rpcBindAddr,
RPCHandshakeTimeout: b.durationVal("limits.rpc_handshake_timeout", c.Limits.RPCHandshakeTimeout),
RPCHoldTimeout: b.durationVal("performance.rpc_hold_timeout", c.Performance.RPCHoldTimeout),
RPCMaxBurst: b.intVal(c.Limits.RPCMaxBurst),
RPCMaxConnsPerClient: b.intVal(c.Limits.RPCMaxConnsPerClient),
RPCProtocol: b.intVal(c.RPCProtocol),
RPCRateLimit: rate.Limit(b.float64Val(c.Limits.RPCRate)),
RaftProtocol: b.intVal(c.RaftProtocol),
Expand Down
10 changes: 7 additions & 3 deletions agent/config/config.go
Expand Up @@ -675,9 +675,13 @@ type UnixSocket struct {
}

type Limits struct {
RPCMaxBurst *int `json:"rpc_max_burst,omitempty" hcl:"rpc_max_burst" mapstructure:"rpc_max_burst"`
RPCRate *float64 `json:"rpc_rate,omitempty" hcl:"rpc_rate" mapstructure:"rpc_rate"`
KVMaxValueSize *uint64 `json:"kv_max_value_size,omitempty" hcl:"kv_max_value_size" mapstructure:"kv_max_value_size"`
HTTPMaxConnsPerClient *int `json:"http_max_conns_per_client,omitempty" hcl:"http_max_conns_per_client" mapstructure:"http_max_conns_per_client"`
HTTPSHandshakeTimeout *string `json:"https_handshake_timeout,omitempty" hcl:"https_handshake_timeout" mapstructure:"https_handshake_timeout"`
RPCHandshakeTimeout *string `json:"rpc_handshake_timeout,omitempty" hcl:"rpc_handshake_timeout" mapstructure:"rpc_handshake_timeout"`
RPCMaxBurst *int `json:"rpc_max_burst,omitempty" hcl:"rpc_max_burst" mapstructure:"rpc_max_burst"`
RPCMaxConnsPerClient *int `json:"rpc_max_conns_per_client,omitempty" hcl:"rpc_max_conns_per_client" mapstructure:"rpc_max_conns_per_client"`
RPCRate *float64 `json:"rpc_rate,omitempty" hcl:"rpc_rate" mapstructure:"rpc_rate"`
KVMaxValueSize *uint64 `json:"kv_max_value_size,omitempty" hcl:"kv_max_value_size" mapstructure:"kv_max_value_size"`
}

type Segment struct {
Expand Down
4 changes: 4 additions & 0 deletions agent/config/default.go
Expand Up @@ -103,8 +103,12 @@ func DefaultSource() Source {
recursor_timeout = "2s"
}
limits = {
http_max_conns_per_client = 100
https_handshake_timeout = "5s"
rpc_handshake_timeout = "5s"
rpc_rate = -1
rpc_max_burst = 1000
rpc_max_conns_per_client = 100
kv_max_value_size = ` + strconv.FormatInt(raft.SuggestedMaxDataSize, 10) + `
}
performance = {
Expand Down
27 changes: 27 additions & 0 deletions agent/config/runtime.go
Expand Up @@ -803,6 +803,18 @@ type RuntimeConfig struct {
// hcl: client_addr = string addresses { https = string } ports { https = int }
HTTPSAddrs []net.Addr

// HTTPMaxConnsPerClient limits the number of concurrent TCP connections the
// HTTP(S) server will accept from any single source IP address.
//
// hcl: limits{ http_max_conns_per_client = 100 }
HTTPMaxConnsPerClient int

// HTTPSHandshakeTimeout is the time allowed for HTTPS client to complete the
// TLS handshake and send first bytes of the request.
//
// hcl: limits{ https_handshake_timeout = "5s" }
HTTPSHandshakeTimeout time.Duration

// HTTPSPort is the port the HTTP server listens on. The default is -1.
// Setting this to a value <= 0 disables the endpoint.
//
Expand Down Expand Up @@ -927,6 +939,15 @@ type RuntimeConfig struct {
// hcl: bind_addr = string ports { server = int }
RPCBindAddr *net.TCPAddr

// RPCHandshakeTimeout is the timeout for reading the initial magic byte on a
// new RPC connection. If this is set high it may allow unauthenticated users
// to hold connections open arbitrarily long, even when mutual TLS is being
// enforced. It may be set to 0 explicitly to disable the timeout but this
// should never be used in production. Default is 5 seconds.
//
// hcl: limits { rpc_handshake_timeout = "duration" }
RPCHandshakeTimeout time.Duration

// RPCHoldTimeout is how long an RPC can be "held" before it is errored.
// This is used to paper over a loss of leadership by instead holding RPCs,
// so that the caller experiences a slow response rather than an error.
Expand All @@ -949,6 +970,12 @@ type RuntimeConfig struct {
RPCRateLimit rate.Limit
RPCMaxBurst int

// RPCMaxConnsPerClient limits the number of concurrent TCP connections the
// RPC server will accept from any single source IP address.
//
// hcl: limits{ rpc_max_conns_per_client = 100 }
RPCMaxConnsPerClient int

// RPCProtocol is the Consul protocol version to use.
//
// hcl: protocol = int
Expand Down
38 changes: 38 additions & 0 deletions agent/config/runtime_test.go
Expand Up @@ -3455,6 +3455,28 @@ func TestConfigFlagsAndEdgecases(t *testing.T) {
}
},
},

///////////////////////////////////
// Defaults sanity checks

{
desc: "default limits",
args: []string{
`-data-dir=` + dataDir,
},
patch: func(rt *RuntimeConfig) {
rt.DataDir = dataDir
// Note that in the happy case this test will pass even if you comment
// out all the stuff below since rt is also initialized from the
// defaults. But it's still valuable as it will fail as soon as the
// defaults are changed from these values forcing that change to be
// intentional.
rt.RPCHandshakeTimeout = 5 * time.Second
rt.HTTPSHandshakeTimeout = 5 * time.Second
rt.HTTPMaxConnsPerClient = 100
rt.RPCMaxConnsPerClient = 100
},
},
}

testConfig(t, tests, dataDir)
Expand Down Expand Up @@ -3871,8 +3893,12 @@ func TestFullConfig(t *testing.T) {
"key_file": "IEkkwgIA",
"leave_on_terminate": true,
"limits": {
"http_max_conns_per_client": 9283,
"https_handshake_timeout": "2391ms",
"rpc_handshake_timeout": "1932ms",
"rpc_rate": 12029.43,
"rpc_max_burst": 44848,
"rpc_max_conns_per_client": 2954,
"kv_max_value_size": 1234567800000000
},
"log_level": "k1zo9Spt",
Expand Down Expand Up @@ -4477,8 +4503,12 @@ func TestFullConfig(t *testing.T) {
key_file = "IEkkwgIA"
leave_on_terminate = true
limits {
http_max_conns_per_client = 9283
https_handshake_timeout = "2391ms"
rpc_handshake_timeout = "1932ms"
rpc_rate = 12029.43
rpc_max_burst = 44848
rpc_max_conns_per_client = 2954
kv_max_value_size = 1234567800000000
}
log_level = "k1zo9Spt"
Expand Down Expand Up @@ -5170,6 +5200,8 @@ func TestFullConfig(t *testing.T) {
HTTPPort: 7999,
HTTPResponseHeaders: map[string]string{"M6TKa9NP": "xjuxjOzQ", "JRCrHZed": "rl0mTx81"},
HTTPSAddrs: []net.Addr{tcpAddr("95.17.17.19:15127")},
HTTPMaxConnsPerClient: 9283,
HTTPSHandshakeTimeout: 2391 * time.Millisecond,
HTTPSPort: 15127,
KeyFile: "IEkkwgIA",
KVMaxValueSize: 1234567800000000,
Expand All @@ -5186,10 +5218,12 @@ func TestFullConfig(t *testing.T) {
PrimaryDatacenter: "ejtmd43d",
RPCAdvertiseAddr: tcpAddr("17.99.29.16:3757"),
RPCBindAddr: tcpAddr("16.99.34.17:3757"),
RPCHandshakeTimeout: 1932 * time.Millisecond,
RPCHoldTimeout: 15707 * time.Second,
RPCProtocol: 30793,
RPCRateLimit: 12029.43,
RPCMaxBurst: 44848,
RPCMaxConnsPerClient: 2954,
RaftProtocol: 19016,
RaftSnapshotThreshold: 16384,
RaftSnapshotInterval: 30 * time.Second,
Expand Down Expand Up @@ -6039,9 +6073,11 @@ func TestSanitize(t *testing.T) {
"unix:///var/run/foo"
],
"HTTPBlockEndpoints": [],
"HTTPMaxConnsPerClient": 0,
"HTTPPort": 0,
"HTTPResponseHeaders": {},
"HTTPSAddrs": [],
"HTTPSHandshakeTimeout": "0s",
"HTTPSPort": 0,
"KeyFile": "hidden",
"KVMaxValueSize": 1234567800000000,
Expand All @@ -6062,8 +6098,10 @@ func TestSanitize(t *testing.T) {
"PrimaryDatacenter": "",
"RPCAdvertiseAddr": "",
"RPCBindAddr": "",
"RPCHandshakeTimeout": "0s",
"RPCHoldTimeout": "0s",
"RPCMaxBurst": 0,
"RPCMaxConnsPerClient": 0,
"RPCProtocol": 0,
"RPCRateLimit": 0,
"RaftProtocol": 0,
Expand Down
10 changes: 10 additions & 0 deletions agent/consul/config.go
Expand Up @@ -388,6 +388,12 @@ type Config struct {
// CheckOutputMaxSize control the max size of output of checks
CheckOutputMaxSize int

// RPCHandshakeTimeout limits how long we will wait for the initial magic byte
// on an RPC client connection. It also governs how long we will wait for a
// TLS handshake when TLS is configured however the timout applies separately
// for the initial magic byte and the TLS handshake and inner magic byte.
RPCHandshakeTimeout time.Duration

// RPCHoldTimeout is how long an RPC can be "held" before it is errored.
// This is used to paper over a loss of leadership by instead holding RPCs,
// so that the caller experiences a slow response rather than an error.
Expand All @@ -406,6 +412,10 @@ type Config struct {
RPCRate rate.Limit
RPCMaxBurst int

// RPCMaxConnsPerClient is the limit of how many concurrent connections are
// allowed from a single source IP.
RPCMaxConnsPerClient int

// LeaveDrainTime is used to wait after a server has left the LAN Serf
// pool for RPCs to drain and new requests to be sent to other servers.
LeaveDrainTime time.Duration
Expand Down

0 comments on commit 5531678

Please sign in to comment.