Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Verify TLS certificate on endpoints that are used between agents only #11956

Merged
merged 9 commits into from
Feb 2, 2022
3 changes: 3 additions & 0 deletions .changelog/11956.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:security
server: validate mTLS certificate names on agent to agent endpoints
```
80 changes: 80 additions & 0 deletions .semgrep/rpc_endpoint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
rules:
lgfa29 marked this conversation as resolved.
Show resolved Hide resolved
# Check potentially unauthenticated RPC endpoints
- id: "rpc-potentially-unauthenticated"
patterns:
- pattern: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := $X.$Y.ResolveToken(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := $U.requestACLToken(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := $T.NamespaceValidator(...)
...
# Pattern used by endpoints called exclusively between agents
# (server -> server or client -> server)
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateLocalClientTLSCertificate(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateLocalServerTLSCertificate(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateTLSCertificate(...)
...
# Pattern used by some Node endpoints.
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
return $A.deregister(...)
...
- metavariable-pattern:
metavariable: $METHOD
patterns:
# Endpoints that are expected not to have authentication.
- pattern-not: '"ACL.Bootstrap"'
- pattern-not: '"ACL.ResolveToken"'
- pattern-not: '"ACL.UpsertOneTimeToken"'
- pattern-not: '"ACL.ExchangeOneTimeToken"'
- pattern-not: '"CSIPlugin.Get"'
- pattern-not: '"CSIPlugin.List"'
- pattern-not: '"Status.Leader"'
- pattern-not: '"Status.Peers"'
- pattern-not: '"Status.Version"'
message: "RPC method $METHOD appears to be unauthenticated"
languages:
- "go"
severity: "WARNING"
paths:
include:
- "*_endpoint.go"
8 changes: 8 additions & 0 deletions nomad/alloc_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ import (
type Alloc struct {
srv *Server
logger log.Logger

// ctx provides context regarding the underlying connection
ctx *RPCContext
}

// List is used to list the allocations in the system
Expand Down Expand Up @@ -224,6 +227,11 @@ func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest,
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "get_allocs"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(a.srv, a.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", a.srv.Region(), err)
}

allocs := make([]*structs.Allocation, len(args.AllocIDs))

// Setup the blocking query. We wait for at least one of the requested
Expand Down
8 changes: 8 additions & 0 deletions nomad/deployment_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ import (
type Deployment struct {
srv *Server
logger log.Logger

// ctx provides context regarding the underlying connection
ctx *RPCContext
}

// GetDeployment is used to request information about a specific deployment
Expand Down Expand Up @@ -506,6 +509,11 @@ func (d *Deployment) Reap(args *structs.DeploymentDeleteRequest,
}
defer metrics.MeasureSince([]string{"nomad", "deployment", "reap"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(d.srv, d.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", d.srv.Region(), err)
}

// Update via Raft
_, index, err := d.srv.raftApply(structs.DeploymentDeleteRequestType, args)
if err != nil {
Expand Down
38 changes: 38 additions & 0 deletions nomad/eval_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ const (
type Eval struct {
srv *Server
logger log.Logger

// ctx provides context regarding the underlying connection
ctx *RPCContext
}

// GetEval is used to request information about a specific evaluation
Expand Down Expand Up @@ -87,6 +90,11 @@ func (e *Eval) Dequeue(args *structs.EvalDequeueRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "dequeue"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ensure there is at least one scheduler
if len(args.Schedulers) == 0 {
return fmt.Errorf("dequeue requires at least one scheduler type")
Expand Down Expand Up @@ -172,6 +180,11 @@ func (e *Eval) Ack(args *structs.EvalAckRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "ack"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ack the EvalID
if err := e.srv.evalBroker.Ack(args.EvalID, args.Token); err != nil {
return err
Expand All @@ -187,6 +200,11 @@ func (e *Eval) Nack(args *structs.EvalAckRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "nack"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Nack the EvalID
if err := e.srv.evalBroker.Nack(args.EvalID, args.Token); err != nil {
return err
Expand All @@ -202,6 +220,11 @@ func (e *Eval) Update(args *structs.EvalUpdateRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "update"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
return fmt.Errorf("only a single eval can be updated")
Expand Down Expand Up @@ -232,6 +255,11 @@ func (e *Eval) Create(args *structs.EvalUpdateRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "create"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
return fmt.Errorf("only a single eval can be created")
Expand Down Expand Up @@ -277,6 +305,11 @@ func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericRe
}
defer metrics.MeasureSince([]string{"nomad", "eval", "reblock"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
return fmt.Errorf("only a single eval can be reblocked")
Expand Down Expand Up @@ -319,6 +352,11 @@ func (e *Eval) Reap(args *structs.EvalDeleteRequest,
}
defer metrics.MeasureSince([]string{"nomad", "eval", "reap"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
}

// Update via Raft
_, index, err := e.srv.raftApply(structs.EvalDeleteRequestType, args)
if err != nil {
Expand Down
9 changes: 4 additions & 5 deletions nomad/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegis
reply.Warnings = structs.MergeMultierrorWarnings(warnings...)

// Check job submission permissions
var aclObj *acl.ACL
if aclObj, err = j.srv.ResolveToken(args.AuthToken); err != nil {
aclObj, err := j.srv.ResolveToken(args.AuthToken)
if err != nil {
lgfa29 marked this conversation as resolved.
Show resolved Hide resolved
return err
} else if aclObj != nil {
if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
Expand Down Expand Up @@ -1879,9 +1879,8 @@ func (j *Job) Dispatch(args *structs.JobDispatchRequest, reply *structs.JobDispa
defer metrics.MeasureSince([]string{"nomad", "job", "dispatch"}, time.Now())

// Check for submit-job permissions
var aclObj *acl.ACL
var err error
if aclObj, err = j.srv.ResolveToken(args.AuthToken); err != nil {
aclObj, err := j.srv.ResolveToken(args.AuthToken)
if err != nil {
return err
} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityDispatchJob) {
return structs.ErrPermissionDenied
Expand Down
10 changes: 10 additions & 0 deletions nomad/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,11 @@ func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.Gene
}
defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(n.srv, n.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", n.srv.Region(), err)
}

// Ensure at least a single alloc
if len(args.Alloc) == 0 {
return fmt.Errorf("must update at least one allocation")
Expand Down Expand Up @@ -1920,6 +1925,11 @@ func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.Em
}
defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(n.srv, n.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", n.srv.Region(), err)
}

if len(args.NodeEvents) == 0 {
return fmt.Errorf("no node events given")
}
Expand Down
8 changes: 8 additions & 0 deletions nomad/plan_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ import (
type Plan struct {
srv *Server
logger log.Logger

// ctx provides context regarding the underlying connection
ctx *RPCContext
}

// Submit is used to submit a plan to the leader
Expand All @@ -23,6 +26,11 @@ func (p *Plan) Submit(args *structs.PlanRequest, reply *structs.PlanResponse) er
}
defer metrics.MeasureSince([]string{"nomad", "plan", "submit"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(p.srv, p.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", p.srv.Region(), err)
}

if args.Plan == nil {
return fmt.Errorf("cannot submit nil plan")
}
Expand Down
62 changes: 41 additions & 21 deletions nomad/rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,38 @@ type RPCContext struct {
NodeID string
}

// Certificate returns the first certificate available in the chain.
func (ctx *RPCContext) Certificate() *x509.Certificate {
lgfa29 marked this conversation as resolved.
Show resolved Hide resolved
if ctx == nil || len(ctx.VerifiedChains) == 0 || len(ctx.VerifiedChains[0]) == 0 {
return nil
}

return ctx.VerifiedChains[0][0]
}

// ValidateCertificateForName returns true if the RPC context certificate is valid
// for the given domain name.
func (ctx *RPCContext) ValidateCertificateForName(name string) error {
if ctx == nil || !ctx.TLS {
return nil
}

cert := ctx.Certificate()
if cert == nil {
return errors.New("missing certificate information")
}
for _, dnsName := range cert.DNSNames {
if dnsName == name {
return nil
}
}
if cert.Subject.CommonName == name {
return nil
}

return fmt.Errorf("certificate not valid for %q", name)
}

// listen is used to listen for incoming RPC connections
func (r *rpcHandler) listen(ctx context.Context) {
defer close(r.listenerCh)
Expand Down Expand Up @@ -838,30 +870,18 @@ func (r *rpcHandler) validateRaftTLS(rpcCtx *RPCContext) error {
return nil
}

// defensive conditions: these should have already been enforced by handleConn
if rpcCtx == nil || !rpcCtx.TLS {
return errors.New("non-TLS connection attempted")
}
if len(rpcCtx.VerifiedChains) == 0 || len(rpcCtx.VerifiedChains[0]) == 0 {
// this should never happen, as rpcNameAndRegionValidate should have enforced it
return errors.New("missing cert info")
}

// check that `server.<region>.nomad` is present in cert
expected := "server." + r.Region() + ".nomad"

cert := rpcCtx.VerifiedChains[0][0]
for _, dnsName := range cert.DNSNames {
if dnsName == expected {
// Certificate is valid for the expected name
return nil
err := rpcCtx.ValidateCertificateForName(expected)
if err != nil {
cert := rpcCtx.Certificate()
if cert != nil {
err = fmt.Errorf("request certificate is only valid for %s: %v", cert.DNSNames, err)
}
}
if cert.Subject.CommonName == expected {
// Certificate is valid for the expected name
return nil

return fmt.Errorf("unauthorized raft connection from %s: %v", rpcCtx.Conn.RemoteAddr(), err)
}

r.logger.Warn("unauthorized raft connection", "remote_addr", rpcCtx.Conn.RemoteAddr(), "required_hostname", expected, "found", cert.DNSNames)
return fmt.Errorf("certificate is invalid for expected role or region: %q", expected)
// Certificate is valid for the expected name
return nil
}
Loading