Skip to content

Commit

Permalink
fix mTLS certificate check on agent to agent RPCs
Browse files Browse the repository at this point in the history
PR #11956 implemented a new mTLS RPC check to validate the role of the
certificate used in the request, but further testing revealed two flaws:

  1. client-only endpoints did not accept server certificates so the
     request would fail when forwarded from one server to another.
  2. the certificate was being checked after the request was forwarded,
     so the check would happen over the server certificate, not the
     actual source.

This commit checks for the desired mTLS level, where the client level
accepts both, a server or a client certificate. It also validates the
cercertificate before the request is forwarded.
  • Loading branch information
lgfa29 committed Feb 3, 2022
1 parent 5a32783 commit 2e72cde
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 101 deletions.
17 changes: 1 addition & 16 deletions .semgrep/rpc_endpoint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,11 @@ rules:
# Pattern used by endpoints called exclusively between agents
# (server -> server or client -> server)
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateLocalClientTLSCertificate(...)
... := validateTLSCertificateLevel(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateLocalServerTLSCertificate(...)
...
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
return err
}
...
... := validateTLSCertificate(...)
...
# Pattern used by some Node endpoints.
- pattern-not-inside: |
if done, err := $A.$B.forward($METHOD, ...); done {
Expand Down
12 changes: 7 additions & 5 deletions nomad/alloc_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,15 +222,17 @@ func (a *Alloc) GetAlloc(args *structs.AllocSpecificRequest,
// GetAllocs is used to lookup a set of allocations
func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest,
reply *structs.AllocsGetResponse) error {
if done, err := a.srv.forward("Alloc.GetAllocs", args, args, reply); done {

// Ensure the connection was initiated by a client if TLS is used.
err := validateTLSCertificateLevel(a.srv, a.ctx, tlsCertificateLevelClient)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "get_allocs"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(a.srv, a.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", a.srv.Region(), err)
if done, err := a.srv.forward("Alloc.GetAllocs", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "alloc", "get_allocs"}, time.Now())

allocs := make([]*structs.Allocation, len(args.AllocIDs))

Expand Down
12 changes: 7 additions & 5 deletions nomad/deployment_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -504,15 +504,17 @@ func (d *Deployment) Allocations(args *structs.DeploymentSpecificRequest, reply
// Reap is used to cleanup terminal deployments
func (d *Deployment) Reap(args *structs.DeploymentDeleteRequest,
reply *structs.GenericResponse) error {
if done, err := d.srv.forward("Deployment.Reap", args, args, reply); done {

// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(d.srv, d.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "deployment", "reap"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(d.srv, d.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", d.srv.Region(), err)
if done, err := d.srv.forward("Deployment.Reap", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "deployment", "reap"}, time.Now())

// Update via Raft
_, index, err := d.srv.raftApply(structs.DeploymentDeleteRequestType, args)
Expand Down
83 changes: 48 additions & 35 deletions nomad/eval_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,17 @@ func (e *Eval) GetEval(args *structs.EvalSpecificRequest,
// Dequeue is used to dequeue a pending evaluation
func (e *Eval) Dequeue(args *structs.EvalDequeueRequest,
reply *structs.EvalDequeueResponse) error {
if done, err := e.srv.forward("Eval.Dequeue", args, args, reply); done {

// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "dequeue"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
if done, err := e.srv.forward("Eval.Dequeue", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "dequeue"}, time.Now())

// Ensure there is at least one scheduler
if len(args.Schedulers) == 0 {
Expand Down Expand Up @@ -175,15 +177,17 @@ func (e *Eval) getWaitIndex(namespace, job string, evalModifyIndex uint64) (uint
// Ack is used to acknowledge completion of a dequeued evaluation
func (e *Eval) Ack(args *structs.EvalAckRequest,
reply *structs.GenericResponse) error {
if done, err := e.srv.forward("Eval.Ack", args, args, reply); done {

// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "ack"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
if done, err := e.srv.forward("Eval.Ack", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "ack"}, time.Now())

// Ack the EvalID
if err := e.srv.evalBroker.Ack(args.EvalID, args.Token); err != nil {
Expand All @@ -195,15 +199,17 @@ func (e *Eval) Ack(args *structs.EvalAckRequest,
// Nack is used to negative acknowledge completion of a dequeued evaluation.
func (e *Eval) Nack(args *structs.EvalAckRequest,
reply *structs.GenericResponse) error {
if done, err := e.srv.forward("Eval.Nack", args, args, reply); done {

// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "nack"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
if done, err := e.srv.forward("Eval.Nack", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "nack"}, time.Now())

// Nack the EvalID
if err := e.srv.evalBroker.Nack(args.EvalID, args.Token); err != nil {
Expand All @@ -215,15 +221,17 @@ func (e *Eval) Nack(args *structs.EvalAckRequest,
// Update is used to perform an update of an Eval if it is outstanding.
func (e *Eval) Update(args *structs.EvalUpdateRequest,
reply *structs.GenericResponse) error {
if done, err := e.srv.forward("Eval.Update", args, args, reply); done {

// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "update"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
if done, err := e.srv.forward("Eval.Update", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "update"}, time.Now())

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
Expand All @@ -250,15 +258,17 @@ func (e *Eval) Update(args *structs.EvalUpdateRequest,
// Create is used to make a new evaluation
func (e *Eval) Create(args *structs.EvalUpdateRequest,
reply *structs.GenericResponse) error {
if done, err := e.srv.forward("Eval.Create", args, args, reply); done {

// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "create"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
if done, err := e.srv.forward("Eval.Create", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "create"}, time.Now())

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
Expand Down Expand Up @@ -300,15 +310,16 @@ func (e *Eval) Create(args *structs.EvalUpdateRequest,
// Reblock is used to reinsert an existing blocked evaluation into the blocked
// evaluation tracker.
func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericResponse) error {
if done, err := e.srv.forward("Eval.Reblock", args, args, reply); done {
// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "reblock"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
if done, err := e.srv.forward("Eval.Reblock", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "reblock"}, time.Now())

// Ensure there is only a single update with token
if len(args.Evals) != 1 {
Expand Down Expand Up @@ -347,15 +358,17 @@ func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericRe
// Reap is used to cleanup dead evaluations and allocations
func (e *Eval) Reap(args *structs.EvalDeleteRequest,
reply *structs.GenericResponse) error {
if done, err := e.srv.forward("Eval.Reap", args, args, reply); done {

// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(e.srv, e.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "reap"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(e.srv, e.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", e.srv.Region(), err)
if done, err := e.srv.forward("Eval.Reap", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "eval", "reap"}, time.Now())

// Update via Raft
_, index, err := e.srv.raftApply(structs.EvalDeleteRequestType, args)
Expand Down
22 changes: 12 additions & 10 deletions nomad/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1098,15 +1098,16 @@ func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,

// UpdateAlloc is used to update the client status of an allocation
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
// Ensure the connection was initiated by another client if TLS is used.
err := validateTLSCertificateLevel(n.srv, n.ctx, tlsCertificateLevelClient)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(n.srv, n.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", n.srv.Region(), err)
if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

// Ensure at least a single alloc
if len(args.Alloc) == 0 {
Expand Down Expand Up @@ -1920,15 +1921,16 @@ func taskUsesConnect(task *structs.Task) bool {
}

func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error {
if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done {
// Ensure the connection was initiated by another client if TLS is used.
err := validateTLSCertificateLevel(n.srv, n.ctx, tlsCertificateLevelClient)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())

// Ensure the connection was initiated by a client if TLS is used.
if err := validateLocalClientTLSCertificate(n.srv, n.ctx); err != nil {
return fmt.Errorf("invalid client connection in region %s: %v", n.srv.Region(), err)
if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())

if len(args.NodeEvents) == 0 {
return fmt.Errorf("no node events given")
Expand Down
11 changes: 6 additions & 5 deletions nomad/plan_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@ type Plan struct {

// Submit is used to submit a plan to the leader
func (p *Plan) Submit(args *structs.PlanRequest, reply *structs.PlanResponse) error {
if done, err := p.srv.forward("Plan.Submit", args, args, reply); done {
// Ensure the connection was initiated by another server if TLS is used.
err := validateTLSCertificateLevel(p.srv, p.ctx, tlsCertificateLevelServer)
if err != nil {
return err
}
defer metrics.MeasureSince([]string{"nomad", "plan", "submit"}, time.Now())

// Ensure the connection was initiated by another server if TLS is used.
if err := validateLocalServerTLSCertificate(p.srv, p.ctx); err != nil {
return fmt.Errorf("invalid server connection in region %s: %v", p.srv.Region(), err)
if done, err := p.srv.forward("Plan.Submit", args, args, reply); done {
return err
}
defer metrics.MeasureSince([]string{"nomad", "plan", "submit"}, time.Now())

if args.Plan == nil {
return fmt.Errorf("cannot submit nil plan")
Expand Down
11 changes: 5 additions & 6 deletions nomad/rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,16 +127,15 @@ func (ctx *RPCContext) ValidateCertificateForName(name string) error {
if cert == nil {
return errors.New("missing certificate information")
}
for _, dnsName := range cert.DNSNames {
if dnsName == name {

validNames := append(cert.DNSNames, cert.Subject.CommonName)
for _, valid := range validNames {
if name == valid {
return nil
}
}
if cert.Subject.CommonName == name {
return nil
}

return fmt.Errorf("certificate not valid for %q", name)
return fmt.Errorf("invalid certificate, %s not in %s", name, strings.Join(validNames, ","))
}

// listen is used to listen for incoming RPC connections
Expand Down
Loading

0 comments on commit 2e72cde

Please sign in to comment.