This repository has been archived by the owner on Aug 13, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 14
Add ability for control servers to broadcast to hubs via gRPC #40
Open
evanphx
wants to merge
4
commits into
main
Choose a base branch
from
f-grpc-inbound
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
f091516
pkg/{hub,control}, cmd/hzn: Add ability for control to monitor hubs i…
evanphx 0bddc27
pkg/{control,hub,pb}: Add gRPC server within hub
evanphx e82d268
pkg/control: Add ability to broadcast changes to hubs via grpc
evanphx 7d82851
Docs and local var change
evanphx File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
package control | ||
|
||
import ( | ||
context "context" | ||
"crypto/tls" | ||
"crypto/x509" | ||
"sync" | ||
|
||
"github.com/hashicorp/go-hclog" | ||
"github.com/hashicorp/go-multierror" | ||
"github.com/hashicorp/horizon/pkg/grpc/lz4" | ||
grpctoken "github.com/hashicorp/horizon/pkg/grpc/token" | ||
"github.com/hashicorp/horizon/pkg/pb" | ||
"github.com/hashicorp/horizon/pkg/utils" | ||
"google.golang.org/grpc" | ||
gcreds "google.golang.org/grpc/credentials" | ||
) | ||
|
||
// HubCatalog supplies the addresses of the hubs that should receive broadcasts.
// It exists to decouple the gathering and management of hub addresses from the
// code that broadcasts to them. In production it is implemented by ConsulMonitor.
type HubCatalog interface {
	// Targets returns the addresses of the hubs to broadcast to.
	Targets() []string
}
|
||
// Broadcaster is a simple fan out value. The commands sent to it via funciton calls are | ||
// fanned out to all targets in the given HubCatalog. | ||
type Broadcaster struct { | ||
L hclog.Logger | ||
catalog HubCatalog | ||
conn func(addr string) (pb.HubServicesClient, error) | ||
} | ||
|
||
// NewBroadcaster creates a new Broadcaster value. The targets to broadcast to come from | ||
// catalog. conn is how we actually open a connection to the target. This conn decoupling | ||
// makes this code much easier to test. In production, conn is usually GRPCDial.Dial. | ||
func NewBroadcaster( | ||
L hclog.Logger, | ||
catalog HubCatalog, | ||
conn func(addr string) (pb.HubServicesClient, error), | ||
) (*Broadcaster, error) { | ||
br := &Broadcaster{ | ||
L: L, | ||
catalog: catalog, | ||
conn: conn, | ||
} | ||
|
||
return br, nil | ||
} | ||
|
||
// AdvertiseServices gets a list of targets from the catalog and calls AddService | ||
// on the clients generated from the connect function (which defaults to dialing a grpc | ||
// connection to the target) | ||
func (b *Broadcaster) AdvertiseServices(ctx context.Context, as *pb.AccountServices) error { | ||
var topError error | ||
|
||
targets := b.catalog.Targets() | ||
|
||
b.L.Info("hub broadcasting beginning", "targets", len(targets)) | ||
|
||
for _, tgt := range targets { | ||
b.L.Info("broadcasting hub update", "target", tgt) | ||
client, err := b.conn(tgt) | ||
if err != nil { | ||
topError = multierror.Append(topError, err) | ||
continue | ||
} | ||
|
||
_, err = client.AddServices(ctx, as) | ||
if err != nil { | ||
topError = multierror.Append(topError, err) | ||
} | ||
} | ||
|
||
return topError | ||
} | ||
|
||
// GRPCDial provides connection pooling grpc connections to hubs. It is used to | ||
// avoid spinning up new TCP connections to hubs on every advertise operation. | ||
type GRPCDial struct { | ||
token string | ||
cert []byte | ||
|
||
mu sync.RWMutex | ||
grpcConns map[string]*grpc.ClientConn | ||
|
||
tlscfg tls.Config | ||
} | ||
|
||
// NewGRPCDial creates a new GRPCDial value. The given token is the authentication | ||
// token that will be included with all calls to the hubs, to identify them as valid. | ||
// cert is a TLS certification that, if set, will be used as the only cert in the TLS | ||
// RootCAs. This further restricts the code to calling valid hubs by making sure that | ||
// the code is only talking to hubs that are using the certs managed by control. | ||
func NewGRPCDial(token string, cert []byte) (*GRPCDial, error) { | ||
g := &GRPCDial{ | ||
token: token, | ||
cert: cert, | ||
grpcConns: make(map[string]*grpc.ClientConn), | ||
} | ||
|
||
if g.cert != nil { | ||
parsedHubCert, err := utils.ParseCertificate(cert) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
g.tlscfg.RootCAs = x509.NewCertPool() | ||
g.tlscfg.RootCAs.AddCert(parsedHubCert) | ||
} | ||
|
||
return g, nil | ||
} | ||
|
||
// Dial gets a gRPC client for target. It either generates a new gRPC connection | ||
// to the given target, used as a host:port combo. Or it returns a existing | ||
// connection. | ||
func (g *GRPCDial) Dial(target string) (pb.HubServicesClient, error) { | ||
g.mu.RLock() | ||
cc, ok := g.grpcConns[target] | ||
g.mu.RUnlock() | ||
|
||
if ok { | ||
return pb.NewHubServicesClient(cc), nil | ||
} | ||
|
||
g.mu.Lock() | ||
defer g.mu.Unlock() | ||
|
||
// There is a race here so we have to check again. | ||
cc, ok = g.grpcConns[target] | ||
if ok { | ||
return pb.NewHubServicesClient(cc), nil | ||
} | ||
|
||
opts := []grpc.DialOption{ | ||
grpc.WithDefaultCallOptions(grpc.UseCompressor(lz4.Name)), | ||
} | ||
|
||
if g.token != "" { | ||
opts = append(opts, grpc.WithPerRPCCredentials(grpctoken.Token(g.token))) | ||
} | ||
|
||
creds := gcreds.NewTLS(&g.tlscfg) | ||
|
||
opts = append(opts, grpc.WithTransportCredentials(creds)) | ||
|
||
cc, err := grpc.Dial(target, opts...) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
g.grpcConns[target] = cc | ||
|
||
return pb.NewHubServicesClient(cc), nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package control | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
|
||
"github.com/hashicorp/go-hclog" | ||
"github.com/hashicorp/horizon/pkg/pb" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
"google.golang.org/grpc" | ||
) | ||
|
||
// fakeCatalog is a HubCatalog test double backed by a fixed list of addresses.
type fakeCatalog struct {
	targets []string
}

// Targets returns the configured addresses unchanged.
func (f *fakeCatalog) Targets() []string {
	return f.targets
}
|
||
type fakeClient struct { | ||
addr string | ||
services []*pb.AccountServices | ||
} | ||
|
||
func (f *fakeClient) AddServices(ctx context.Context, in *pb.AccountServices, opts ...grpc.CallOption) (*pb.Noop, error) { | ||
f.services = append(f.services, in) | ||
return &pb.Noop{}, nil | ||
} | ||
|
||
func (f *fakeClient) AddLabeLink(ctx context.Context, in *pb.LabelLinks, opts ...grpc.CallOption) (*pb.Noop, error) { | ||
panic("not implemented") // TODO: Implement | ||
} | ||
|
||
func TestBroadcaster(t *testing.T) { | ||
t.Run("fans out new account services to all hubs", func(t *testing.T) { | ||
var ( | ||
fcat fakeCatalog | ||
fcli fakeClient | ||
) | ||
|
||
conn := func(addr string) (pb.HubServicesClient, error) { | ||
fcli.addr = addr | ||
|
||
return &fcli, nil | ||
} | ||
|
||
fcat.targets = []string{"1.2.3.4"} | ||
|
||
bc, err := NewBroadcaster(hclog.L(), &fcat, conn) | ||
require.NoError(t, err) | ||
|
||
as := &pb.AccountServices{ | ||
Account: &pb.Account{ | ||
Namespace: "/", | ||
AccountId: pb.NewULID(), | ||
}, | ||
Services: []*pb.ServiceRoute{ | ||
{ | ||
Hub: pb.NewULID(), | ||
}, | ||
}, | ||
} | ||
|
||
ctx, cancel := context.WithCancel(context.Background()) | ||
defer cancel() | ||
|
||
err = bc.AdvertiseServices(ctx, as) | ||
require.NoError(t, err) | ||
|
||
assert.Equal(t, "1.2.3.4", fcli.addr) | ||
require.Equal(t, 1, len(fcli.services)) | ||
assert.Equal(t, as, fcli.services[0]) | ||
|
||
}) | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need to use both `RLock` and `Lock` here in sequence? I guess this relates to your comment about there being a race condition -- does the race condition require we use a `Lock` on our second check instead of an `RLock` (is there a difference in behavior that contributes to a race condition), or is the reasoning "we need to do a Write, but first we have to check that Read didn't flake out on us previously, and that check has to be within the same Lock as the eventual write-action"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think of it like a table lock in SQL. RLock locks all threads from reading, then sets the current target. Such that everyone behind this current Dial call has to wait to read until it is done. Only then can hypothetically the next person in line read the value, or not to see that it was changed by the person ahead of them. But this is in the weeds for me.