Fix IP overlap with empty EndpointSpec
Passing an empty EndpointSpec in the service spec
was correctly triggering the VIP allocation, but
during leader election the IPAM state restore was
erroneously trying to release the VIP.
The fix focuses on proper handling of the restart case.

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
Flavio Crisciani committed Feb 7, 2018
1 parent 607c4a0 commit 03eda38
Showing 3 changed files with 157 additions and 7 deletions.
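
The crux of the fix is the new existingAddressesOnly flag on allocateService: during the leader-election restore pass it suppresses the deallocation branch, so a service whose spec carries no EndpointSpec keeps the VIPs recorded in raft. Below is a minimal, runnable Go sketch of that decision; shouldDeallocate and its boolean parameters are illustrative stand-ins (not SwarmKit's actual API) for the s.Spec.Endpoint != nil and s.Endpoint != nil checks in the diff.

package main

import "fmt"

// shouldDeallocate models the branch fixed in allocateService (a sketch,
// not the real signature). specHasEndpoint stands for s.Spec.Endpoint != nil,
// hasAllocatedEndpoint for s.Endpoint != nil.
func shouldDeallocate(specHasEndpoint, hasAllocatedEndpoint, existingAddressesOnly bool) bool {
    if specHasEndpoint {
        // The spec drives (re)allocation; nothing to release on this path.
        return false
    }
    // Before the fix the condition was just hasAllocatedEndpoint, so the
    // restore pass (existingAddressesOnly == true) wrongly released VIPs
    // that raft still recorded as in use.
    return hasAllocatedEndpoint && !existingAddressesOnly
}

func main() {
    // Restart: a service restored from raft has VIPs but a nil spec
    // endpoint; the fixed logic keeps the VIPs in IPAM.
    fmt.Println(shouldDeallocate(false, true, true)) // false: keep VIPs

    // A normal update that drops the endpoint spec still deallocates.
    fmt.Println(shouldDeallocate(false, true, false)) // true: release VIPs
}

As the network.go hunks below show, allocateServices forwards its existingAddressesOnly argument while the event-driven call sites pass false, so only the init path run after leader election is affected.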
143 changes: 143 additions & 0 deletions manager/allocator/allocator_test.go
@@ -796,6 +796,149 @@ func TestAllocatorRestoreForDuplicateIPs(t *testing.T) {
}
}

// TestAllocatorRestartNoEndpointSpec covers the leader election case where the service Spec
// does not contain the EndpointSpec.
// The expected behavior is that the VIP(s) are still correctly populated inside
// the IPAM and that no configuration on the service is changed.
func TestAllocatorRestartNoEndpointSpec(t *testing.T) {
s := store.NewMemoryStore(nil)
assert.NotNil(t, s)
defer s.Close()
// Create 3 services with 1 task each
numsvcstsks := 3
assert.NoError(t, s.Update(func(tx store.Tx) error {
// populate ingress network
in := &api.Network{
ID: "overlay1",
Spec: api.NetworkSpec{
Annotations: api.Annotations{
Name: "net1",
},
},
}
assert.NoError(t, store.CreateNetwork(tx, in))

for i := 0; i != numsvcstsks; i++ {
svc := &api.Service{
ID: "testServiceID" + strconv.Itoa(i),
Spec: api.ServiceSpec{
Annotations: api.Annotations{
Name: "service" + strconv.Itoa(i),
},
// Endpoint: &api.EndpointSpec{
// Mode: api.ResolutionModeVirtualIP,
// },
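// Spec.Endpoint is deliberately left nil here (see the commented
// lines above); only the allocated Endpoint below carries the VIPs,
// mimicking a service restored from raft during leader election.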
Task: api.TaskSpec{
Networks: []*api.NetworkAttachmentConfig{
{
Target: "overlay1",
},
},
},
},
Endpoint: &api.Endpoint{
Spec: &api.EndpointSpec{
Mode: api.ResolutionModeVirtualIP,
},
VirtualIPs: []*api.Endpoint_VirtualIP{
{
NetworkID: "overlay1",
Addr: "10.0.0." + strconv.Itoa(2+2*i) + "/24",
},
},
},
}
assert.NoError(t, store.CreateService(tx, svc))
}
return nil
}))

for i := 0; i != numsvcstsks; i++ {
assert.NoError(t, s.Update(func(tx store.Tx) error {
tsk := &api.Task{
ID: "testTaskID" + strconv.Itoa(i),
Status: api.TaskStatus{
State: api.TaskStateNew,
},
ServiceID: "testServiceID" + strconv.Itoa(i),
DesiredState: api.TaskStateRunning,
Networks: []*api.NetworkAttachment{
{
Network: &api.Network{
ID: "overlay1",
},
},
},
}
assert.NoError(t, store.CreateTask(tx, tsk))
return nil
}))
}

expectedIPs := map[string]string{
"testServiceID0": "10.0.0.2/24",
"testServiceID1": "10.0.0.4/24",
"testServiceID2": "10.0.0.6/24",
"testTaskID0": "10.0.0.3/24",
"testTaskID1": "10.0.0.5/24",
"testTaskID2": "10.0.0.7/24",
}
assignedIPs := make(map[string]bool)
hasNoIPOverlapServices := func(fakeT assert.TestingT, service *api.Service) bool {
assert.NotEqual(fakeT, len(service.Endpoint.VirtualIPs), 0)
assert.NotEqual(fakeT, len(service.Endpoint.VirtualIPs[0].Addr), 0)

assignedVIP := service.Endpoint.VirtualIPs[0].Addr
if assignedIPs[assignedVIP] {
t.Fatalf("service %s assigned duplicate IP %s", service.ID, assignedVIP)
}
assignedIPs[assignedVIP] = true
ip, ok := expectedIPs[service.ID]
assert.True(t, ok)
assert.Equal(t, ip, assignedVIP)
delete(expectedIPs, service.ID)
return true
}

hasNoIPOverlapTasks := func(fakeT assert.TestingT, s *store.MemoryStore, task *api.Task) bool {
assert.NotEqual(fakeT, len(task.Networks), 0)
assert.NotEqual(fakeT, len(task.Networks[0].Addresses), 0)

assignedIP := task.Networks[0].Addresses[0]
if assignedIPs[assignedIP] {
t.Fatalf("task %s assigned duplicate IP %s", task.ID, assignedIP)
}
assignedIPs[assignedIP] = true
ip, ok := expectedIPs[task.ID]
assert.True(t, ok)
assert.Equal(t, ip, assignedIP)
delete(expectedIPs, task.ID)
return true
}

a, err := New(s, nil)
assert.NoError(t, err)
assert.NotNil(t, a)
// Start allocator
go func() {
assert.NoError(t, a.Run(context.Background()))
}()
defer a.Stop()

taskWatch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{}, api.EventDeleteTask{})
defer cancel()

serviceWatch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateService{}, api.EventDeleteService{})
defer cancel()

// Confirm tasks have no IPs that overlap with the services' VIPs on restart
for i := 0; i != numsvcstsks; i++ {
watchTask(t, s, taskWatch, false, hasNoIPOverlapTasks)
watchService(t, serviceWatch, false, hasNoIPOverlapServices)
}
assert.Len(t, expectedIPs, 0)
}

func TestNodeAllocator(t *testing.T) {
s := store.NewMemoryStore(nil)
assert.NotNil(t, s)
2 changes: 2 additions & 0 deletions manager/allocator/cnmallocator/networkallocator.go
@@ -15,6 +15,7 @@ import (
"github.com/docker/swarmkit/log"
"github.com/docker/swarmkit/manager/allocator/networkallocator"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
)

@@ -244,6 +245,7 @@ vipLoop:
}
for _, nAttach := range specNetworks {
if nAttach.Target == eAttach.NetworkID {
log.L.WithFields(logrus.Fields{"service_id": s.ID, "vip": eAttach.Addr}).Debug("allocate vip")
if err = na.allocateVIP(eAttach); err != nil {
return err
}
19 changes: 12 additions & 7 deletions manager/allocator/network.go
@@ -244,7 +244,7 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) {
break
}

if err := a.allocateService(ctx, s); err != nil {
if err := a.allocateService(ctx, s, false); err != nil {
log.G(ctx).WithError(err).Errorf("Failed allocation for service %s", s.ID)
break
}
@@ -274,7 +274,7 @@ }
}
updatePortsInHostPublishMode(s)
} else {
if err := a.allocateService(ctx, s); err != nil {
if err := a.allocateService(ctx, s, false); err != nil {
log.G(ctx).WithError(err).Errorf("Failed allocation during update of service %s", s.ID)
break
}
@@ -587,8 +587,8 @@ func (a *Allocator) allocateServices(ctx context.Context, existingAddressesOnly
continue
}

if err := a.allocateService(ctx, s); err != nil {
log.G(ctx).WithError(err).Errorf("failed allocating service %s during init", s.ID)
if err := a.allocateService(ctx, s, existingAddressesOnly); err != nil {
log.G(ctx).WithField("existingAddressesOnly", existingAddressesOnly).WithError(err).Errorf("failed allocating service %s during init", s.ID)
continue
}
allocatedServices = append(allocatedServices, s)
@@ -940,7 +940,10 @@ func updatePortsInHostPublishMode(s *api.Service) {
s.Endpoint.Spec = s.Spec.Endpoint.Copy()
}

func (a *Allocator) allocateService(ctx context.Context, s *api.Service) error {
// allocateService aligns the desired state with the passed spec.
// The last parameter is true only during restart, when the data is read
// from raft and used to build the internal state.
func (a *Allocator) allocateService(ctx context.Context, s *api.Service, existingAddressesOnly bool) error {
nc := a.netCtx

if s.Spec.Endpoint != nil {
@@ -972,7 +975,9 @@ func (a *Allocator) allocateService(ctx context.Context, s *api.Service) error {
&api.Endpoint_VirtualIP{NetworkID: nc.ingressNetwork.ID})
}
}
} else if s.Endpoint != nil {
} else if s.Endpoint != nil && !existingAddressesOnly {
// If we are in the restart phase there is no reason to try to deallocate
// anything, because the allocator state has not been built yet.
// The service has no user-defined endpoints but has already allocated
// network resources, which need to be deallocated.
if err := nc.nwkAllocator.DeallocateService(s); err != nil {
@@ -1188,7 +1193,7 @@ func (a *Allocator) procUnallocatedServices(ctx context.Context) {
var allocatedServices []*api.Service
for _, s := range nc.unallocatedServices {
if !nc.nwkAllocator.IsServiceAllocated(s) {
if err := a.allocateService(ctx, s); err != nil {
if err := a.allocateService(ctx, s, false); err != nil {
log.G(ctx).WithError(err).Debugf("Failed allocation of unallocated service %s", s.ID)
continue
}
