Skip to content

Commit

Permalink
Gossip Discovery small fix
Browse files Browse the repository at this point in the history
Fixes a scenario in which a node is resurrected but isn't found in the dead list;
in that case it still needs to be added to the alive list.

Also fixes a bug when expiring a peer:
if it has no last-seen timestamp in aliveLastTS, do not move
an entry to deadLastTS, because no such timestamp exists.

Change-Id: I7b5fdd21abc4dffcf2bcc03c0d9a9190d19d3506
Signed-off-by: Yacov Manevich <yacovm@il.ibm.com>
  • Loading branch information
yacovm committed Sep 29, 2016
1 parent 03c4a70 commit ecc4ea8
Showing 1 changed file with 19 additions and 16 deletions.
35 changes: 19 additions & 16 deletions gossip/discovery/discovery_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,19 +280,12 @@ func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.AliveMessage) {
d.logger.Debug("Got alive message about ourselves,", m)
return
}
endpoint := m.Membership.Endpoint
ts := m.Timestamp
meta := m.Membership.Metadata

d.lock.RLock()
_, known := d.id2Member[id]
d.lock.RUnlock()

netMember := &NetworkMember{
Id: id,
Endpoint: endpoint,
Metadata: meta,
}

if !known {
d.learnNewMembers([]*proto.AliveMessage{m}, []*proto.AliveMessage{})
Expand All @@ -316,7 +309,7 @@ func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.AliveMessage) {

if !isAlive && uint64(lastDeadTS.incTime.Nanosecond()) <= ts.IncNumber && lastDeadTS.seqNum < ts.SeqNum {
// resurrect peer
d.resurrectMember(id, netMember, *ts)
d.resurrectMember(m, *ts)
return
}

Expand All @@ -332,30 +325,37 @@ func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.AliveMessage) {
// else, ignore the message because it is too old
}

func (d *gossipDiscoveryImpl) resurrectMember(id string, member *NetworkMember, t proto.PeerTime) {
d.logger.Info("Entering, id =", id, "member = ", member, "t = ", t)
func (d *gossipDiscoveryImpl) resurrectMember(m *proto.AliveMessage, t proto.PeerTime) {
d.logger.Info("Entering,", m, t)
defer d.logger.Info("Exiting")
d.lock.Lock()
defer d.lock.Unlock()

id := m.Membership.Id

d.aliveLastTS[id] = &timestamp{
lastSeen: time.Now(),
seqNum: t.SeqNum,
incTime: tsToTime(t.IncNumber),
}

d.id2Member[id] = member
d.id2Member[id] = &NetworkMember{
Id: id,
Endpoint: m.Membership.Endpoint,
Metadata: m.Membership.Metadata,
}
delete(d.deadLastTS, id)
aliveMsgWithId := &proto.AliveMessage{
Membership: &proto.Member{Id: id},
}

// If the member is in the dead list, delete it from there
i := util.IndexInSlice(d.cachedMembership.Dead, aliveMsgWithId, sameIdAliveMessages)
if i != -1 {
resurrectedMember := d.cachedMembership.Dead[i]
d.cachedMembership.Dead = append(d.cachedMembership.Dead[:i], d.cachedMembership.Dead[i+1:]...)
d.cachedMembership.Alive = append(d.cachedMembership.Alive, resurrectedMember)
}
// add the member to the alive list
d.cachedMembership.Alive = append(d.cachedMembership.Alive, m)
}

func (d *gossipDiscoveryImpl) periodicalReconnectToDead() {
Expand Down Expand Up @@ -467,9 +467,12 @@ func (d *gossipDiscoveryImpl) expireDeadMembers(dead []string) {
for _, id := range dead {
d.comm.CloseConn(id)
// move lastTS from alive to dead
lastTS := d.aliveLastTS[id]
d.deadLastTS[id] = lastTS
delete(d.aliveLastTS, id)
lastTS, hasLastTS := d.aliveLastTS[id]
if hasLastTS {
d.deadLastTS[id] = lastTS
delete(d.aliveLastTS, id)
}

aliveMsgWithId := &proto.AliveMessage{
Membership: &proto.Member{Id: id},
}
Expand Down

0 comments on commit ecc4ea8

Please sign in to comment.