Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[17.09] Overlay fix for transient IP reuse #2016

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion drivers/bridge/bridge_test.go
Expand Up @@ -476,7 +476,7 @@ func verifyV4INCEntries(networks map[string]*bridgeNetwork, numEntries int, t *t
if x == y {
continue
}
re := regexp.MustCompile(fmt.Sprintf("%s %s", x.config.BridgeName, y.config.BridgeName))
re := regexp.MustCompile(x.config.BridgeName + " " + y.config.BridgeName)
matches := re.FindAllString(string(out[:]), -1)
if len(matches) != 1 {
t.Fatalf("Cannot find expected inter-network isolation rules in IP Tables:\n%s", string(out[:]))
Expand Down
2 changes: 1 addition & 1 deletion drivers/ipvlan/ipvlan_setup.go
Expand Up @@ -201,5 +201,5 @@ func delDummyLink(linkName string) error {

// getDummyName returns the name of a dummy parent with truncated net ID and driver prefix
func getDummyName(netID string) string {
return fmt.Sprintf("%s%s", dummyPrefix, netID)
return dummyPrefix + netID
}
2 changes: 1 addition & 1 deletion drivers/macvlan/macvlan_setup.go
Expand Up @@ -205,5 +205,5 @@ func delDummyLink(linkName string) error {

// getDummyName returns the name of a dummy parent with truncated net ID and driver prefix
func getDummyName(netID string) string {
return fmt.Sprintf("%s%s", dummyPrefix, netID)
return dummyPrefix + netID
}
1 change: 0 additions & 1 deletion drivers/overlay/encryption.go
Expand Up @@ -21,7 +21,6 @@ import (

const (
r = 0xD0C4E3
timeout = 30
pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8)
)

Expand Down
18 changes: 8 additions & 10 deletions drivers/overlay/joinleave.go
Expand Up @@ -68,7 +68,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,

ep.ifName = containerIfName

if err := d.writeEndpointToStore(ep); err != nil {
if err = d.writeEndpointToStore(ep); err != nil {
return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
}

Expand All @@ -86,7 +86,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
return err
}

if err := sbox.AddInterface(overlayIfName, "veth",
if err = sbox.AddInterface(overlayIfName, "veth",
sbox.InterfaceOptions().Master(s.brName)); err != nil {
return fmt.Errorf("could not add veth pair inside the network sandbox: %v", err)
}
Expand All @@ -100,15 +100,15 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
return err
}

if err := nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil {
if err = nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil {
return fmt.Errorf("could not set mac address (%v) to the container interface: %v", ep.mac, err)
}

for _, sub := range n.subnets {
if sub == s {
continue
}
if err := jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil {
if err = jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil {
logrus.Errorf("Adding subnet %s static route in network %q failed\n", s.subnetIP, n.id)
}
}
Expand All @@ -122,7 +122,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,

d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true)

if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
if err = d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
logrus.Warn(err)
}

Expand Down Expand Up @@ -200,7 +200,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri
}

if etype == driverapi.Delete {
d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep)
d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, false)
return
}

Expand Down Expand Up @@ -232,11 +232,9 @@ func (d *driver) Leave(nid, eid string) error {
}
}

n.leaveSandbox()
d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true)

if err := d.checkEncryption(nid, nil, 0, true, false); err != nil {
logrus.Warn(err)
}
n.leaveSandbox()

return nil
}
70 changes: 19 additions & 51 deletions drivers/overlay/ov_network.go
Expand Up @@ -251,8 +251,9 @@ func (d *driver) DeleteNetwork(nid string) error {
if err := d.deleteEndpointFromStore(ep); err != nil {
logrus.Warnf("Failed to delete overlay endpoint %s from local store: %v", ep.id[0:7], err)
}

}
// flush the peerDB entries
d.peerFlush(nid)
d.deleteNetwork(nid)

vnis, err := n.releaseVxlanID()
Expand Down Expand Up @@ -494,7 +495,7 @@ func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) erro
brIfaceOption := make([]osl.IfaceOption, 2)
brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP))
brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true))
Ifaces[fmt.Sprintf("%s+%s", brName, "br")] = brIfaceOption
Ifaces[brName+"+br"] = brIfaceOption

err := sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
Expand All @@ -504,12 +505,8 @@ func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) erro
Ifaces = make(map[string][]osl.IfaceOption)
vxlanIfaceOption := make([]osl.IfaceOption, 1)
vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName))
Ifaces[fmt.Sprintf("%s+%s", vxlanName, "vxlan")] = vxlanIfaceOption
err = sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
return err
}
return nil
Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption
return sbox.Restore(Ifaces, nil, nil, nil)
}

func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
Expand Down Expand Up @@ -760,58 +757,38 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
continue
}

logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)

if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
continue
}

if n.driver.isSerfAlive() {
logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip)
if err != nil {
logrus.Errorf("could not resolve peer %q: %v", ip, err)
continue
}
n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false)
} else {
// If the gc_thresh values are lower kernel might knock off the neighor entries.
// When we get a L3 miss check if its a valid peer and reprogram the neighbor
// entry again. Rate limit it to once attempt every 500ms, just in case a faulty
// container sends a flood of packets to invalid peers
if !l3Miss {
continue
}
if time.Since(t) > 500*time.Millisecond {
} else if l3Miss && time.Since(t) > time.Second {
// All the local peers will trigger a miss notification but this one is expected and the local container will reply
// autonomously to the ARP request
// In case the gc_thresh3 values is low kernel might reject new entries during peerAdd. This will trigger the following
// extra logs that will inform of the possible issue.
// Entries created would not be deleted see documentation http://man7.org/linux/man-pages/man7/arp.7.html:
// Entries which are marked as permanent are never deleted by the garbage-collector.
// The time limit here is to guarantee that the dbSearch is not
// done too frequently causing a stall of the peerDB operations.
pKey, pEntry, err := n.driver.peerDbSearch(n.id, ip)
if err == nil && !pEntry.isLocal {
t = time.Now()
n.programNeighbor(ip)
logrus.Warnf("miss notification for peer:%+v l3Miss:%t l2Miss:%t, if the problem persist check the gc_thresh on the host pKey:%+v pEntry:%+v err:%v",
neigh, l3Miss, l2Miss, *pKey, *pEntry, err)
}
}
}
}
}

func (n *network) programNeighbor(ip net.IP) {
peerMac, _, _, err := n.driver.peerDbSearch(n.id, ip)
if err != nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, no peer entry", ip)
return
}
s := n.getSubnetforIPAddr(ip)
if s == nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, not a valid subnet", ip)
return
}
sbox := n.sandbox()
if sbox == nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, overlay sandbox missing", ip)
return
}
if err := sbox.AddNeighbor(ip, peerMac, true, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s: %v", ip, err)
return
}
}

func (d *driver) addNetwork(n *network) {
d.Lock()
d.networks[n.id] = n
Expand Down Expand Up @@ -1090,15 +1067,6 @@ func (n *network) contains(ip net.IP) bool {
return false
}

func (n *network) getSubnetforIPAddr(ip net.IP) *subnet {
for _, s := range n.subnets {
if s.subnetIP.Contains(ip) {
return s
}
}
return nil
}

// getSubnetforIP returns the subnet to which the given IP belongs
func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
for _, s := range n.subnets {
Expand Down
8 changes: 4 additions & 4 deletions drivers/overlay/ov_serf.go
Expand Up @@ -122,7 +122,7 @@ func (d *driver) processEvent(u serf.UserEvent) {
case "join":
d.peerAdd(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false, false, false)
case "leave":
d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr))
d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false)
}
}

Expand All @@ -135,13 +135,13 @@ func (d *driver) processQuery(q *serf.Query) {
fmt.Printf("Failed to scan query payload string: %v\n", err)
}

peerMac, peerIPMask, vtep, err := d.peerDbSearch(nid, net.ParseIP(ipStr))
pKey, pEntry, err := d.peerDbSearch(nid, net.ParseIP(ipStr))
if err != nil {
return
}

logrus.Debugf("Sending peer query resp mac %s, mask %s, vtep %s", peerMac, net.IP(peerIPMask), vtep)
q.Respond([]byte(fmt.Sprintf("%s %s %s", peerMac.String(), net.IP(peerIPMask).String(), vtep.String())))
logrus.Debugf("Sending peer query resp mac %v, mask %s, vtep %s", pKey.peerMac, net.IP(pEntry.peerIPMask).String(), pEntry.vtep)
q.Respond([]byte(fmt.Sprintf("%s %s %s", pKey.peerMac.String(), net.IP(pEntry.peerIPMask).String(), pEntry.vtep.String())))
}

func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) {
Expand Down
6 changes: 3 additions & 3 deletions drivers/overlay/overlay.go
Expand Up @@ -162,7 +162,7 @@ func (d *driver) restoreEndpoints() error {
Ifaces := make(map[string][]osl.IfaceOption)
vethIfaceOption := make([]osl.IfaceOption, 1)
vethIfaceOption = append(vethIfaceOption, n.sbox.InterfaceOptions().Master(s.brName))
Ifaces[fmt.Sprintf("%s+%s", "veth", "veth")] = vethIfaceOption
Ifaces["veth+veth"] = vethIfaceOption

err := n.sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
Expand Down Expand Up @@ -262,15 +262,15 @@ func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
d.Unlock()

// If containers are already running on this network update the
// advertiseaddress in the peerDB
// advertise address in the peerDB
d.localJoinOnce.Do(func() {
d.peerDBUpdateSelf()
})

// If there is no cluster store there is no need to start serf.
if d.store != nil {
if err := validateSelf(advertiseAddress); err != nil {
logrus.Warnf("%s", err.Error())
logrus.Warn(err.Error())
}
err := d.serfInit()
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion drivers/overlay/overlay.proto
Expand Up @@ -24,4 +24,4 @@ message PeerRecord {
// which this container is running and can be reached by
// building a tunnel to that host IP.
string tunnel_endpoint_ip = 3 [(gogoproto.customname) = "TunnelEndpointIP"];
}
}