Skip to content

Commit

Permalink
Clear conntrack entries for UDP ports
Browse files Browse the repository at this point in the history
Conntrack entries are created for UDP flows even if there's nowhere to
route these packets (ie. no listening socket and no NAT rules to
apply). Moreover, iptables NAT rules are evaluated by netfilter only
when creating a new conntrack entry.

When Docker adds NAT rules, netfilter will ignore them for any packet
matching a pre-existing conntrack entry. In such a case, when
dockerd runs with the userland proxy enabled, packets get routed to it and
the main symptom is a bad source IP address (as shown by moby#44688).

If the publishing container is run through Docker Swarm or in
"standalone" Docker but with no userland proxy, affected packets will
be dropped (i.e. routed to nowhere).

As such, Docker needs to flush all conntrack entries for published UDP
ports to make sure NAT rules are correctly applied to all packets.

Fixes (at least) moby#44688, moby#8795, moby#16720, moby#7540.

Signed-off-by: Albin Kerouanton <albinker@gmail.com>
  • Loading branch information
akerouanton committed Jan 4, 2023
1 parent d109e42 commit c12a67a
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 0 deletions.
10 changes: 10 additions & 0 deletions libnetwork/drivers/bridge/bridge.go
Expand Up @@ -1333,6 +1333,16 @@ func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string
// bound to the local proxy and won't be redirected to the new endpoints.
clearEndpointConnections(d.nlh, endpoint)

// Conntrack entries are created for UDP flows even if there's nowhere to
// route these packets (ie. no listening socket and no NAT rules to
// apply). Moreover, iptables NAT rules are evaluated by netfilter only
// when creating a new conntrack entry.
// When Docker adds NAT rules, netfilter ignores them for any packet
// matching a pre-existing conntrack entry.
// As such, we need to flush all conntrack entries for published UDP
// ports to make sure NAT rules are correctly applied to all packets.
clearConnectionsToUDPPorts(d.nlh, endpoint)

if err = d.storeUpdate(endpoint); err != nil {
return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
}
Expand Down
13 changes: 13 additions & 0 deletions libnetwork/drivers/bridge/setup_ip_tables.go
Expand Up @@ -9,6 +9,7 @@ import (
"net"

"github.com/docker/docker/libnetwork/iptables"
"github.com/docker/docker/libnetwork/types"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
)
Expand Down Expand Up @@ -429,3 +430,15 @@ func clearEndpointConnections(nlh *netlink.Handle, ep *bridgeEndpoint) {
}
iptables.DeleteConntrackEntries(nlh, ipv4List, ipv6List)
}

// clearConnectionsToUDPPorts collects the host port of every UDP port mapping
// of the endpoint and asks the iptables package to delete the conntrack
// entries matching those ports.
//
// The deletion is best effort: the error returned by
// iptables.DeleteConntrackEntriesByPort is not propagated to the caller.
func clearConnectionsToUDPPorts(nlh *netlink.Handle, ep *bridgeEndpoint) {
	var hostPorts []uint16

	for _, binding := range ep.portMapping {
		// Only UDP mappings are of interest here; skip everything else.
		if binding.Proto != types.UDP {
			continue
		}
		hostPorts = append(hostPorts, binding.HostPort)
	}

	// Per-port failures are logged by the callee; the returned error is
	// deliberately discarded.
	_ = iptables.DeleteConntrackEntriesByPort(nlh, types.UDP, hostPorts)
}
37 changes: 37 additions & 0 deletions libnetwork/iptables/conntrack.go
Expand Up @@ -8,6 +8,7 @@ import (
"net"
"syscall"

"github.com/docker/docker/libnetwork/types"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
)
Expand Down Expand Up @@ -53,6 +54,42 @@ func DeleteConntrackEntries(nlh *netlink.Handle, ipv4List []net.IP, ipv6List []n
return totalIPv4FlowPurged, totalIPv6FlowPurged, nil
}

// DeleteConntrackEntriesByPort deletes, for each port in ports, the IPv4 and
// IPv6 conntrack entries of protocol proto selected by a
// netlink.ConntrackOrigDstPort filter on that port.
//
// It returns ErrConntrackNotConfigurable when IsConntrackProgrammable reports
// that conntrack cannot be programmed through nlh. In every other case it
// returns nil: a failure on an individual port is logged as a warning and does
// not stop the remaining ports from being processed.
func DeleteConntrackEntriesByPort(nlh *netlink.Handle, proto types.Protocol, ports []uint16) error {
	if !IsConntrackProgrammable(nlh) {
		return ErrConntrackNotConfigurable
	}

	var (
		protoName = proto.String()
		purgedV4  uint
		purgedV6  uint
	)

	for _, p := range ports {
		// Build a filter matching this protocol and port. The protocol is
		// added first and the port only if that succeeded; both failures are
		// reported with the same message and skip the port.
		f := &netlink.ConntrackFilter{}
		err := f.AddProtocol(uint8(proto))
		if err == nil {
			err = f.AddPort(netlink.ConntrackOrigDstPort, p)
		}
		if err != nil {
			logrus.Warnf("Failed to delete conntrack state for %s port %d: %v", protoName, p, err)
			continue
		}

		// A failure for one address family is only logged; the count
		// returned alongside the error is still added to the total and the
		// other family is still attempted.
		n4, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET, f)
		if err != nil {
			logrus.Warnf("Failed to delete conntrack state for IPv4 %s port %d: %v", protoName, p, err)
		}
		purgedV4 += n4

		n6, err := nlh.ConntrackDeleteFilter(netlink.ConntrackTable, syscall.AF_INET6, f)
		if err != nil {
			logrus.Warnf("Failed to delete conntrack state for IPv6 %s port %d: %v", protoName, p, err)
		}
		purgedV6 += n6
	}

	logrus.Debugf("DeleteConntrackEntriesByPort for %s ports purged ipv4:%d, ipv6:%d", protoName, purgedV4, purgedV6)
	return nil
}

func purgeConntrackState(nlh *netlink.Handle, family netlink.InetFamily, ipAddress net.IP) (uint, error) {
filter := &netlink.ConntrackFilter{}
// NOTE: doing the flush using the ipAddress is safe because today there cannot be multiple networks with the same subnet
Expand Down

0 comments on commit c12a67a

Please sign in to comment.