Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Install fib rules and routes with proto kernel to avoid systemd messing with them #24288

Merged
merged 14 commits into from
Mar 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 16 additions & 14 deletions bpf/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ NR_CPUS=${21}
ENDPOINT_ROUTES=${22}
PROXY_RULE=${23}
FILTER_PRIO=${24}
DEFAULT_RTPROTO=${25}
LOCAL_RULE_PRIO=${26}

ID_HOST=1
ID_WORLD=2
Expand Down Expand Up @@ -74,21 +76,21 @@ function move_local_rules_af()
return
fi

# move the local table lookup rule from pref 0 to pref 100 so we can
# insert the cilium ip rules before the local table. It is strictly
# move the local table lookup rule from pref 0 to pref LOCAL_RULE_PRIO so we
# can insert the cilium ip rules before the local table. It is strictly
# required to add the new local rule before deleting the old one as
# otherwise local addresses will not be reachable for a short period of
# time.
$IP rule list | grep 100 | grep "lookup local" || {
$IP rule add from all lookup local pref 100
$IP rule list | grep "${LOCAL_RULE_PRIO}" | grep "lookup local" || {
$IP rule add from all lookup local pref ${LOCAL_RULE_PRIO} proto $DEFAULT_RTPROTO
}
$IP rule del from all lookup local pref 0 2> /dev/null || true

# check if the move of the local table rule was successful and restore
# it otherwise
if [ "$($IP rule list | grep "lookup local" | wc -l)" -eq "0" ]; then
$IP rule add from all lookup local pref 0
$IP rule del from all lookup local pref 100
$IP rule add from all lookup local pref 0 proto $DEFAULT_RTPROTO
$IP rule del from all lookup local pref ${LOCAL_RULE_PRIO}
echo "Error: The kernel does not support moving the local table routing rule"
echo "Local routing rules:"
$IP rule list lookup local
Expand All @@ -111,13 +113,13 @@ function setup_proxy_rules()
{
# Any packet from an ingress proxy uses a separate routing table that routes
# the packet back to the cilium host device.
from_ingress_rulespec="fwmark 0xA00/0xF00 pref 10 lookup $PROXY_RT_TABLE"
from_ingress_rulespec="fwmark 0xA00/0xF00 pref 10 lookup $PROXY_RT_TABLE proto $DEFAULT_RTPROTO"

# Any packet to an ingress or egress proxy uses a separate routing table
# that routes the packet to the loopback device regardless of the destination
# address in the packet. For this to work the ctx must have a socket set
# (e.g., via TPROXY).
to_proxy_rulespec="fwmark 0x200/0xF00 pref 9 lookup $TO_PROXY_RT_TABLE"
to_proxy_rulespec="fwmark 0x200/0xF00 pref 9 lookup $TO_PROXY_RT_TABLE proto $DEFAULT_RTPROTO"

if [ "$IP4_HOST" != "<nil>" ]; then
if [ -n "$(ip -4 rule list)" ]; then
Expand All @@ -136,14 +138,14 @@ function setup_proxy_rules()
fi

# Traffic to the host proxy is local
ip route replace table $TO_PROXY_RT_TABLE local 0.0.0.0/0 dev lo
ip route replace table $TO_PROXY_RT_TABLE local 0.0.0.0/0 dev lo proto $DEFAULT_RTPROTO
# Traffic from ingress proxy goes to Cilium address space via the cilium host device
if [ "$ENDPOINT_ROUTES" = "true" ]; then
ip route delete table $PROXY_RT_TABLE $IP4_HOST/32 dev $HOST_DEV1 2>/dev/null || true
ip route delete table $PROXY_RT_TABLE default via $IP4_HOST 2>/dev/null || true
else
ip route replace table $PROXY_RT_TABLE $IP4_HOST/32 dev $HOST_DEV1
ip route replace table $PROXY_RT_TABLE default via $IP4_HOST
ip route replace table $PROXY_RT_TABLE $IP4_HOST/32 dev $HOST_DEV1 proto $DEFAULT_RTPROTO
ip route replace table $PROXY_RT_TABLE default via $IP4_HOST proto $DEFAULT_RTPROTO
fi
else
ip -4 rule del $to_proxy_rulespec 2> /dev/null || true
Expand All @@ -169,14 +171,14 @@ function setup_proxy_rules()
IP6_LLADDR=$(ip -6 addr show dev $HOST_DEV2 | grep inet6 | head -1 | awk '{print $2}' | awk -F'/' '{print $1}')
if [ -n "$IP6_LLADDR" ]; then
# Traffic to the host proxy is local
ip -6 route replace table $TO_PROXY_RT_TABLE local ::/0 dev lo
ip -6 route replace table $TO_PROXY_RT_TABLE local ::/0 dev lo proto $DEFAULT_RTPROTO
# Traffic from ingress proxy goes to Cilium address space via the cilium host device
if [ "$ENDPOINT_ROUTES" = "true" ]; then
ip -6 route delete table $PROXY_RT_TABLE ${IP6_LLADDR}/128 dev $HOST_DEV1 2>/dev/null || true
ip -6 route delete table $PROXY_RT_TABLE default via $IP6_LLADDR dev $HOST_DEV1 2>/dev/null || true
else
ip -6 route replace table $PROXY_RT_TABLE ${IP6_LLADDR}/128 dev $HOST_DEV1
ip -6 route replace table $PROXY_RT_TABLE default via $IP6_LLADDR dev $HOST_DEV1
ip -6 route replace table $PROXY_RT_TABLE ${IP6_LLADDR}/128 dev $HOST_DEV1 proto $DEFAULT_RTPROTO
ip -6 route replace table $PROXY_RT_TABLE default via $IP6_LLADDR dev $HOST_DEV1 proto $DEFAULT_RTPROTO
fi
fi
else
Expand Down
2 changes: 1 addition & 1 deletion bugtool/cmd/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func defaultCommands(confDir string, cmdDir string, k8sPods []string) []string {
"ip6tables-nft-save -c",
"iptables-legacy-save -c",
"ip6tables-legacy-save -c",
"ip rule",
"ip -d rule",
"ipset list",
// xfrm
"ip -s xfrm policy",
Expand Down
4 changes: 4 additions & 0 deletions daemon/cmd/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,10 @@ func (d *Daemon) init() error {
return fmt.Errorf("failed while reinitializing datapath: %w", err)
}

if err := linuxdatapath.NodeEnsureLocalIPRule(); err != nil {
return fmt.Errorf("failed to ensure local IP rules: %w", err)
}

if option.Config.SockopsEnable {
eppolicymap.CreateEPPolicyMap()
if err := sockops.SockmapEnable(); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ require (
github.com/stretchr/testify v1.8.2
github.com/tidwall/gjson v1.14.4
github.com/tidwall/sjson v1.2.5
github.com/vishvananda/netlink v1.2.1-beta.2.0.20220608195807-1a118fe229fc
github.com/vishvananda/netlink v1.2.1-beta.2.0.20230316163032-ced5aaba43e3
github.com/vishvananda/netns v0.0.4
go.etcd.io/etcd/api/v3 v3.5.7
go.etcd.io/etcd/client/pkg/v3 v3.5.7
Expand Down
6 changes: 3 additions & 3 deletions go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions pkg/datapath/linux/linux_defaults/linux_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package linux_defaults

import (
"time"

"golang.org/x/sys/unix"
)

// Linux specific constants used in Linux datapath
Expand Down Expand Up @@ -61,8 +63,8 @@ const (
// RouterMarkNodePort
MaskMultinodeNodeport = 0x80

// IPSecProtocolID IP protocol ID for IPSec defined in RFC4303
RouteProtocolIPSec = 50
// RTProto is the default protocol we install our fib rules and routes with
RTProto = unix.RTPROT_KERNEL

// RulePriorityWireguard is the priority of the rule used for routing packets to Wireguard device for encryption
RulePriorityWireguard = 1
Expand All @@ -78,6 +80,10 @@ const (
// before the local table priority.
RulePriorityIngress = 20

// RulePriorityLocalLookup is the priority for the local lookup rule which is
// moved on init from 0
RulePriorityLocalLookup = 100

// RulePriorityEgress is the priority of the rule used for egress routing
// of endpoints. This priority is after the local table priority.
RulePriorityEgress = 110
Expand Down
103 changes: 98 additions & 5 deletions pkg/datapath/linux/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,9 @@ func createDirectRouteSpec(CIDR *cidr.CIDR, nodeIP net.IP) (routeSpec *netlink.R
var routes []netlink.Route

routeSpec = &netlink.Route{
Dst: CIDR.IPNet,
Gw: nodeIP,
Dst: CIDR.IPNet,
Gw: nodeIP,
Protocol: linux_defaults.RTProto,
}

routes, err = netlink.RouteGet(nodeIP)
Expand Down Expand Up @@ -314,8 +315,9 @@ func (n *linuxNodeHandler) deleteDirectRoute(CIDR *cidr.CIDR, nodeIP net.IP) {
}

filter := &netlink.Route{
Dst: CIDR.IPNet,
Gw: nodeIP,
Dst: CIDR.IPNet,
Gw: nodeIP,
Protocol: linux_defaults.RTProto,
}

routes, err := netlink.RouteListFiltered(family, filter, netlink.RT_FILTER_DST|netlink.RT_FILTER_GW)
Expand Down Expand Up @@ -383,6 +385,7 @@ func (n *linuxNodeHandler) createNodeRouteSpec(prefix *cidr.CIDR, isLocalNode bo
Prefix: *prefix.IPNet,
MTU: mtu,
Priority: option.Config.RouteMetric,
Proto: linux_defaults.RTProto,
}, nil
}

Expand Down Expand Up @@ -1235,6 +1238,7 @@ func (n *linuxNodeHandler) replaceHostRules() error {
Priority: 1,
Mask: linux_defaults.RouteMarkMask,
Table: linux_defaults.RouteTableIPSec,
Protocol: linux_defaults.RTProto,
}

if n.nodeConfig.EnableIPv4 {
Expand Down Expand Up @@ -1273,6 +1277,7 @@ func (n *linuxNodeHandler) removeEncryptRules() error {
Priority: 1,
Mask: linux_defaults.RouteMarkMask,
Table: linux_defaults.RouteTableIPSec,
Protocol: linux_defaults.RTProto,
}

rule.Mark = linux_defaults.RouteMarkDecrypt
Expand Down Expand Up @@ -1322,7 +1327,7 @@ func (n *linuxNodeHandler) createNodeIPSecInRoute(ip *net.IPNet) route.Route {
Device: device,
Prefix: *ip,
Table: linux_defaults.RouteTableIPSec,
Proto: linux_defaults.RouteProtocolIPSec,
Proto: linux_defaults.RTProto,
Type: route.RTN_LOCAL,
}
}
Expand All @@ -1334,6 +1339,7 @@ func (n *linuxNodeHandler) createNodeIPSecOutRoute(ip *net.IPNet) route.Route {
Prefix: *ip,
Table: linux_defaults.RouteTableIPSec,
MTU: n.nodeConfig.MtuConfig.GetRoutePostEncryptMTU(),
Proto: linux_defaults.RTProto,
}
}

Expand Down Expand Up @@ -1877,3 +1883,90 @@ func NodeDeviceNameWithDefaultRoute() (string, error) {
}
return link.Attrs().Name, nil
}

// deleteOldLocalRule deletes rule for the given address family when the
// rules returned by route.ListRules contain an entry with an unspecified
// protocol ahead of the first entry carrying linux_defaults.RTProto.
//
// It returns nil without touching anything when linux_defaults.RTProto is
// itself unix.RTPROT_UNSPEC, or when no such ordering is found. Listing and
// deletion failures are returned wrapped with the address family name.
func deleteOldLocalRule(rule route.Rule, family int) error {
	// With an unspecified default protocol the "old" and "new" rules cannot
	// be told apart, so there is nothing to clean up.
	if linux_defaults.RTProto == unix.RTPROT_UNSPEC {
		return nil
	}

	// Human-readable family name, used in errors and in the log entry.
	familyStr := "IPv6"
	if family == netlink.FAMILY_V4 {
		familyStr = "IPv4"
	}

	localRules, err := route.ListRules(family, &rule)
	if err != nil {
		return fmt.Errorf("could not list local %s rules: %w", familyStr, err)
	}

	// Scan in the order the rules were returned and stop at the first rule
	// that carries our protocol. The old rule is only considered stale when
	// an unspec-protocol rule was seen before reaching that point.
	seenUnspec := false
	stale := false
	for _, candidate := range localRules {
		if candidate.Protocol == unix.RTPROT_UNSPEC {
			seenUnspec = true
		}
		if candidate.Protocol == linux_defaults.RTProto {
			stale = seenUnspec
			break
		}
	}
	if !stale {
		return nil
	}

	if err := route.DeleteRule(rule); err != nil {
		return fmt.Errorf("could not delete old %s local rule: %w", familyStr, err)
	}
	log.WithFields(logrus.Fields{"family": familyStr}).Info("Deleting old local lookup rule")

	return nil
}

// NodeEnsureLocalIPRule makes sure the Cilium local-table lookup rule at
// priority linux_defaults.RulePriorityLocalLookup is installed with protocol
// linux_defaults.RTProto for every enabled address family, and then hands the
// same rule with an unspecified protocol to deleteOldLocalRule so a leftover
// copy can be removed.
//
// It is a no-op unless both InstallIptRules and EnableL7Proxy are set. The
// first error encountered is returned.
func NodeEnsureLocalIPRule() error {
	// The local lookup rule is only moved by Cilium when the proxy rules are
	// installed; without them there is nothing to ensure.
	if !(option.Config.InstallIptRules && option.Config.EnableL7Proxy) {
		return nil
	}

	wanted := route.Rule{
		Priority: linux_defaults.RulePriorityLocalLookup,
		Table:    unix.RT_TABLE_LOCAL,
		Mark:     -1,
		Mask:     -1,
		Protocol: linux_defaults.RTProto,
	}

	// Same rule, but tagged with the unspecified protocol: this is what gets
	// passed on for cleanup.
	legacy := wanted
	legacy.Protocol = unix.RTPROT_UNSPEC

	if option.Config.EnableIPv4 {
		if err := route.ReplaceRule(wanted); err != nil {
			return fmt.Errorf("could not replace IPv4 local rule: %w", err)
		}
		if err := deleteOldLocalRule(legacy, netlink.FAMILY_V4); err != nil {
			return err
		}
	}

	if option.Config.EnableIPv6 {
		if err := route.ReplaceRuleIPv6(wanted); err != nil {
			return fmt.Errorf("could not replace IPv6 local rule: %w", err)
		}
		if err := deleteOldLocalRule(legacy, netlink.FAMILY_V6); err != nil {
			return err
		}
	}

	return nil
}
14 changes: 14 additions & 0 deletions pkg/datapath/linux/route/route_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (

"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"

"github.com/cilium/cilium/pkg/datapath/linux/linux_defaults"
)

const (
Expand Down Expand Up @@ -174,6 +176,7 @@ func createNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) *n
LinkIndex: link.Attrs().Index,
Dst: routerNet,
Table: route.Table,
Protocol: linux_defaults.RTProto,
}

// Known issue: scope for IPv6 routes is not propagated correctly. If
Expand Down Expand Up @@ -320,6 +323,9 @@ type Rule struct {

// Table is the routing table to look up if the rule matches
Table int

// Protocol is the routing rule protocol (e.g. proto unspec/kernel)
Protocol uint8
}

// String returns the string representation of a Rule (adhering to the Stringer
Expand Down Expand Up @@ -355,6 +361,8 @@ func (r Rule) String() string {
str += fmt.Sprintf(" mark 0x%x mask 0x%x", r.Mark, r.Mask)
}

str += fmt.Sprintf(" proto %s", netlink.RouteProtocol(r.Protocol))

return str
}

Expand All @@ -380,6 +388,10 @@ func lookupRule(spec Rule, family int) (bool, error) {
continue
}

if spec.Protocol != 0 && r.Protocol != spec.Protocol {
continue
}

if r.Table == spec.Table {
return true, nil
}
Expand Down Expand Up @@ -458,6 +470,7 @@ func replaceRule(spec Rule, family int) error {
rule.Priority = spec.Priority
rule.Src = spec.From
rule.Dst = spec.To
rule.Protocol = spec.Protocol
return netlink.RuleAdd(rule)
}

Expand All @@ -480,6 +493,7 @@ func deleteRule(spec Rule, family int) error {
rule.Src = spec.From
rule.Dst = spec.To
rule.Family = family
rule.Protocol = spec.Protocol
return netlink.RuleDel(rule)
}

Expand Down