Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.7 backports 2020-04-01 #10818

Merged
merged 10 commits into from
Apr 2, 2020
1 change: 1 addition & 0 deletions Documentation/kubernetes/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ filesystems, the mount point path must be reflected in the unit filename.
What=bpffs
Where=/sys/fs/bpf
Type=bpf
Options=rw,nosuid,nodev,noexec,relatime,mode=700

[Install]
WantedBy=multi-user.target
Expand Down
15 changes: 10 additions & 5 deletions bpf/bpf_lxc.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb,
* reverse NAT.
*/
ct_state_new.src_sec_id = SECLABEL;
ret = ct_create6(get_ct_map6(tuple), tuple, skb, CT_EGRESS, &ct_state_new, verdict > 0);
ret = ct_create6(get_ct_map6(tuple), &CT_MAP_ANY6, tuple, skb, CT_EGRESS, &ct_state_new, verdict > 0);
if (IS_ERR(ret))
return ret;
monitor = TRACE_PAYLOAD_LEN;
Expand Down Expand Up @@ -538,8 +538,11 @@ static inline int handle_ipv4_from_lxc(struct __sk_buff *skb, __u32 *dstID)
* reverse NAT.
*/
ct_state_new.src_sec_id = SECLABEL;
ret = ct_create4(get_ct_map4(&tuple), &tuple, skb, CT_EGRESS,
&ct_state_new, verdict > 0);
/* We could avoid creating related entries for legacy ClusterIP
* handling here, but it turns out that the verifier cannot handle it.
*/
ret = ct_create4(get_ct_map4(&tuple), &CT_MAP_ANY4, &tuple, skb,
CT_EGRESS, &ct_state_new, verdict > 0);
if (IS_ERR(ret))
return ret;
break;
Expand Down Expand Up @@ -876,7 +879,8 @@ ipv6_policy(struct __sk_buff *skb, int ifindex, __u32 src_label, __u8 *reason, _
ct_state_new.orig_dport = tuple.dport;
ct_state_new.src_sec_id = src_label;
ct_state_new.node_port = ct_state.node_port;
ret = ct_create6(get_ct_map6(&tuple), &tuple, skb, CT_INGRESS, &ct_state_new, verdict > 0);
ret = ct_create6(get_ct_map6(&tuple), &CT_MAP_ANY6, &tuple, skb, CT_INGRESS,
&ct_state_new, verdict > 0);
if (IS_ERR(ret))
return ret;

Expand Down Expand Up @@ -1085,7 +1089,8 @@ ipv4_policy(struct __sk_buff *skb, int ifindex, __u32 src_label, __u8 *reason, _
ct_state_new.orig_dport = tuple.dport;
ct_state_new.src_sec_id = src_label;
ct_state_new.node_port = ct_state.node_port;
ret = ct_create4(get_ct_map4(&tuple), &tuple, skb, CT_INGRESS, &ct_state_new, verdict > 0);
ret = ct_create4(get_ct_map4(&tuple), &CT_MAP_ANY4, &tuple, skb, CT_INGRESS,
&ct_state_new, verdict > 0);
if (IS_ERR(ret))
return ret;

Expand Down
6 changes: 3 additions & 3 deletions bpf/include/bpf/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,10 @@
#endif

/* Map access/manipulation */
static void *BPF_FUNC(map_lookup_elem, void *map, const void *key);
static int BPF_FUNC(map_update_elem, void *map, const void *key,
static void *BPF_FUNC(map_lookup_elem, const void *map, const void *key);
static int BPF_FUNC(map_update_elem, const void *map, const void *key,
const void *value, uint32_t flags);
static int BPF_FUNC(map_delete_elem, void *map, const void *key);
static int BPF_FUNC(map_delete_elem, const void *map, const void *key);

/* Time access */
static uint64_t BPF_FUNC(ktime_get_ns);
Expand Down
102 changes: 54 additions & 48 deletions bpf/lib/conntrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -647,9 +647,11 @@ ct_update6_rev_nat_index(void *map, struct ipv6_ct_tuple *tuple,
}

/* Offset must point to IPv6 */
static inline int __inline__ ct_create6(void *map, struct ipv6_ct_tuple *tuple,
struct __sk_buff *skb, int dir,
struct ct_state *ct_state, bool proxy_redirect)
static __always_inline int ct_create6(const void *map_main, const void *map_related,
struct ipv6_ct_tuple *tuple,
struct __sk_buff *skb, int dir,
struct ct_state *ct_state,
bool proxy_redirect)
{
/* Create entry in original direction */
struct ct_entry entry = { };
Expand Down Expand Up @@ -684,30 +686,26 @@ static inline int __inline__ ct_create6(void *map, struct ipv6_ct_tuple *tuple,
cilium_dbg3(skb, DBG_CT_CREATED6, entry.rev_nat_index, ct_state->src_sec_id, 0);

entry.src_sec_id = ct_state->src_sec_id;
if (map_update_elem(map, tuple, &entry, 0) < 0)
if (map_update_elem(map_main, tuple, &entry, 0) < 0)
return DROP_CT_CREATE_FAILED;

/* Create an ICMPv6 entry to relate errors */
struct ipv6_ct_tuple icmp_tuple = {
.nexthdr = IPPROTO_ICMPV6,
.sport = 0,
.dport = 0,
.flags = tuple->flags | TUPLE_F_RELATED,
};
if (map_related != NULL) {
/* Create an ICMPv6 entry to relate errors */
struct ipv6_ct_tuple icmp_tuple = {
.nexthdr = IPPROTO_ICMPV6,
.sport = 0,
.dport = 0,
.flags = tuple->flags | TUPLE_F_RELATED,
};

entry.seen_non_syn = true; /* For ICMP, there is no SYN. */
entry.seen_non_syn = true; /* For ICMP, there is no SYN. */

ipv6_addr_copy(&icmp_tuple.daddr, &tuple->daddr);
ipv6_addr_copy(&icmp_tuple.saddr, &tuple->saddr);
ipv6_addr_copy(&icmp_tuple.daddr, &tuple->daddr);
ipv6_addr_copy(&icmp_tuple.saddr, &tuple->saddr);

/* FIXME: We could do a lookup and check if an L3 entry already exists */
if (map_update_elem(map, &icmp_tuple, &entry, 0) < 0) {
/* Previous map update succeeded, we could delete it
* but we might as well just let it time out.
*/
return DROP_CT_CREATE_FAILED;
if (map_update_elem(map_related, &icmp_tuple, &entry, 0) < 0)
return DROP_CT_CREATE_FAILED;
}

return 0;
}

Expand Down Expand Up @@ -741,9 +739,11 @@ ct_update4_rev_nat_index(void *map, struct ipv4_ct_tuple *tuple,
return;
}

static inline int __inline__ ct_create4(void *map, struct ipv4_ct_tuple *tuple,
struct __sk_buff *skb, int dir,
struct ct_state *ct_state, bool proxy_redirect)
static __always_inline int ct_create4(const void *map_main, const void *map_related,
struct ipv4_ct_tuple *tuple,
struct __sk_buff *skb, int dir,
struct ct_state *ct_state,
bool proxy_redirect)
{
/* Create entry in original direction */
struct ct_entry entry = { };
Expand Down Expand Up @@ -791,7 +791,7 @@ static inline int __inline__ ct_create4(void *map, struct ipv4_ct_tuple *tuple,
cilium_dbg3(skb, DBG_CT_CREATED4, entry.rev_nat_index, ct_state->src_sec_id, ct_state->addr);

entry.src_sec_id = ct_state->src_sec_id;
if (map_update_elem(map, tuple, &entry, 0) < 0)
if (map_update_elem(map_main, tuple, &entry, 0) < 0)
return DROP_CT_CREATE_FAILED;

if (ct_state->addr && ct_state->loopback) {
Expand All @@ -814,29 +814,32 @@ static inline int __inline__ ct_create4(void *map, struct ipv4_ct_tuple *tuple,
tuple->daddr = ct_state->addr;
}

if (map_update_elem(map, tuple, &entry, 0) < 0)
if (map_update_elem(map_main, tuple, &entry, 0) < 0)
return DROP_CT_CREATE_FAILED;
tuple->saddr = saddr;
tuple->daddr = daddr;
tuple->flags = flags;
}

/* Create an ICMP entry to relate errors */
struct ipv4_ct_tuple icmp_tuple = {
.daddr = tuple->daddr,
.saddr = tuple->saddr,
.nexthdr = IPPROTO_ICMP,
.sport = 0,
.dport = 0,
.flags = tuple->flags | TUPLE_F_RELATED,
};

entry.seen_non_syn = true; /* For ICMP, there is no SYN. */

/* FIXME: We could do a lookup and check if an L3 entry already exists */
if (map_update_elem(map, &icmp_tuple, &entry, 0) < 0)
return DROP_CT_CREATE_FAILED;

if (map_related != NULL) {
/* Create an ICMP entry to relate errors */
struct ipv4_ct_tuple icmp_tuple = {
.daddr = tuple->daddr,
.saddr = tuple->saddr,
.nexthdr = IPPROTO_ICMP,
.sport = 0,
.dport = 0,
.flags = tuple->flags | TUPLE_F_RELATED,
};

entry.seen_non_syn = true; /* For ICMP, there is no SYN. */
/* The previous map update succeeded. We could delete that entry
* if the update below fails, but we might as well just let it
* time out.
*/
if (map_update_elem(map_related, &icmp_tuple, &entry, 0) < 0)
return DROP_CT_CREATE_FAILED;
}
return 0;
}

Expand Down Expand Up @@ -867,9 +870,11 @@ ct_update6_rev_nat_index(void *map, struct ipv6_ct_tuple *tuple,
{
}

static inline int __inline__ ct_create6(void *map, struct ipv6_ct_tuple *tuple,
struct __sk_buff *skb, int dir,
struct ct_state *ct_state, bool from_proxy)
static __always_inline int ct_create6(const void *map_main, const void *map_related,
struct ipv6_ct_tuple *tuple,
struct __sk_buff *skb, int dir,
struct ct_state *ct_state,
bool from_proxy)
{
return 0;
}
Expand All @@ -886,9 +891,10 @@ ct_update4_rev_nat_index(void *map, struct ipv4_ct_tuple *tuple,
{
}

static inline int __inline__ ct_create4(void *map, struct ipv4_ct_tuple *tuple,
struct __sk_buff *skb, int dir,
struct ct_state *ct_state, bool from_proxy)
static __always_inline int ct_create4(const void *map_main, const void *map_related,
struct ipv4_ct_tuple *tuple,
struct __sk_buff *skb, int dir,
struct ct_state *ct_state, bool from_proxy)
{
return 0;
}
Expand Down
4 changes: 2 additions & 2 deletions bpf/lib/lb.h
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ static inline int __inline__ lb6_local(void *map, struct __sk_buff *skb,
}
state->backend_id = slave_svc->backend_id;
state->rev_nat_index = svc->rev_nat_index;
ret = ct_create6(map, tuple, skb, CT_SERVICE, state, false);
ret = ct_create6(map, NULL, tuple, skb, CT_SERVICE, state, false);
/* Fail closed, if the conntrack entry create fails drop
* service lookup.
*/
Expand Down Expand Up @@ -822,7 +822,7 @@ static inline int __inline__ lb4_local(void *map, struct __sk_buff *skb,
}
state->backend_id = slave_svc->backend_id;
state->rev_nat_index = svc->rev_nat_index;
ret = ct_create4(map, tuple, skb, CT_SERVICE, state, false);
ret = ct_create4(map, NULL, tuple, skb, CT_SERVICE, state, false);
/* Fail closed, if the conntrack entry create fails drop
* service lookup.
*/
Expand Down
4 changes: 2 additions & 2 deletions bpf/lib/nat.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ static __always_inline int snat_v4_track_local(struct __sk_buff *skb,
if (ret < 0) {
return ret;
} else if (ret == CT_NEW) {
ret = ct_create4(get_ct_map4(&tmp), &tmp, skb, where,
ret = ct_create4(get_ct_map4(&tmp), NULL, &tmp, skb, where,
&ct_state, false);
if (IS_ERR(ret))
return ret;
Expand Down Expand Up @@ -753,7 +753,7 @@ static __always_inline int snat_v6_track_local(struct __sk_buff *skb,
if (ret < 0) {
return ret;
} else if (ret == CT_NEW) {
ret = ct_create6(get_ct_map6(&tmp), &tmp, skb, where,
ret = ct_create6(get_ct_map6(&tmp), NULL, &tmp, skb, where,
&ct_state, false);
if (IS_ERR(ret))
return ret;
Expand Down
12 changes: 6 additions & 6 deletions bpf/lib/nodeport.h
Original file line number Diff line number Diff line change
Expand Up @@ -471,15 +471,15 @@ static inline int nodeport_lb6(struct __sk_buff *skb, __u32 src_identity)
case CT_NEW:
ct_state_new.src_sec_id = SECLABEL;
ct_state_new.node_port = 1;
ret = ct_create6(get_ct_map6(&tuple), &tuple, skb, CT_EGRESS,
&ct_state_new, false);
ret = ct_create6(get_ct_map6(&tuple), NULL, &tuple, skb,
CT_EGRESS, &ct_state_new, false);
if (IS_ERR(ret))
return ret;
if (backend_local) {
ct_flip_tuple_dir6(&tuple);
redo:
ct_state_new.rev_nat_index = 0;
ret = ct_create6(get_ct_map6(&tuple), &tuple, skb,
ret = ct_create6(get_ct_map6(&tuple), NULL, &tuple, skb,
CT_INGRESS, &ct_state_new, false);
if (IS_ERR(ret))
return ret;
Expand Down Expand Up @@ -1020,15 +1020,15 @@ static inline int nodeport_lb4(struct __sk_buff *skb, __u32 src_identity)
case CT_NEW:
ct_state_new.src_sec_id = SECLABEL;
ct_state_new.node_port = 1;
ret = ct_create4(get_ct_map4(&tuple), &tuple, skb, CT_EGRESS,
&ct_state_new, false);
ret = ct_create4(get_ct_map4(&tuple), NULL, &tuple, skb,
CT_EGRESS, &ct_state_new, false);
if (IS_ERR(ret))
return ret;
if (backend_local) {
ct_flip_tuple_dir4(&tuple);
redo:
ct_state_new.rev_nat_index = 0;
ret = ct_create4(get_ct_map4(&tuple), &tuple, skb,
ret = ct_create4(get_ct_map4(&tuple), NULL, &tuple, skb,
CT_INGRESS, &ct_state_new, false);
if (IS_ERR(ret))
return ret;
Expand Down
1 change: 1 addition & 0 deletions contrib/systemd/sys-fs-bpf.mount
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ After=swap.target
What=bpffs
Where=/sys/fs/bpf
Type=bpf
Options=rw,nosuid,nodev,noexec,relatime,mode=700

[Install]
WantedBy=multi-user.target
2 changes: 1 addition & 1 deletion daemon/bpf.sha
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
GO_BINDATA_SHA1SUM=a7b6e8057bbe7fe5b50147079aa6704bc74acac1
GO_BINDATA_SHA1SUM=ce5f744ba786c4ca3021843e103345152516996d
BPF_FILES=../bpf/COPYING ../bpf/Makefile ../bpf/Makefile.bpf ../bpf/bpf_alignchecker.c ../bpf/bpf_features.h ../bpf/bpf_hostdev_ingress.c ../bpf/bpf_ipsec.c ../bpf/bpf_lxc.c ../bpf/bpf_netdev.c ../bpf/bpf_network.c ../bpf/bpf_overlay.c ../bpf/bpf_sock.c ../bpf/bpf_xdp.c ../bpf/cilium-map-migrate.c ../bpf/filter_config.h ../bpf/include/bpf/api.h ../bpf/include/elf/elf.h ../bpf/include/elf/gelf.h ../bpf/include/elf/libelf.h ../bpf/include/iproute2/bpf_elf.h ../bpf/include/linux/bpf.h ../bpf/include/linux/bpf_common.h ../bpf/include/linux/byteorder.h ../bpf/include/linux/byteorder/big_endian.h ../bpf/include/linux/byteorder/little_endian.h ../bpf/include/linux/icmp.h ../bpf/include/linux/icmpv6.h ../bpf/include/linux/if_arp.h ../bpf/include/linux/if_ether.h ../bpf/include/linux/if_packet.h ../bpf/include/linux/in.h ../bpf/include/linux/in6.h ../bpf/include/linux/ioctl.h ../bpf/include/linux/ip.h ../bpf/include/linux/ipv6.h ../bpf/include/linux/perf_event.h ../bpf/include/linux/swab.h ../bpf/include/linux/tcp.h ../bpf/include/linux/type_mapper.h ../bpf/include/linux/udp.h ../bpf/init.sh ../bpf/lib/arp.h ../bpf/lib/common.h ../bpf/lib/config.h ../bpf/lib/conntrack.h ../bpf/lib/conntrack_map.h ../bpf/lib/conntrack_test.h ../bpf/lib/csum.h ../bpf/lib/dbg.h ../bpf/lib/drop.h ../bpf/lib/encap.h ../bpf/lib/eps.h ../bpf/lib/eth.h ../bpf/lib/events.h ../bpf/lib/icmp6.h ../bpf/lib/identity.h ../bpf/lib/ipv4.h ../bpf/lib/ipv6.h ../bpf/lib/ipv6_test.h ../bpf/lib/l3.h ../bpf/lib/l4.h ../bpf/lib/lb.h ../bpf/lib/lxc.h ../bpf/lib/maps.h ../bpf/lib/metrics.h ../bpf/lib/nat.h ../bpf/lib/nat46.h ../bpf/lib/nodeport.h ../bpf/lib/policy.h ../bpf/lib/signal.h ../bpf/lib/tailcall.h ../bpf/lib/trace.h ../bpf/lib/utils.h ../bpf/lib/xdp.h ../bpf/lxc_config.h ../bpf/netdev_config.h ../bpf/node_config.h ../bpf/probes/raw_change_tail.t ../bpf/probes/raw_fib_lookup.t ../bpf/probes/raw_insn.h ../bpf/probes/raw_invalidate_hash.t ../bpf/probes/raw_lpm_map.t ../bpf/probes/raw_lru_map.t 
../bpf/probes/raw_main.c ../bpf/probes/raw_max_insn.t ../bpf/probes/raw_sock_cookie.t ../bpf/run_probes.sh ../bpf/sockops/Makefile ../bpf/sockops/bpf_redir.c ../bpf/sockops/bpf_sockops.c ../bpf/sockops/bpf_sockops.h ../bpf/sockops/sockops_config.h
6 changes: 6 additions & 0 deletions daemon/daemon_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,12 @@ func runDaemon() {
errs <- svr.Serve()
}()

if k8s.IsEnabled() {
bootstrapStats.k8sInit.Start()
k8s.Client().MarkNodeReady(node.GetName())
bootstrapStats.k8sInit.End(true)
}

bootstrapStats.overall.End(true)
bootstrapStats.updateMetrics()

Expand Down
2 changes: 1 addition & 1 deletion daemon/sysctl_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type DaemonPrivilegedSuite struct{}

var _ = Suite(&DaemonPrivilegedSuite{})

func (s *DaemonPrivilegedSuite) TestInitSysctlParams(c *C) {
func (s *DaemonPrivilegedSuite) TestEnableIPForwarding(c *C) {
err := enableIPForwarding()
c.Assert(err, IsNil)
}