Skip to content

Commit

Permalink
bpf: Report original source IP in TRACE_TO_LXC
Browse files Browse the repository at this point in the history
To correlate flow events across multiple nodes, any service IP translation
applied to a flow must be known. The TRACE_TO_LXC event is well placed to
report it, because reverse translation has been performed just before the
event is emitted. Extend the trace_notify structure to include the original
source IP when it is available.

Example:
```
-> endpoint 2614 flow 0x5b9d9e5a identity 56995->31468 state new ifindex lxcc3be4eeac8fe orig-ip 10.16.182.13: 10.16.182.13:33020 -> 10.16.228.151:80 tcp SYN
-> endpoint 3745 flow 0xa4419b6f identity 31468->56995 state reply ifindex lxc155152faaf3f orig-ip 10.16.228.151: 172.20.0.187:80 -> 10.16.182.13:33020 tcp ACK, RST
```

Signed-off-by: Thomas Graf <thomas@cilium.io>
  • Loading branch information
tgraf authored and ianvernon committed Oct 17, 2019
1 parent 3995474 commit b3aa583
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 66 deletions.
11 changes: 6 additions & 5 deletions bpf/bpf_lxc.c
Original file line number Diff line number Diff line change
Expand Up @@ -760,7 +760,7 @@ ipv6_policy(struct __sk_buff *skb, int ifindex, __u32 src_label, __u8 *reason)
struct ct_state ct_state = {};
struct ct_state ct_state_new = {};
bool skip_ingress_proxy = false;
union v6addr orig_dip = {};
union v6addr orig_dip, orig_sip;
__u32 monitor = 0;

if (!revalidate_data(skb, &data, &data_end, &ip6))
Expand All @@ -772,6 +772,7 @@ ipv6_policy(struct __sk_buff *skb, int ifindex, __u32 src_label, __u8 *reason)
ipv6_addr_copy(&tuple.daddr, (union v6addr *) &ip6->daddr);
ipv6_addr_copy(&tuple.saddr, (union v6addr *) &ip6->saddr);
ipv6_addr_copy(&orig_dip, (union v6addr *) &ip6->daddr);
ipv6_addr_copy(&orig_sip, (union v6addr *) &ip6->saddr);

/* If packet is coming from the ingress proxy we have to skip
* redirection to the ingress proxy as we would loop forever. */
Expand Down Expand Up @@ -854,11 +855,11 @@ ipv6_policy(struct __sk_buff *skb, int ifindex, __u32 src_label, __u8 *reason)

if (redirect_to_proxy(verdict, *reason)) {
// Trace the packet before it is forwarded to the proxy
send_trace_notify(skb, TRACE_TO_PROXY, src_label, SECLABEL,
send_trace_notify6(skb, TRACE_TO_PROXY, src_label, SECLABEL, &orig_sip,
0, ifindex, *reason, monitor);
return skb_redirect_to_proxy(skb, verdict);
} else { // Not redirected to host / proxy.
send_trace_notify(skb, TRACE_TO_LXC, src_label, SECLABEL,
send_trace_notify6(skb, TRACE_TO_LXC, src_label, SECLABEL, &orig_sip,
LXC_ID, ifindex, *reason, monitor);
}

Expand Down Expand Up @@ -1043,11 +1044,11 @@ ipv4_policy(struct __sk_buff *skb, int ifindex, __u32 src_label, __u8 *reason, _
if (redirect_to_proxy(verdict, *reason)) {
*proxy_port = verdict;
// Trace the packet before it is forwarded to the proxy
send_trace_notify(skb, TRACE_TO_PROXY, src_label, SECLABEL,
send_trace_notify4(skb, TRACE_TO_PROXY, src_label, SECLABEL, orig_sip,
0, ifindex, *reason, monitor);
return TC_ACT_OK;
} else { // Not redirected to host / proxy.
send_trace_notify(skb, TRACE_TO_LXC, src_label, SECLABEL,
send_trace_notify4(skb, TRACE_TO_LXC, src_label, SECLABEL, orig_sip,
LXC_ID, ifindex, *reason, monitor);
}

Expand Down
196 changes: 142 additions & 54 deletions bpf/lib/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,36 +69,16 @@ enum {
#define MONITOR_AGGREGATION TRACE_AGGREGATE_NONE
#endif

#ifdef TRACE_NOTIFY

struct trace_notify {
NOTIFY_COMMON_HDR
__u32 len_orig;
__u32 len_cap;
__u32 src_label;
__u32 dst_label;
__u16 dst_id;
__u8 reason;
__u8 pad;
__u32 ifindex;
};

/**
* send_trace_notify
* update_trace_metrics
* @skb: socket buffer
* @obs_point: observation point (TRACE_*)
* @src: source identity
* @dst: destination identity
* @dst_id: designated destination endpoint ID
* @ifindex: designated destination ifindex
* @reason: reason for forwarding the packet (TRACE_REASON_*)
* @monitor: length of notification to send (0 means don't send)
*
* Generate a notification to indicate a packet was forwarded at an observation point.
* Update metrics based on a trace event
*/
static inline void
send_trace_notify(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst,
__u16 dst_id, __u32 ifindex, __u8 reason, __u32 monitor)
update_trace_metrics(struct __sk_buff *skb, __u8 obs_point, __u8 reason)
{
__u8 encrypted;

Expand Down Expand Up @@ -130,23 +110,103 @@ send_trace_notify(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst,
update_metrics(skb->len, METRIC_INGRESS, REASON_DECRYPT);
break;
}
}

#ifdef TRACE_NOTIFY

/*
 * Trace notification message handed to skb_event_output() by the
 * send_trace_notify*() helpers.
 *
 * NOTE(review): the userspace decoder presumably mirrors this layout member
 * by member (it uses a 48-byte header length) -- confirm before adding,
 * removing or reordering any field.
 */
struct trace_notify {
/* Macro defined elsewhere; the senders initialize .type, .subtype, .source
 * and .hash, which presumably come from this header -- confirm. */
NOTIFY_COMMON_HDR
/* Original packet length (the senders store skb->len here). */
__u32 len_orig;
/* Captured length: min(monitor, skb->len) as computed by the senders. */
__u32 len_cap;
/* Source identity. */
__u32 src_label;
/* Destination identity. */
__u32 dst_label;
/* Designated destination endpoint ID. */
__u16 dst_id;
/* Reason for forwarding the packet (TRACE_REASON_*). */
__u8 reason;
/* Set to 1 by send_trace_notify6(), 0 by the other senders.
 * NOTE(review): bit-field placement within the byte is implementation-
 * defined; the decoder tests bit 0 -- confirm on big-endian targets. */
__u8 ipv6:1;
/* Unused bits; always written as 0 by the senders. */
__u8 pad:7;
/* Designated destination ifindex. */
__u32 ifindex;
/* Original source address; which member is valid is selected by `ipv6`. */
union {
struct {
/* IPv4 form: the address followed by three zeroed words, presumably so
 * both variants occupy the same space as orig_ip6 -- confirm against the
 * definition of union v6addr. */
__be32 orig_ip4;
__u32 orig_pad1;
__u32 orig_pad2;
__u32 orig_pad3;
};
/* IPv6 form. */
union v6addr orig_ip6;
};
};

/**
 * emit_trace_notify
 * @obs_point:	observation point (TRACE_*)
 * @monitor:	requested notification length, 0 if none was requested
 *
 * Decide whether a trace notification should be emitted for @obs_point under
 * the compile-time MONITOR_AGGREGATION level.
 *
 * Returns false when
 *  - MONITOR_AGGREGATION >= TRACE_AGGREGATE_RX and @obs_point is one of the
 *    TRACE_FROM_{LXC,PROXY,HOST,STACK,OVERLAY} points, or
 *  - MONITOR_AGGREGATION >= TRACE_AGGREGATE_ACTIVE_CT and @monitor is 0;
 * returns true otherwise.
 */
static inline bool emit_trace_notify(__u8 obs_point, __u32 monitor)
{
	if (MONITOR_AGGREGATION >= TRACE_AGGREGATE_RX) {
		switch (obs_point) {
		case TRACE_FROM_LXC:
		case TRACE_FROM_PROXY:
		case TRACE_FROM_HOST:
		case TRACE_FROM_STACK:
		case TRACE_FROM_OVERLAY:
			/* A bool function must return a value on this path. */
			return false;
		default:
			break;
		}
	}

	if (MONITOR_AGGREGATION >= TRACE_AGGREGATE_ACTIVE_CT && !monitor)
		return false;

	return true;
}

/**
 * send_trace_notify
 * @skb:	socket buffer
 * @obs_point:	observation point (TRACE_*)
 * @src:	source identity
 * @dst:	destination identity
 * @dst_id:	designated destination endpoint ID
 * @ifindex:	designated destination ifindex
 * @reason:	reason for forwarding the packet (TRACE_REASON_*)
 * @monitor:	number of packet bytes to capture; 0 falls back to
 *		TRACE_PAYLOAD_LEN if the event is emitted at all
 *
 * Update the trace metrics and, unless emit_trace_notify() filters the event
 * out, output a trace_notify message that carries no original IP (ipv6 flag
 * clear, address words zeroed) through EVENTS_MAP.
 */
static inline void
send_trace_notify(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst,
		  __u16 dst_id, __u32 ifindex, __u8 reason, __u32 monitor)
{
	/* Metrics are accounted even when the notification is suppressed. */
	update_trace_metrics(skb, obs_point, reason);

	if (!emit_trace_notify(obs_point, monitor))
		return;

	if (!monitor)
		monitor = TRACE_PAYLOAD_LEN;

	uint64_t orig_len = (uint64_t)skb->len;
	uint64_t captured = min((uint64_t)monitor, (uint64_t)orig_len);
	uint32_t flow_hash = get_hash_recalc(skb);
	/* The capture length travels in the upper 32 bits of the flags. */
	uint64_t out_flags = (captured << 32) | BPF_F_CURRENT_CPU;
	struct trace_notify msg = {
		.type = CILIUM_NOTIFY_TRACE,
		.subtype = obs_point,
		.source = EVENT_SOURCE,
		.hash = flow_hash,
		.len_orig = orig_len,
		.len_cap = captured,
		.src_label = src,
		.dst_label = dst,
		.dst_id = dst_id,
		.reason = reason,
		.ipv6 = 0,
		.pad = 0,
		.ifindex = ifindex,
		.orig_ip4 = 0,
		.orig_pad1 = 0,
		.orig_pad2 = 0,
		.orig_pad3 = 0,
	};

	skb_event_output(skb, &EVENTS_MAP, out_flags, &msg, sizeof(msg));
}

static inline void
send_trace_notify4(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst, __be32 orig_addr,
__u16 dst_id, __u32 ifindex, __u8 reason, __u32 monitor)
{
update_trace_metrics(skb, obs_point, reason);

if (!emit_trace_notify(obs_point, monitor))
return;

if (!monitor)
monitor = TRACE_PAYLOAD_LEN;

uint64_t skb_len = (uint64_t)skb->len, cap_len = min((uint64_t)monitor, (uint64_t)skb_len);
uint32_t hash = get_hash_recalc(skb);
struct trace_notify msg = {
Expand All @@ -160,49 +220,77 @@ send_trace_notify(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst,
.dst_label = dst,
.dst_id = dst_id,
.reason = reason,
.ipv6 = 0,
.pad = 0,
.ifindex = ifindex,
.orig_ip4 = orig_addr,
.orig_pad1 = 0,
.orig_pad2 = 0,
.orig_pad3 = 0,
};
skb_event_output(skb, &EVENTS_MAP,
(cap_len << 32) | BPF_F_CURRENT_CPU,
&msg, sizeof(msg));
}

/**
 * send_trace_notify6
 * @skb:	socket buffer
 * @obs_point:	observation point (TRACE_*)
 * @src:	source identity
 * @dst:	destination identity
 * @orig_addr:	original source IPv6 address to report
 * @dst_id:	designated destination endpoint ID
 * @ifindex:	designated destination ifindex
 * @reason:	reason for forwarding the packet (TRACE_REASON_*)
 * @monitor:	number of packet bytes to capture; 0 falls back to
 *		TRACE_PAYLOAD_LEN if the event is emitted at all
 *
 * Update the trace metrics and, unless emit_trace_notify() filters the event
 * out, output a trace_notify message with the ipv6 flag set and @orig_addr
 * copied into orig_ip6 through EVENTS_MAP.
 */
static inline void
send_trace_notify6(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst,
		   union v6addr *orig_addr, __u16 dst_id, __u32 ifindex,
		   __u8 reason, __u32 monitor)
{
	/* Metrics are accounted even when the notification is suppressed. */
	update_trace_metrics(skb, obs_point, reason);

	if (!emit_trace_notify(obs_point, monitor))
		return;

	if (!monitor)
		monitor = TRACE_PAYLOAD_LEN;

	uint64_t orig_len = (uint64_t)skb->len;
	uint64_t captured = min((uint64_t)monitor, (uint64_t)orig_len);
	uint32_t flow_hash = get_hash_recalc(skb);
	/* The capture length travels in the upper 32 bits of the flags. */
	uint64_t out_flags = (captured << 32) | BPF_F_CURRENT_CPU;
	struct trace_notify msg = {
		.type = CILIUM_NOTIFY_TRACE,
		.subtype = obs_point,
		.source = EVENT_SOURCE,
		.hash = flow_hash,
		.len_orig = orig_len,
		.len_cap = captured,
		.src_label = src,
		.dst_label = dst,
		.dst_id = dst_id,
		.reason = reason,
		.ipv6 = 1,
		.pad = 0,
		.ifindex = ifindex,
	};

	/* The address area is not named in the initializer above, so it starts
	 * out zeroed and is filled in here. */
	ipv6_addr_copy(&msg.orig_ip6, orig_addr);

	skb_event_output(skb, &EVENTS_MAP, out_flags, &msg, sizeof(msg));
}

#else

/**
 * send_trace_notify
 * @skb:	socket buffer
 * @obs_point:	observation point (TRACE_*)
 * @reason:	reason for forwarding the packet (TRACE_REASON_*)
 *
 * Variant used when TRACE_NOTIFY is not defined: no notification is
 * generated, only the trace metrics are updated. The remaining parameters
 * are accepted for signature compatibility and are unused.
 */
static inline void
send_trace_notify(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst,
		  __u16 dst_id, __u32 ifindex, __u8 reason, __u32 monitor)
{
	update_trace_metrics(skb, obs_point, reason);
}

switch (obs_point) {
case TRACE_TO_LXC:
update_metrics(skb->len, METRIC_INGRESS, REASON_FORWARDED);
break;
/**
 * send_trace_notify4
 * @skb:	socket buffer
 * @obs_point:	observation point (TRACE_*)
 * @reason:	reason for forwarding the packet (TRACE_REASON_*)
 *
 * Variant used when TRACE_NOTIFY is not defined: no notification is
 * generated, only the trace metrics are updated. @src, @dst, @orig_addr,
 * @dst_id, @ifindex and @monitor are unused.
 */
static inline void
send_trace_notify4(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst,
		   __be32 orig_addr, __u16 dst_id, __u32 ifindex,
		   __u8 reason, __u32 monitor)
{
	update_trace_metrics(skb, obs_point, reason);
}

/* TRACE_FROM_LXC, i.e endpoint-to-endpoint delivery
* is handled separately in ipv*_local_delivery() where we can bump
* an egress forward. It could still be dropped but it would show
* up later as an ingress drop, in that scenario.
*
* TRACE_TO_PROXY is not handled in datapath. This is because we have separate
* L7 proxy "forwarded" and "dropped" (ingress/egress) counters in the proxy layer
* to capture these metrics.
*/
case TRACE_TO_HOST:
case TRACE_TO_STACK:
case TRACE_TO_OVERLAY:
update_metrics(skb->len, METRIC_EGRESS, REASON_FORWARDED);
break;
case TRACE_FROM_OVERLAY:
case TRACE_FROM_NETWORK:
encrypted = reason & TRACE_REASON_ENCRYPTED;
if (!encrypted)
update_metrics(skb->len, METRIC_INGRESS, REASON_PLAINTEXT);
else
update_metrics(skb->len, METRIC_INGRESS, REASON_DECRYPT);
break;
}
/**
 * send_trace_notify6
 * @skb:	socket buffer
 * @obs_point:	observation point (TRACE_*)
 * @reason:	reason for forwarding the packet (TRACE_REASON_*)
 *
 * Variant used when TRACE_NOTIFY is not defined: no notification is
 * generated, only the trace metrics are updated. @src, @dst, @orig_addr,
 * @dst_id, @ifindex and @monitor are unused.
 */
static inline void
send_trace_notify6(struct __sk_buff *skb, __u8 obs_point, __u32 src, __u32 dst,
		   union v6addr *orig_addr, __u16 dst_id, __u32 ifindex,
		   __u8 reason, __u32 monitor)
{
	update_trace_metrics(skb, obs_point, reason);
}

#endif
Expand Down
2 changes: 1 addition & 1 deletion daemon/bpf.sha
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
GO_BINDATA_SHA1SUM=2a70b7dcfadcf3df554618125cd4e57f7231ee2b
GO_BINDATA_SHA1SUM=6816e7e3a1c4fc0d6dc823b23688cee5068e61f8
BPF_FILES=../bpf/COPYING ../bpf/Makefile ../bpf/Makefile.bpf ../bpf/bpf_alignchecker.c ../bpf/bpf_features.h ../bpf/bpf_hostdev_ingress.c ../bpf/bpf_ipsec.c ../bpf/bpf_lxc.c ../bpf/bpf_netdev.c ../bpf/bpf_network.c ../bpf/bpf_overlay.c ../bpf/bpf_sock.c ../bpf/bpf_xdp.c ../bpf/cilium-map-migrate.c ../bpf/filter_config.h ../bpf/include/bpf/api.h ../bpf/include/elf/elf.h ../bpf/include/elf/gelf.h ../bpf/include/elf/libelf.h ../bpf/include/iproute2/bpf_elf.h ../bpf/include/linux/bpf.h ../bpf/include/linux/bpf_common.h ../bpf/include/linux/byteorder.h ../bpf/include/linux/byteorder/big_endian.h ../bpf/include/linux/byteorder/little_endian.h ../bpf/include/linux/icmp.h ../bpf/include/linux/icmpv6.h ../bpf/include/linux/if_arp.h ../bpf/include/linux/if_ether.h ../bpf/include/linux/if_packet.h ../bpf/include/linux/in.h ../bpf/include/linux/in6.h ../bpf/include/linux/ioctl.h ../bpf/include/linux/ip.h ../bpf/include/linux/ipv6.h ../bpf/include/linux/perf_event.h ../bpf/include/linux/swab.h ../bpf/include/linux/tcp.h ../bpf/include/linux/type_mapper.h ../bpf/include/linux/udp.h ../bpf/init.sh ../bpf/lib/arp.h ../bpf/lib/common.h ../bpf/lib/config.h ../bpf/lib/conntrack.h ../bpf/lib/conntrack_map.h ../bpf/lib/conntrack_test.h ../bpf/lib/csum.h ../bpf/lib/dbg.h ../bpf/lib/drop.h ../bpf/lib/encap.h ../bpf/lib/eps.h ../bpf/lib/eth.h ../bpf/lib/events.h ../bpf/lib/icmp6.h ../bpf/lib/ipv4.h ../bpf/lib/ipv6.h ../bpf/lib/ipv6_test.h ../bpf/lib/l3.h ../bpf/lib/l4.h ../bpf/lib/lb.h ../bpf/lib/lxc.h ../bpf/lib/maps.h ../bpf/lib/metrics.h ../bpf/lib/nat.h ../bpf/lib/nat46.h ../bpf/lib/nodeport.h ../bpf/lib/policy.h ../bpf/lib/signal.h ../bpf/lib/tailcall.h ../bpf/lib/trace.h ../bpf/lib/utils.h ../bpf/lib/xdp.h ../bpf/lxc_config.h ../bpf/netdev_config.h ../bpf/node_config.h ../bpf/probes/raw_change_tail.t ../bpf/probes/raw_fib_lookup.t ../bpf/probes/raw_insn.h ../bpf/probes/raw_invalidate_hash.t ../bpf/probes/raw_lpm_map.t ../bpf/probes/raw_lru_map.t ../bpf/probes/raw_main.c 
../bpf/probes/raw_map_val_adj.t ../bpf/probes/raw_mark_map_val.t ../bpf/probes/raw_max_insn.t ../bpf/probes/raw_sock_cookie.t ../bpf/run_probes.sh ../bpf/sockops/Makefile ../bpf/sockops/bpf_redir.c ../bpf/sockops/bpf_sockops.c ../bpf/sockops/bpf_sockops.h ../bpf/sockops/sockops_config.h
32 changes: 26 additions & 6 deletions pkg/monitor/datapath_trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,25 @@ package monitor
import (
"encoding/json"
"fmt"
"net"

"github.com/cilium/cilium/common/types"
"github.com/cilium/cilium/pkg/monitor/api"
)

const (
	// TraceNotifyLen is the length in bytes of the TraceNotify header at the
	// start of a trace notification; DumpInfo treats everything from this
	// offset on (data[TraceNotifyLen:]) as the captured packet data.
	TraceNotifyLen = 48
	// TraceReasonEncryptMask is the bit used to indicate encryption or not
	TraceReasonEncryptMask uint8 = 0x80
)

// TraceNotifyFlagIsIPv6 is the bit in TraceNotify.Flags that marks the
// notification as referring to an IPv6 flow.
const TraceNotifyFlagIsIPv6 uint8 = 1

// TraceNotify is the message format of a trace notification in the BPF ring buffer
type TraceNotify struct {
Type uint8
Expand All @@ -40,8 +48,9 @@ type TraceNotify struct {
DstLabel uint32
DstID uint16
Reason uint8
Pad uint8
Flags uint8
Ifindex uint32
OrigIP types.IPv6
// data
}

Expand Down Expand Up @@ -108,16 +117,25 @@ func (n *TraceNotify) traceSummary() string {
}
}

// OriginalIP returns the original source IP carried in the notification, i.e.
// the source address before reverse NAT was performed on the flow. Unless
// TraceNotifyFlagIsIPv6 is set in Flags, only the first four bytes of OrigIP
// are returned (IPv4); otherwise the full OrigIP value is returned.
func (n *TraceNotify) OriginalIP() net.IP {
	isIPv6 := n.Flags&TraceNotifyFlagIsIPv6 != 0
	if !isIPv6 {
		return n.OrigIP[:4]
	}
	return n.OrigIP[:]
}

// DumpInfo prints a summary of the trace messages.
func (n *TraceNotify) DumpInfo(data []byte) {
if n.encryptReason() != "" {
fmt.Printf("%s %s flow %#x identity %d->%d state %s ifindex %s: %s\n",
fmt.Printf("%s %s flow %#x identity %d->%d state %s ifindex %s orig-ip %s: %s\n",
n.traceSummary(), n.encryptReason(), n.Hash, n.SrcLabel, n.DstLabel,
n.traceReason(), ifname(int(n.Ifindex)), GetConnectionSummary(data[TraceNotifyLen:]))
n.traceReason(), ifname(int(n.Ifindex)), n.OriginalIP().String(), GetConnectionSummary(data[TraceNotifyLen:]))
} else {
fmt.Printf("%s flow %#x identity %d->%d state %s ifindex %s: %s\n",
fmt.Printf("%s flow %#x identity %d->%d state %s ifindex %s orig-ip %s: %s\n",
n.traceSummary(), n.Hash, n.SrcLabel, n.DstLabel,
n.traceReason(), ifname(int(n.Ifindex)), GetConnectionSummary(data[TraceNotifyLen:]))
n.traceReason(), ifname(int(n.Ifindex)), n.OriginalIP().String(), GetConnectionSummary(data[TraceNotifyLen:]))
}
}

Expand All @@ -134,6 +152,8 @@ func (n *TraceNotify) DumpVerbose(dissect bool, data []byte, prefix string) {
fmt.Printf(", identity %d->%d", n.SrcLabel, n.DstLabel)
}

fmt.Printf(", orig-ip " + n.OriginalIP().String())

if n.DstID != 0 {
fmt.Printf(", to endpoint %d\n", n.DstID)
} else {
Expand Down

0 comments on commit b3aa583

Please sign in to comment.