Skip to content

Commit

Permalink
nat46: Enable ipv6 containers to talk to an ipv4 container
Browse files Browse the repository at this point in the history
If destination is a v4 mapped v6 address ::FFFF:<a.b.c.d> use nat46
to talk to the v4 container.

ENABLE_NAT46 depends on LXC_IPV4 and CONNTRACK features.

packet flow:
v6 container(lxc1) ---------- nat64 -----------> v4 container (lxc2)
v6 container(lxc1) <--------- nat46 ------------ v4 container (lxc2)

lxc1 v4 conntrack will have needs_nat46=1 in lxc2 -> lxc1 direction.

Address @tgraf review comments.

1. LXC_NAT46 to represent combination of ENABLE_NAT46, ENABLE_IPV4 & CONNTRACK
2. rename isMappedIPv6Addr to ipv6_addr_is_mapped and move it to ipv6.h
3. Move ipv6_policy after v46 nat to a tail call.
4. Resolve merge conflict by renaming 07-nat46.sh to 08-nat46.sh
5. Cleanups suggested by @tgraf
6. Move NAT46 processing into its own tail calls to keep the instruction complexity under 4K.
7. Move tail_ipv6_to_ipv4 and tail_ipv4_to_ipv6 to bpf_lxc.c

Acked-by: Thomas Graf <thomas@cilium.io>
Signed-off-by: Madhu Challa <madhu@cilium.io>
  • Loading branch information
mchalla committed Dec 6, 2016
1 parent ba2900c commit 9e108c1
Show file tree
Hide file tree
Showing 13 changed files with 196 additions and 68 deletions.
62 changes: 47 additions & 15 deletions bpf/bpf_lxc.c
Expand Up @@ -106,7 +106,7 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb,
{
union macaddr router_mac = NODE_MAC;
union v6addr host_ip = HOST_IP;
int do_nat46 = 0, ret, l4_off;
int ret, l4_off;
struct csum_offset csum_off = {};
struct lb6_service *svc;
struct lb6_key key = {};
Expand Down Expand Up @@ -238,12 +238,11 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb,

return ipv6_local_delivery(skb, l3_off, l4_off, SECLABEL, ip6, tuple->nexthdr);
} else {
#ifdef ENABLE_NAT46
/* FIXME: Derive from prefix constant */
if (unlikely((tuple->addr.p1 & 0xffff) == 0xadde)) {
do_nat46 = 1;
goto to_host;
}
#ifdef LXC_NAT46
if (unlikely(ipv6_addr_is_mapped(&tuple->addr))) {
tail_call(skb, &cilium_calls, CILIUM_CALL_NAT64);
return DROP_MISSED_TAIL_CALL;
}
#endif

#ifdef ALLOW_TO_WORLD
Expand All @@ -263,14 +262,6 @@ static inline int ipv6_l3_from_lxc(struct __sk_buff *skb,
if (ret != TC_ACT_OK)
return ret;

if (do_nat46) {
union v6addr dp = NAT46_DST_PREFIX;

ret = ipv6_to_ipv4(skb, 14, &dp, IPV4_RANGE | (LXC_ID_NB <<16));
if (IS_ERR(ret))
return ret;
}

#ifndef POLICY_ENFORCEMENT
cilium_trace_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX);
return redirect(HOST_IFINDEX, 0);
Expand Down Expand Up @@ -688,6 +679,13 @@ static inline int __inline__ ipv4_policy(struct __sk_buff *skb, int ifindex, __u
if (ret < 0)
return ret;

#ifdef LXC_NAT46
if (skb->cb[CB_NAT46_STATE] == NAT46) {
tail_call(skb, &cilium_calls, CILIUM_CALL_NAT46);
return DROP_MISSED_TAIL_CALL;
}
#endif

if (unlikely(src_rev_nat)) {
int ret2;

Expand Down Expand Up @@ -745,4 +743,38 @@ __section_tail(CILIUM_MAP_POLICY, LXC_ID) int handle_policy(struct __sk_buff *sk
return redirect(ifindex, 0);
}

/*
 * Tail call target registered under CILIUM_CALL_NAT64.
 *
 * Rewrites the packet via ipv6_to_ipv4(), passing the Ethernet header length
 * as the network header offset and htonl(LXC_IPV4) as the source address.
 * On success the packet is traced, the NAT46 state slot in skb->cb[] is set
 * to NAT64, and processing continues in the CILIUM_CALL_IPV4 tail call.
 *
 * Returns the error from ipv6_to_ipv4() if the translation fails, or
 * DROP_MISSED_TAIL_CALL if execution continues past the tail call.
 */
__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_NAT64) int tail_ipv6_to_ipv4(struct __sk_buff *skb)
{
	int err;

	err = ipv6_to_ipv4(skb, ETH_HLEN, htonl(LXC_IPV4));
	if (IS_ERR(err))
		return err;

	cilium_trace_capture(skb, DBG_CAPTURE_AFTER_V64, skb->ingress_ifindex);

	/* Mark the packet as translated v6 -> v4 before handing it on. */
	skb->cb[CB_NAT46_STATE] = NAT64;

	tail_call(skb, &cilium_calls, CILIUM_CALL_IPV4);

	/* Only reached when the tail call above did not take over. */
	return DROP_MISSED_TAIL_CALL;
}

/*
 * Tail call target registered under CILIUM_CALL_NAT46.
 *
 * Verifies that a full IPv4 header follows the Ethernet header, then rewrites
 * the packet via ipv4_to_ipv6() using LXC_IP as the IPv6 destination. On
 * success the packet is traced and handed to the policy tail call for LXC_ID.
 *
 * Returns DROP_INVALID if the packet is too short, the error from
 * ipv4_to_ipv6() if the translation fails, or DROP_MISSED_TAIL_CALL if
 * execution continues past the tail call.
 */
__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_NAT46) int tail_ipv4_to_ipv6(struct __sk_buff *skb)
{
	void *pkt_end = (void *) (long) skb->data_end;
	void *pkt = (void *) (long) skb->data;
	struct iphdr *iph = pkt + ETH_HLEN;
	union v6addr dst6 = LXC_IP;
	int err;

	/* The IPv4 header must lie entirely within the linear packet data. */
	if (pkt + ETH_HLEN + sizeof(*iph) > pkt_end)
		return DROP_INVALID;

	err = ipv4_to_ipv6(skb, iph, ETH_HLEN, &dst6);
	if (IS_ERR(err))
		return err;

	cilium_trace_capture(skb, DBG_CAPTURE_AFTER_V46, skb->ingress_ifindex);

	tail_call(skb, &cilium_policy, LXC_ID);

	/* Only reached when the tail call above did not take over. */
	return DROP_MISSED_TAIL_CALL;
}
BPF_LICENSE("GPL");
24 changes: 1 addition & 23 deletions bpf/bpf_netdev.c
Expand Up @@ -37,7 +37,6 @@
#include "lib/eth.h"
#include "lib/dbg.h"
#include "lib/l3.h"
#include "lib/nat46.h"
#include "lib/policy.h"
#include "lib/drop.h"

Expand Down Expand Up @@ -130,27 +129,6 @@ static inline int handle_ipv4(struct __sk_buff *skb)
}
#endif

#ifdef ENABLE_NAT46
if (1) {
union v6addr sp = NAT46_SRC_PREFIX;
union v6addr dp = HOST_IP;
int ret;

if (data + sizeof(*ip) + ETH_HLEN > data_end)
return DROP_INVALID;

if ((ip->daddr & IPV4_MASK) != IPV4_RANGE)
return TC_ACT_OK;

ret = ipv4_to_ipv6(skb, ip4, 14, &sp, &dp);
if (IS_ERR(ret))
return ret;

proto = __constant_htons(ETH_P_IPV6);
skb->tc_index = 1;
}
#endif

return TC_ACT_OK;
}

Expand Down Expand Up @@ -179,7 +157,7 @@ int from_netdev(struct __sk_buff *skb)
ret = handle_ipv6(skb);
break;

#if defined ENABLE_IPV4 || defined ENABLE_NAT46
#ifdef ENABLE_IPV4
case __constant_htons(ETH_P_IP):
tail_call(skb, &cilium_calls, CILIUM_CALL_IPV4);
ret = DROP_MISSED_TAIL_CALL;
Expand Down
15 changes: 13 additions & 2 deletions bpf/lib/common.h
Expand Up @@ -40,7 +40,9 @@
#define CILIUM_CALL_SEND_ICMP6_TIME_EXCEEDED 5
#define CILIUM_CALL_ARP 6
#define CILIUM_CALL_IPV4 7
#define CILIUM_CALL_SIZE 8
#define CILIUM_CALL_NAT64 8
#define CILIUM_CALL_NAT46 9
#define CILIUM_CALL_SIZE 10

typedef __u64 mac_t;

Expand Down Expand Up @@ -143,6 +145,14 @@ enum {
CB_SRC_LABEL,
CB_IFINDEX,
CB_POLICY,
CB_NAT46_STATE,
};

/*
 * State values kept in skb->cb[CB_NAT46_STATE].
 *
 * NAT46_CLEAR must remain 0: the conntrack lookup tests the slot with a
 * plain logical negation to see whether a state has already been recorded.
 */
enum {
	NAT46_CLEAR = 0,	/* no translation state recorded */
	NAT64 = 1,		/* set after an IPv6 -> IPv4 translation */
	NAT46 = 2,		/* packet needs IPv4 -> IPv6 translation */
};

/* Flag values for CB_POLICY */
Expand Down Expand Up @@ -177,7 +187,8 @@ struct ct_entry {
__u16 lifetime;
__u16 rx_closing:1,
tx_closing:1,
reserve:14;
nat46:1,
reserve:13;
__u16 rev_nat_index;
};

Expand Down
15 changes: 15 additions & 0 deletions bpf/lib/conntrack.h
Expand Up @@ -60,6 +60,12 @@ static inline int __inline__ __ct_lookup(void *map, struct __sk_buff *skb,
if (rev_nat_index)
*rev_nat_index = entry->rev_nat_index;;

#ifdef LXC_NAT46
/* This packet needs nat46 translation */
if (entry->nat46 && !skb->cb[CB_NAT46_STATE])
skb->cb[CB_NAT46_STATE] = NAT46;
#endif

#ifdef CONNTRACK_ACCOUNTING
/* FIXME: This is slow, per-cpu counters? */
if (in) {
Expand Down Expand Up @@ -232,6 +238,10 @@ static inline int __inline__ ct_lookup6(void *map, struct ipv6_ct_tuple *tuple,
(tuple->nexthdr << 8) | tuple->flags);
ret = __ct_lookup(map, skb, tuple, action, in, NULL);

#ifdef LXC_NAT46
skb->cb[CB_NAT46_STATE] = NAT46_CLEAR;
#endif

/* No entries found, packet must be eligible for creating a CT entry */
if (ret == CT_NEW && action != ACTION_CREATE)
ret = DROP_CT_CANT_CREATE;
Expand Down Expand Up @@ -436,6 +446,11 @@ static inline int __inline__ ct_create4(void *map, struct ipv4_ct_tuple *tuple,
entry.tx_bytes = skb->len;
}

#ifdef LXC_NAT46
if (skb->cb[CB_NAT46_STATE] == NAT64)
entry.nat46 = !in;
#endif

cilium_trace(skb, DBG_CT_CREATED, (ntohs(tuple->sport) << 16) | ntohs(tuple->dport),
(tuple->nexthdr << 8) | tuple->flags);
cilium_trace(skb, DBG_CT_CREATED2, tuple->addr, rev_nat_index);
Expand Down
2 changes: 2 additions & 0 deletions bpf/lib/dbg.h
Expand Up @@ -63,6 +63,8 @@ enum {
DBG_CAPTURE_FROM_OVERLAY,
DBG_CAPTURE_DELIVERY,
DBG_CAPTURE_FROM_LB,
DBG_CAPTURE_AFTER_V46,
DBG_CAPTURE_AFTER_V64,
};

#ifdef DEBUG
Expand Down
7 changes: 7 additions & 0 deletions bpf/lib/ipv6.h
Expand Up @@ -270,4 +270,11 @@ static inline __be32 ipv6_pseudohdr_checksum(struct ipv6hdr *hdr,
return sum;
}

/*
 * ipv6_addr_is_mapped - test for an IPv4-mapped IPv6 address
 * @addr: IPv6 address to test, in network byte order
 *
 * An IPv4-mapped address has the form 0:0:0:0:0:FFFF::/96, i.e. bytes 0..9
 * are zero and bytes 10..11 are 0xff.
 *
 * The 0xffff marker is compared byte by byte rather than as a 32-bit
 * constant: a word-sized constant such as 0xFFFF0000 only matches the
 * in-memory layout on little-endian hosts.
 *
 * NOTE(review): relies on union v6addr exposing the raw bytes as addr[16]
 * overlaying p1..p4 (as the NAT46_PREFIX initializer suggests) - confirm
 * against the union definition.
 *
 * Returns non-zero if @addr is IPv4-mapped, 0 otherwise.
 */
static inline int ipv6_addr_is_mapped(union v6addr *addr)
{
	return addr->p1 == 0 && addr->p2 == 0 &&
	       addr->addr[8] == 0 && addr->addr[9] == 0 &&
	       addr->addr[10] == 0xff && addr->addr[11] == 0xff;
}
#endif /* __LIB_IPV6__ */
52 changes: 30 additions & 22 deletions bpf/lib/nat46.h
Expand Up @@ -26,6 +26,17 @@
#include "eth.h"
#include "dbg.h"

/*
 * LXC_NAT46 gates the container NAT46/NAT64 code paths. It ends up defined
 * only when ENABLE_NAT46, ENABLE_IPV4 and CONNTRACK are all defined; in every
 * other case it is left undefined. Requesting ENABLE_NAT46 without its
 * prerequisites produces a build warning.
 */
#undef LXC_NAT46
#if defined ENABLE_NAT46
#if defined ENABLE_IPV4 && defined CONNTRACK
#define LXC_NAT46
#else
#warning "ENABLE_NAT46 requires ENABLE_IPV4 and CONNTRACK"
#endif
#endif

static inline int get_csum_offset(__u8 protocol)
{
int csum_off;
Expand Down Expand Up @@ -224,12 +235,11 @@ static inline int ipv6_prefix_match(struct in6_addr *addr,
/*
* ipv4 to ipv6 stateless nat
* (s4,d4) -> (s6,d6)
* s6 = v6prefix_src<s4>
* d6 = v6prefix_dst<d4>
* s6 = nat46_prefix<s4>
* d6 = nat46_prefix<d4> or v6_dst if non null
*/
static inline int ipv4_to_ipv6(struct __sk_buff *skb, struct iphdr *ip4,
int nh_off, union v6addr *v6prefix_src,
union v6addr *v6predix_dst)
int nh_off, union v6addr *v6_dst)
{
struct ipv6hdr v6 = {};
struct iphdr v4;
Expand All @@ -238,6 +248,7 @@ static inline int ipv4_to_ipv6(struct __sk_buff *skb, struct iphdr *ip4,
__be16 v4hdr_len;
__be16 protocol = htons(ETH_P_IPV6);
__u64 csum_flags = BPF_F_PSEUDO_HDR;
union v6addr nat46_prefix = NAT46_PREFIX;

if (skb_load_bytes(skb, nh_off, &v4, sizeof(v4)) < 0)
return DROP_INVALID;
Expand All @@ -247,15 +258,22 @@ static inline int ipv4_to_ipv6(struct __sk_buff *skb, struct iphdr *ip4,

/* build v6 header */
v6.version = 0x6;
v6.saddr.in6_u.u6_addr32[0] = v6prefix_src->p1;
v6.saddr.in6_u.u6_addr32[1] = v6prefix_src->p2;
v6.saddr.in6_u.u6_addr32[2] = v6prefix_src->p3;
v6.saddr.in6_u.u6_addr32[0] = nat46_prefix.p1;
v6.saddr.in6_u.u6_addr32[1] = nat46_prefix.p2;
v6.saddr.in6_u.u6_addr32[2] = nat46_prefix.p3;
v6.saddr.in6_u.u6_addr32[3] = v4.saddr;

v6.daddr.in6_u.u6_addr32[0] = v6predix_dst->p1;
v6.daddr.in6_u.u6_addr32[1] = v6predix_dst->p2;
v6.daddr.in6_u.u6_addr32[2] = v6predix_dst->p3;
v6.daddr.in6_u.u6_addr32[3] = htonl((ntohl(v6predix_dst->p4) & 0xFFFF0000) | (ntohl(v4.daddr) & 0xFFFF));
if (v6_dst) {
v6.daddr.in6_u.u6_addr32[0] = v6_dst->p1;
v6.daddr.in6_u.u6_addr32[1] = v6_dst->p2;
v6.daddr.in6_u.u6_addr32[2] = v6_dst->p3;
v6.daddr.in6_u.u6_addr32[3] = v6_dst->p4;
} else {
v6.daddr.in6_u.u6_addr32[0] = nat46_prefix.p1;
v6.daddr.in6_u.u6_addr32[1] = nat46_prefix.p2;
v6.daddr.in6_u.u6_addr32[2] = nat46_prefix.p3;
v6.daddr.in6_u.u6_addr32[3] = htonl((ntohl(nat46_prefix.p4) & 0xFFFF0000) | (ntohl(v4.daddr) & 0xFFFF));
}

if (v4.protocol == IPPROTO_ICMP)
v6.nexthdr = IPPROTO_ICMPV6;
Expand Down Expand Up @@ -314,9 +332,7 @@ static inline int ipv4_to_ipv6(struct __sk_buff *skb, struct iphdr *ip4,
* s4 = <ipv4-range>.<lxc-id>
* d4 = d6[96 .. 127]
*/
static inline int ipv6_to_ipv4(struct __sk_buff *skb, int nh_off,
union v6addr *v6prefix_dst,
__u32 saddr)
static inline int ipv6_to_ipv4(struct __sk_buff *skb, int nh_off, __be32 saddr)
{
struct ipv6hdr v6;
struct iphdr v4 = {};
Expand All @@ -332,13 +348,6 @@ static inline int ipv6_to_ipv4(struct __sk_buff *skb, int nh_off,
if (ipv6_hdrlen(skb, nh_off, &v6.nexthdr) != sizeof(v6))
return DROP_INVALID_EXTHDR;

if (!ipv6_prefix_match(&v6.daddr, v6prefix_dst)) {
#ifdef DEBUG_NAT46
printk("v64 nat dst prefix mismatch\n");
#endif
return 0;
}

/* build v4 header */
v4.ihl = 0x5;
v4.version = 0x4;
Expand Down Expand Up @@ -400,5 +409,4 @@ static inline int ipv6_to_ipv4(struct __sk_buff *skb, int nh_off,

return 0;
}

#endif /* __LIB_NAT46__ */
3 changes: 1 addition & 2 deletions bpf/node_config.h
Expand Up @@ -28,8 +28,7 @@
#define HOST_ID 1
#define WORLD_ID 2
#define HOST_IFINDEX_MAC { .addr = { 0xce, 0x72, 0xa7, 0x03, 0x88, 0x56 } }
#define NAT46_SRC_PREFIX { .addr = { 0xbe, 0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xa, 0x0, 0x0, 0x0, 0x0, 0x0 } }
#define NAT46_DST_PREFIX { .addr = { 0xbe, 0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xa, 0x0, 0x0, 0x0, 0x0, 0x0 } }
#define NAT46_PREFIX { .addr = { 0xbe, 0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xa, 0x0, 0x0, 0x0, 0x0, 0x0 } }
#define IPV4_RANGE 0xf50a
#define IPV4_MASK 0xffff
#define IPV4_CLUSTER_MASK 0xff0000
Expand Down
2 changes: 1 addition & 1 deletion common/addressing/defaults.go
Expand Up @@ -32,7 +32,7 @@ const (
// Default IPv4 prefix length of entire cluster
DefaultIPv4ClusterPrefixLen = 8
// Default IPv6 prefix to represent NATed IPv4 addresses
DefaultNAT46Prefix = "aa46::/48"
DefaultNAT46Prefix = "0:0:0:0:0:FFFF::/96"
)

var (
Expand Down
6 changes: 6 additions & 0 deletions common/bpf/debug.go
Expand Up @@ -28,6 +28,8 @@ const (
DBG_CAPTURE_FROM_OVERLAY
DBG_CAPTURE_DELIVERY
DBG_CAPTURE_FROM_LB
DBG_CAPTURE_AFTER_V46
DBG_CAPTURE_AFTER_V64
)

const (
Expand Down Expand Up @@ -200,6 +202,10 @@ func (n *DebugCapture) Dump(dissect bool, data []byte, prefix string) {
fmt.Printf("Delivery to ifindex %d\n", n.Arg1)
case DBG_CAPTURE_FROM_LB:
fmt.Printf("Incoming packet to load balancer on ifindex %d\n", n.Arg1)
case DBG_CAPTURE_AFTER_V46:
fmt.Printf("Packet after nat46 ifindex %d\n", n.Arg1)
case DBG_CAPTURE_AFTER_V64:
fmt.Printf("Packet after nat64 ifindex %d\n", n.Arg1)
default:
fmt.Printf("Unknown message type=%d arg1=%d\n", n.SubType, n.Arg1)
}
Expand Down
3 changes: 1 addition & 2 deletions daemon/daemon/daemon.go
Expand Up @@ -205,8 +205,7 @@ func (d *Daemon) init() error {
fmt.Fprintf(fw, "#define IPV4_CLUSTER_MASK %#x\n", binary.LittleEndian.Uint32(ipv4ClusterRange.Mask))

if nat46Range := d.conf.NAT46Prefix; nat46Range != nil {
fw.WriteString(common.FmtDefineAddress("NAT46_SRC_PREFIX", nat46Range.IP))
fw.WriteString(common.FmtDefineAddress("NAT46_DST_PREFIX", nat46Range.IP))
fw.WriteString(common.FmtDefineAddress("NAT46_PREFIX", nat46Range.IP))
}

fw.WriteString(common.FmtDefineAddress("HOST_IP", hostIP))
Expand Down
2 changes: 1 addition & 1 deletion daemon/main.go
Expand Up @@ -355,7 +355,7 @@ func initEnv(ctx *cli.Context) error {
}

config.Opts.Set(types.OptionDropNotify, true)
config.Opts.Set(types.OptionNAT46, false)
config.Opts.Set(types.OptionNAT46, true)
config.Opts.Set(daemon.OptionPolicyTracing, enableTracing)
config.Opts.Set(types.OptionConntrack, !disableConntrack)
config.Opts.Set(types.OptionConntrackAccounting, !disableConntrack)
Expand Down

0 comments on commit 9e108c1

Please sign in to comment.