Skip to content

Commit

Permalink
WRR support for BPF load balancer.
Browse files Browse the repository at this point in the history
Go side changes

1. Extend types.LBBackEnd, Service4Value, Service6Value to take Weight.
2. Generate ServiceRRSeq{}, a sequence of backends to pick based on weights.
3. Create cilium_lb4_rr_seq BPF map mapping master Service4Key to ServiceRRSeq
4. Create cilium_lb6_rr_seq BPF map mapping master Service6Key to ServiceRRSeq

BPF data path changes

1. Split lb_select_slave into lb6_select_slave and lb4_select_slave.
2. If the master has slaves with weights configured, do weight-based load balancing.
3. Otherwise, do hash-based load balancing.

Weight based load balancing.

1. Compute hash like before
2. Lookup lb_sequence and index into (hash % sequence length) for slave idx.

Addressed @aanm, @daniel & @tgraf code review comments.

Signed-off-by: Madhu Challa <madhu@cilium.io>
  • Loading branch information
mchalla committed Mar 21, 2017
1 parent 5104ad5 commit 75f9b0a
Show file tree
Hide file tree
Showing 18 changed files with 444 additions and 76 deletions.
3 changes: 3 additions & 0 deletions api/v1/models/backend_address.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ type BackendAddress struct {

// Layer 4 port number
Port uint16 `json:"port,omitempty"`

// Weight for Round Robin
Weight uint16 `json:"weight,omitempty"`
}

// Validate validates this backend address
Expand Down
4 changes: 4 additions & 0 deletions api/v1/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,10 @@ definitions:
description: Layer 4 port number
type: integer
format: uint16
weight:
description: Weight for Round Robin
type: integer
format: uint16
Service:
description: Collection of endpoints to be served
type: object
Expand Down
5 changes: 5 additions & 0 deletions api/v1/server/embedded_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,11 @@ func init() {
"description": "Layer 4 port number",
"type": "integer",
"format": "uint16"
},
"weight": {
"description": "Weight for Round Robin",
"type": "integer",
"format": "uint16"
}
}
},
Expand Down
5 changes: 3 additions & 2 deletions bpf/bpf_lb.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#define DISABLE_LOOPBACK_LB

#include <node_config.h>
#include <netdev_config.h>

#include <bpf/api.h>
Expand Down Expand Up @@ -95,7 +96,7 @@ static inline int handle_ipv6(struct __sk_buff *skb)
return TC_ACT_OK;
}

slave = lb_select_slave(skb, svc->count);
slave = lb6_select_slave(skb, &key, svc->count, svc->weight);
if (!(svc = lb6_lookup_slave(skb, &key, slave)))
return DROP_NO_SERVICE;

Expand Down Expand Up @@ -153,7 +154,7 @@ static inline int handle_ipv4(struct __sk_buff *skb)
return TC_ACT_OK;
}

slave = lb_select_slave(skb, svc->count);
slave = lb4_select_slave(skb, &key, svc->count, svc->weight);
if (!(svc = lb4_lookup_slave(skb, &key, slave)))
return DROP_NO_SERVICE;

Expand Down
8 changes: 8 additions & 0 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ struct lb6_service {
__u16 port;
__u16 count;
__u16 rev_nat_index;
__u16 weight;
} __attribute__((packed));

struct lb6_reverse_nat {
Expand All @@ -220,13 +221,20 @@ struct lb4_service {
__u16 port;
__u16 count;
__u16 rev_nat_index;
__u16 weight;
} __attribute__((packed));

struct lb4_reverse_nat {
__be32 address;
__u16 port;
} __attribute__((packed));

/*
 * Weighted round-robin selection sequence stored per service in the
 * cilium_lb4_rr_seq / cilium_lb6_rr_seq maps.
 *
 * The array capacity LB_RR_MAX_SEQ is generated by the daemon in
 * node_config.h.
 */
struct lb_sequence {
/* Length of the sequence; lb_next_rr() indexes idx[] with (hash % count). */
__u16 count;
/* Backend indices; lb_next_rr() adds 1 because slave 0 is the master slot. */
__u16 idx[LB_RR_MAX_SEQ];
};

struct ct_state {
__u16 rev_nat_index;
__u16 loopback:1,
Expand Down
1 change: 1 addition & 0 deletions bpf/lib/dbg.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ enum {
DBG_LB4_LOOPBACK_SNAT,
DBG_LB4_LOOPBACK_SNAT_REV,
DBG_CT_LOOKUP4,
DBG_RR_SLAVE_SEL,
};

/* Capture types */
Expand Down
117 changes: 94 additions & 23 deletions bpf/lib/lb.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

/* FIXME: Make configurable */
#define CILIUM_LB_MAP_MAX_ENTRIES 65536
#define CILIUM_LB_MAP_MAX_FE 256

struct bpf_elf_map __section_maps cilium_lb6_reverse_nat = {
.type = BPF_MAP_TYPE_HASH,
Expand All @@ -51,6 +52,14 @@ struct bpf_elf_map __section_maps cilium_lb6_services = {
.max_elem = CILIUM_LB_MAP_MAX_ENTRIES,
};

/*
 * Hash map from an IPv6 service key (struct lb6_key) to that service's
 * weighted round-robin sequence (struct lb_sequence).
 *
 * lb6_select_slave() looks it up only when the service has a non-zero
 * weight. Sized by CILIUM_LB_MAP_MAX_FE rather than
 * CILIUM_LB_MAP_MAX_ENTRIES.
 */
struct bpf_elf_map __section_maps cilium_lb6_rr_seq = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(struct lb6_key),
.size_value = sizeof(struct lb_sequence),
.pinning = PIN_GLOBAL_NS,
.max_elem = CILIUM_LB_MAP_MAX_FE,
};

struct bpf_elf_map __section_maps cilium_lb4_reverse_nat = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(__u16),
Expand All @@ -67,16 +76,78 @@ struct bpf_elf_map __section_maps cilium_lb4_services = {
.max_elem = CILIUM_LB_MAP_MAX_ENTRIES,
};

/*
 * Hash map from an IPv4 service key (struct lb4_key) to that service's
 * weighted round-robin sequence (struct lb_sequence).
 *
 * lb4_select_slave() looks it up only when the service has a non-zero
 * weight. Sized by CILIUM_LB_MAP_MAX_FE rather than
 * CILIUM_LB_MAP_MAX_ENTRIES.
 */
struct bpf_elf_map __section_maps cilium_lb4_rr_seq = {
.type = BPF_MAP_TYPE_HASH,
.size_key = sizeof(struct lb4_key),
.size_value = sizeof(struct lb_sequence),
.pinning = PIN_GLOBAL_NS,
.max_elem = CILIUM_LB_MAP_MAX_FE,
};
#define REV_NAT_F_TUPLE_SADDR 1
/*
 * Trace wrapper for the load-balancer code: forwards to cilium_trace when
 * LB_DEBUG is defined, otherwise expands to nothing so the call and its
 * arguments are dropped from the program.
 */
#ifdef LB_DEBUG
#define cilium_trace_lb cilium_trace
#else
#define cilium_trace_lb(a, b, c, d)
#endif

static inline int lb_select_slave(struct __sk_buff *skb, __u16 count)
/*
 * lb_next_rr - pick a slave from a weighted round-robin sequence.
 * @skb:  packet being balanced; only passed on to the trace call
 * @seq:  sequence looked up for the service's master key
 * @hash: packet hash used to index into the sequence
 *
 * Returns the 1-based slave index (slave 0 is reserved for the master
 * slot), or 0 when no slave could be selected so the caller can fall back
 * to plain hash-based selection.
 */
static inline int lb_next_rr(struct __sk_buff *skb,
			     struct lb_sequence *seq,
			     __be16 hash)
{
	int slave = 0;
	__u16 offset;

	/* An empty sequence would turn the modulo below into a division by zero. */
	if (seq->count == 0)
		return 0;

	/*
	 * Keep the full 16-bit remainder: storing it in a __u8 would wrap for
	 * count > 256 and could pass the bound check with the wrong entry.
	 */
	offset = hash % seq->count;

	/* Explicit bound check against the capacity of idx[]. */
	if (offset < LB_RR_MAX_SEQ) {
		/* Slave 0 is reserved for the master slot */
		slave = seq->idx[offset] + 1;
		cilium_trace_lb(skb, DBG_RR_SLAVE_SEL, hash, slave);
	}

	return slave;
}

/*
 * lb6_select_slave - choose the slave index for an IPv6 service.
 * @skb:    packet being balanced; its hash drives the selection
 * @key:    master service key, used to look up the weighted sequence
 * @count:  number of slaves of the service (expected to be non-zero)
 * @weight: non-zero when the service has weights configured
 *
 * When weights are configured and a non-empty sequence exists in
 * cilium_lb6_rr_seq, the slave comes from that sequence. Otherwise, or
 * when the weighted lookup yields no slave, selection falls back to
 * (hash % count) + 1.
 *
 * Returns the 1-based slave index; slave 0 is reserved for the master slot.
 */
static inline int lb6_select_slave(struct __sk_buff *skb,
				   struct lb6_key *key,
				   __u16 count, __u16 weight)
{
	__be16 hash = get_hash_recalc(skb);
	struct lb_sequence *seq;
	int slave = 0;

	if (weight) {
		seq = map_lookup_elem(&cilium_lb6_rr_seq, key);
		if (seq && seq->count != 0)
			slave = lb_next_rr(skb, seq, hash);
	}

	/* slave == 0 means the weighted path did not pick anything. */
	if (slave == 0) {
		/* Slave 0 is reserved for the master slot */
		slave = (hash % count) + 1;
		cilium_trace_lb(skb, DBG_PKT_HASH, hash, slave);
	}

	return slave;
}

/*
 * lb4_select_slave - choose the slave index for an IPv4 service.
 *
 * Tries the weighted round-robin sequence first when @weight is non-zero
 * and a non-empty sequence is stored for @key; if that yields nothing,
 * falls back to (hash % @count) + 1. The result is always 1-based because
 * slave 0 is reserved for the master slot.
 */
static inline int lb4_select_slave(struct __sk_buff *skb,
				   struct lb4_key *key,
				   __u16 count, __u16 weight)
{
	__be16 hash = get_hash_recalc(skb);
	struct lb_sequence *rr_seq;
	int chosen;

	if (weight) {
		rr_seq = map_lookup_elem(&cilium_lb4_rr_seq, key);
		chosen = (rr_seq && rr_seq->count != 0) ?
			 lb_next_rr(skb, rr_seq, hash) : 0;
		/* A non-zero result is a valid weighted pick; use it as is. */
		if (chosen != 0)
			return chosen;
	}

	/* Slave 0 is reserved for the master slot */
	chosen = (hash % count) + 1;
	cilium_trace_lb(skb, DBG_PKT_HASH, hash, chosen);

	return chosen;
}
Expand Down Expand Up @@ -154,7 +225,7 @@ static inline int __inline__ __lb6_rev_nat(struct __sk_buff *skb, int l4_off,
__be32 sum;
int ret;

cilium_trace(skb, DBG_LB6_REVERSE_NAT, nat->address.p4, nat->port);
cilium_trace_lb(skb, DBG_LB6_REVERSE_NAT, nat->address.p4, nat->port);

if (nat->port) {
ret = reverse_map_l4_port(skb, tuple->nexthdr, nat->port, l4_off, csum_off);
Expand Down Expand Up @@ -200,7 +271,7 @@ static inline int __inline__ lb6_rev_nat(struct __sk_buff *skb, int l4_off,
{
struct lb6_reverse_nat *nat;

cilium_trace(skb, DBG_LB6_REVERSE_NAT_LOOKUP, index, 0);
cilium_trace_lb(skb, DBG_LB6_REVERSE_NAT_LOOKUP, index, 0);
nat = map_lookup_elem(&cilium_lb6_reverse_nat, &index);
if (nat == NULL)
return 0;
Expand Down Expand Up @@ -244,7 +315,7 @@ static inline struct lb6_service *lb6_lookup_service(struct __sk_buff *skb,
if (key->dport) {
struct lb6_service *svc;

cilium_trace(skb, DBG_LB6_LOOKUP_MASTER, key->address.p4, key->dport);
cilium_trace_lb(skb, DBG_LB6_LOOKUP_MASTER, key->address.p4, key->dport);
svc = map_lookup_elem(&cilium_lb6_services, key);
if (svc && svc->count != 0)
return svc;
Expand All @@ -257,14 +328,14 @@ static inline struct lb6_service *lb6_lookup_service(struct __sk_buff *skb,
if (1) {
struct lb6_service *svc;

cilium_trace(skb, DBG_LB6_LOOKUP_MASTER, key->address.p4, key->dport);
cilium_trace_lb(skb, DBG_LB6_LOOKUP_MASTER, key->address.p4, key->dport);
svc = map_lookup_elem(&cilium_lb6_services, key);
if (svc && svc->count != 0)
return svc;
}
#endif

cilium_trace(skb, DBG_LB6_LOOKUP_MASTER_FAIL, key->address.p2, key->address.p3);
cilium_trace_lb(skb, DBG_LB6_LOOKUP_MASTER_FAIL, key->address.p2, key->address.p3);
return NULL;
}

Expand All @@ -274,10 +345,10 @@ static inline struct lb6_service *lb6_lookup_slave(struct __sk_buff *skb,
struct lb6_service *svc;

key->slave = slave;
cilium_trace(skb, DBG_LB6_LOOKUP_SLAVE, key->slave, key->dport);
cilium_trace_lb(skb, DBG_LB6_LOOKUP_SLAVE, key->slave, key->dport);
svc = map_lookup_elem(&cilium_lb6_services, key);
if (svc != NULL) {
cilium_trace(skb, DBG_LB6_LOOKUP_SLAVE_SUCCESS, svc->target.p4, svc->port);
cilium_trace_lb(skb, DBG_LB6_LOOKUP_SLAVE_SUCCESS, svc->target.p4, svc->port);
return svc;
}

Expand Down Expand Up @@ -319,7 +390,7 @@ static inline int __inline__ lb6_local(struct __sk_buff *skb, int l3_off, int l4
{
__u16 slave;

slave = lb_select_slave(skb, svc->count);
slave = lb6_select_slave(skb, key, svc->count, svc->weight);
if (!(svc = lb6_lookup_slave(skb, key, slave)))
return DROP_NO_SERVICE;

Expand All @@ -341,7 +412,7 @@ static inline int __inline__ __lb4_rev_nat(struct __sk_buff *skb, int l3_off, in
__be32 old_sip, new_sip, sum = 0;
int ret;

cilium_trace(skb, DBG_LB4_REVERSE_NAT, nat->address, nat->port);
cilium_trace_lb(skb, DBG_LB4_REVERSE_NAT, nat->address, nat->port);

if (nat->port) {
ret = reverse_map_l4_port(skb, tuple->nexthdr, nat->port, l4_off, csum_off);
Expand Down Expand Up @@ -372,7 +443,7 @@ static inline int __inline__ __lb4_rev_nat(struct __sk_buff *skb, int l3_off, in
if (IS_ERR(ret))
return ret;

cilium_trace(skb, DBG_LB4_LOOPBACK_SNAT_REV, old_dip, old_sip);
cilium_trace_lb(skb, DBG_LB4_LOOPBACK_SNAT_REV, old_dip, old_sip);

ret = skb_store_bytes(skb, l3_off + offsetof(struct iphdr, daddr), &old_sip, 4, 0);
if (IS_ERR(ret))
Expand Down Expand Up @@ -416,7 +487,7 @@ static inline int __inline__ lb4_rev_nat(struct __sk_buff *skb, int l3_off, int
{
struct lb4_reverse_nat *nat;

cilium_trace(skb, DBG_LB4_REVERSE_NAT_LOOKUP, ct_state->rev_nat_index, 0);
cilium_trace_lb(skb, DBG_LB4_REVERSE_NAT_LOOKUP, ct_state->rev_nat_index, 0);
nat = map_lookup_elem(&cilium_lb4_reverse_nat, &ct_state->rev_nat_index);
if (nat == NULL)
return 0;
Expand Down Expand Up @@ -459,7 +530,7 @@ static inline struct lb4_service *lb4_lookup_service(struct __sk_buff *skb,
struct lb4_service *svc;

/* FIXME: The verifier barks on these calls right now for some reason */
/* cilium_trace(skb, DBG_LB4_LOOKUP_MASTER, key->address, key->dport); */
/* cilium_trace_lb(skb, DBG_LB4_LOOKUP_MASTER, key->address, key->dport); */
svc = map_lookup_elem(&cilium_lb4_services, key);
if (svc && svc->count != 0)
return svc;
Expand All @@ -473,14 +544,14 @@ static inline struct lb4_service *lb4_lookup_service(struct __sk_buff *skb,
struct lb4_service *svc;

/* FIXME: The verifier barks on these calls right now for some reason */
/* cilium_trace(skb, DBG_LB4_LOOKUP_MASTER, key->address, key->dport); */
/* cilium_trace_lb(skb, DBG_LB4_LOOKUP_MASTER, key->address, key->dport); */
svc = map_lookup_elem(&cilium_lb4_services, key);
if (svc && svc->count != 0)
return svc;
}
#endif

cilium_trace(skb, DBG_LB4_LOOKUP_MASTER_FAIL, 0, 0);
cilium_trace_lb(skb, DBG_LB4_LOOKUP_MASTER_FAIL, 0, 0);
return NULL;
}

Expand All @@ -490,10 +561,10 @@ static inline struct lb4_service *lb4_lookup_slave(struct __sk_buff *skb,
struct lb4_service *svc;

key->slave = slave;
cilium_trace(skb, DBG_LB4_LOOKUP_SLAVE, key->slave, key->dport);
cilium_trace_lb(skb, DBG_LB4_LOOKUP_SLAVE, key->slave, key->dport);
svc = map_lookup_elem(&cilium_lb4_services, key);
if (svc != NULL) {
cilium_trace(skb, DBG_LB4_LOOKUP_SLAVE_SUCCESS, svc->target, svc->port);
cilium_trace_lb(skb, DBG_LB4_LOOKUP_SLAVE_SUCCESS, svc->target, svc->port);
return svc;
}

Expand All @@ -516,7 +587,7 @@ lb4_xlate(struct __sk_buff *skb, __be32 *new_daddr, __be32 *new_saddr,
sum = csum_diff(&key->address, 4, new_daddr, 4, 0);

if (new_saddr && *new_saddr) {
cilium_trace(skb, DBG_LB4_LOOPBACK_SNAT, *old_saddr, *new_saddr);
cilium_trace_lb(skb, DBG_LB4_LOOPBACK_SNAT, *old_saddr, *new_saddr);
ret = skb_store_bytes(skb, l3_off + offsetof(struct iphdr, saddr), new_saddr, 4, 0);
if (ret < 0)
return DROP_WRITE_ERROR;
Expand Down Expand Up @@ -555,7 +626,7 @@ static inline int __inline__ lb4_local(struct __sk_buff *skb, int l3_off, int l4
__be32 new_saddr = 0, new_daddr;
__u16 slave;

slave = lb_select_slave(skb, svc->count);
slave = lb4_select_slave(skb, key, svc->count, svc->weight);
if (!(svc = lb4_lookup_slave(skb, key, slave)))
return DROP_NO_SERVICE;

Expand Down
1 change: 1 addition & 0 deletions bpf/node_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@
#define ENABLE_ARP_RESPONDER
#define NODE_MAC { .addr = { 0xde, 0xad, 0xbe, 0xef, 0xc0, 0xde } }
#define ENABLE_IPV4
#define LB_RR_MAX_SEQ 31
7 changes: 6 additions & 1 deletion cilium/cmd/service_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,12 @@ func listServices() {
fmt.Fprintf(os.Stderr, "error parsing backend %+v", be)
continue
}
str := fmt.Sprintf("%d => %s (%d)", i+1, beA.String(), svc.ID)
var str string
if be.Weight != 0 {
str = fmt.Sprintf("%d => %s (W: %d, ID: %d)", i+1, beA.String(), be.Weight, svc.ID)
} else {
str = fmt.Sprintf("%d => %s (%d)", i+1, beA.String(), svc.ID)
}
besWithID = append(besWithID, str)
}

Expand Down

0 comments on commit 75f9b0a

Please sign in to comment.