Skip to content

Commit b5facfd

Browse files
ahsalamdavem330
authored andcommitted
ipv6: sr: Compute flowlabel for outer IPv6 header of seg6 encap mode
ECMP (equal-cost multipath) hashes are typically computed on the packets' 5-tuple (src IP, dst IP, src port, dst port, L4 proto). For encapsulated packets, the L4 data is not readily available and ECMP hashing will often revert to (src IP, dst IP). This will lead to traffic polarization on a single ECMP path, causing congestion and waste of network capacity. In IPv6, the 20-bit flow label field is also used as part of the ECMP hash. In the absence of L4 data, the hashing will be on (src IP, dst IP, flow label). Having a non-zero flow label is thus important for proper traffic load balancing when L4 data is unavailable (i.e., when packets are encapsulated). Currently, the seg6_do_srh_encap() function extracts the original packet's flow label and sets it as the outer IPv6 flow label. There are two issues with this behaviour: a) There is no guarantee that the inner flow label is set by the source. b) If the original packet is not IPv6, the flow label will be set to zero (e.g., IPv4 or L2 encap). This patch adds a function, named seg6_make_flowlabel(), that computes a flow label from a given skb. It supports IPv6, IPv4 and L2 payloads, and leverages the per-namespace 'seg6_flowlabel' sysctl value. The currently supported behaviours are as follows: -1 set flowlabel to zero. 0 copy flowlabel from the inner packet in case of inner IPv6 (set flowlabel to 0 in case of IPv4/L2). 1 Compute the flowlabel using seg6_make_flowlabel(). This patch has been tested for IPv6, IPv4, and L2 traffic. Signed-off-by: Ahmed Abdelsalam <amsalam20@gmail.com> Acked-by: David Lebrun <dlebrun@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent c749fa1 commit b5facfd

File tree

3 files changed

+31
-2
lines changed

3 files changed

+31
-2
lines changed

include/net/netns/ipv6.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ struct netns_sysctl_ipv6 {
4343
int max_hbh_opts_cnt;
4444
int max_dst_opts_len;
4545
int max_hbh_opts_len;
46+
int seg6_flowlabel;
4647
};
4748

4849
struct netns_ipv6 {

net/ipv6/seg6_iptunnel.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,24 @@ static void set_tun_src(struct net *net, struct net_device *dev,
9191
rcu_read_unlock();
9292
}
9393

94+
/* Compute flowlabel for outer IPv6 header */
95+
static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
96+
struct ipv6hdr *inner_hdr)
97+
{
98+
int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
99+
__be32 flowlabel = 0;
100+
u32 hash;
101+
102+
if (do_flowlabel > 0) {
103+
hash = skb_get_hash(skb);
104+
rol32(hash, 16);
105+
flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
106+
} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
107+
flowlabel = ip6_flowlabel(inner_hdr);
108+
}
109+
return flowlabel;
110+
}
111+
94112
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
95113
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
96114
{
@@ -99,6 +117,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
99117
struct ipv6hdr *hdr, *inner_hdr;
100118
struct ipv6_sr_hdr *isrh;
101119
int hdrlen, tot_len, err;
120+
__be32 flowlabel;
102121

103122
hdrlen = (osrh->hdrlen + 1) << 3;
104123
tot_len = hdrlen + sizeof(*hdr);
@@ -119,12 +138,13 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
119138
* decapsulation will overwrite inner hlim with outer hlim
120139
*/
121140

141+
flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
122142
if (skb->protocol == htons(ETH_P_IPV6)) {
123143
ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
124-
ip6_flowlabel(inner_hdr));
144+
flowlabel);
125145
hdr->hop_limit = inner_hdr->hop_limit;
126146
} else {
127-
ip6_flow_hdr(hdr, 0, 0);
147+
ip6_flow_hdr(hdr, 0, flowlabel);
128148
hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
129149
}
130150

net/ipv6/sysctl_net_ipv6.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,13 @@ static struct ctl_table ipv6_table_template[] = {
152152
.extra1 = &zero,
153153
.extra2 = &one,
154154
},
155+
{
156+
.procname = "seg6_flowlabel",
157+
.data = &init_net.ipv6.sysctl.seg6_flowlabel,
158+
.maxlen = sizeof(int),
159+
.mode = 0644,
160+
.proc_handler = proc_dointvec
161+
},
155162
{ }
156163
};
157164

@@ -217,6 +224,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
217224
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
218225
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
219226
ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
227+
ipv6_table[15].data = &net->ipv6.sysctl.seg6_flowlabel;
220228

221229
ipv6_route_table = ipv6_route_sysctl_init(net);
222230
if (!ipv6_route_table)

0 commit comments

Comments
 (0)