Skip to content

Commit 9e8ce79

Browse files
jrfastabdavem330
authored andcommitted
net: sched: cls_u32 add bit to specify software only rules
In the initial implementation the only way to stop a rule from being inserted into the hardware table was via the device feature flag. However this doesn't work well when working on an end host system where packets are expect to hit both the hardware and software datapaths. For example we can imagine a rule that will match an IP address and increment a field. If we install this rule in both hardware and software we may increment the field twice. To date we have only added support for the drop action so we have been able to ignore these cases. But as we extend the action support we will hit this example plus more such cases. Arguably these are not even corner cases in many working systems these cases will be common. To avoid forcing the driver to always abort (i.e. the above example) this patch adds a flag to add a rule in software only. A careful user can use this flag to build software and hardware datapaths that work together. One example we have found particularly useful is to use hardware resources to set the skb->mark on the skb when the match may be expensive to run in software but a mark lookup in a hash table is cheap. The idea here is hardware can do in one lookup what the u32 classifier may need to traverse multiple lists and hash tables to compute. The flag is only passed down on inserts. On deletion to avoid stale references in hardware we always try to remove a rule if it exists. The flags field is part of the classifier specific options. Although it is tempting to lift this into the generic structure doing this proves difficult do to how the tc netlink attributes are implemented along with how the dump/change routines are called. There is also precedence for putting seemingly generic pieces in the specific classifier options such as TCA_U32_POLICE, TCA_U32_ACT, etc. So although not ideal I've left FLAGS in the u32 options as well as it simplifies the code greatly and user space has already learned how to manage these bits ala 'tc' tool. Another thing if trying to update a rule we require the flags to be unchanged. This is to force user space, software u32 and the hardware u32 to keep in sync. Thanks to Simon Horman for catching this case. Signed-off-by: John Fastabend <john.r.fastabend@intel.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 2b6ab0d commit 9e8ce79

File tree

3 files changed

+39
-12
lines changed

3 files changed

+39
-12
lines changed

include/net/pkt_cls.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,12 +392,21 @@ struct tc_cls_u32_offload {
392392
};
393393
};
394394

395-
static inline bool tc_should_offload(struct net_device *dev)
395+
/* tca flags definitions */
396+
#define TCA_CLS_FLAGS_SKIP_HW 1
397+
398+
static inline bool tc_should_offload(struct net_device *dev, u32 flags)
396399
{
397400
if (!(dev->features & NETIF_F_HW_TC))
398401
return false;
399402

400-
return dev->netdev_ops->ndo_setup_tc;
403+
if (flags & TCA_CLS_FLAGS_SKIP_HW)
404+
return false;
405+
406+
if (!dev->netdev_ops->ndo_setup_tc)
407+
return false;
408+
409+
return true;
401410
}
402411

403412
#endif

include/uapi/linux/pkt_cls.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ enum {
172172
TCA_U32_INDEV,
173173
TCA_U32_PCNT,
174174
TCA_U32_MARK,
175+
TCA_U32_FLAGS,
175176
__TCA_U32_MAX
176177
};
177178

net/sched/cls_u32.c

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct tc_u_knode {
5959
#ifdef CONFIG_CLS_U32_PERF
6060
struct tc_u32_pcnt __percpu *pf;
6161
#endif
62+
u32 flags;
6263
#ifdef CONFIG_CLS_U32_MARK
6364
u32 val;
6465
u32 mask;
@@ -434,15 +435,17 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
434435
offload.type = TC_SETUP_CLSU32;
435436
offload.cls_u32 = &u32_offload;
436437

437-
if (tc_should_offload(dev)) {
438+
if (tc_should_offload(dev, 0)) {
438439
offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
439440
offload.cls_u32->knode.handle = handle;
440441
dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
441442
tp->protocol, &offload);
442443
}
443444
}
444445

445-
static void u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
446+
static void u32_replace_hw_hnode(struct tcf_proto *tp,
447+
struct tc_u_hnode *h,
448+
u32 flags)
446449
{
447450
struct net_device *dev = tp->q->dev_queue->dev;
448451
struct tc_cls_u32_offload u32_offload = {0};
@@ -451,7 +454,7 @@ static void u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
451454
offload.type = TC_SETUP_CLSU32;
452455
offload.cls_u32 = &u32_offload;
453456

454-
if (tc_should_offload(dev)) {
457+
if (tc_should_offload(dev, flags)) {
455458
offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
456459
offload.cls_u32->hnode.divisor = h->divisor;
457460
offload.cls_u32->hnode.handle = h->handle;
@@ -471,7 +474,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
471474
offload.type = TC_SETUP_CLSU32;
472475
offload.cls_u32 = &u32_offload;
473476

474-
if (tc_should_offload(dev)) {
477+
if (tc_should_offload(dev, 0)) {
475478
offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
476479
offload.cls_u32->hnode.divisor = h->divisor;
477480
offload.cls_u32->hnode.handle = h->handle;
@@ -482,7 +485,9 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
482485
}
483486
}
484487

485-
static void u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n)
488+
static void u32_replace_hw_knode(struct tcf_proto *tp,
489+
struct tc_u_knode *n,
490+
u32 flags)
486491
{
487492
struct net_device *dev = tp->q->dev_queue->dev;
488493
struct tc_cls_u32_offload u32_offload = {0};
@@ -491,7 +496,7 @@ static void u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n)
491496
offload.type = TC_SETUP_CLSU32;
492497
offload.cls_u32 = &u32_offload;
493498

494-
if (tc_should_offload(dev)) {
499+
if (tc_should_offload(dev, flags)) {
495500
offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
496501
offload.cls_u32->knode.handle = n->handle;
497502
offload.cls_u32->knode.fshift = n->fshift;
@@ -679,6 +684,7 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
679684
[TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) },
680685
[TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
681686
[TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) },
687+
[TCA_U32_FLAGS] = { .type = NLA_U32 },
682688
};
683689

684690
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
@@ -786,6 +792,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
786792
#endif
787793
new->fshift = n->fshift;
788794
new->res = n->res;
795+
new->flags = n->flags;
789796
RCU_INIT_POINTER(new->ht_down, n->ht_down);
790797

791798
/* bump reference count as long as we hold pointer to structure */
@@ -825,7 +832,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
825832
struct tc_u32_sel *s;
826833
struct nlattr *opt = tca[TCA_OPTIONS];
827834
struct nlattr *tb[TCA_U32_MAX + 1];
828-
u32 htid;
835+
u32 htid, flags = 0;
829836
int err;
830837
#ifdef CONFIG_CLS_U32_PERF
831838
size_t size;
@@ -838,13 +845,19 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
838845
if (err < 0)
839846
return err;
840847

848+
if (tb[TCA_U32_FLAGS])
849+
flags = nla_get_u32(tb[TCA_U32_FLAGS]);
850+
841851
n = (struct tc_u_knode *)*arg;
842852
if (n) {
843853
struct tc_u_knode *new;
844854

845855
if (TC_U32_KEY(n->handle) == 0)
846856
return -EINVAL;
847857

858+
if (n->flags != flags)
859+
return -EINVAL;
860+
848861
new = u32_init_knode(tp, n);
849862
if (!new)
850863
return -ENOMEM;
@@ -861,7 +874,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
861874
u32_replace_knode(tp, tp_c, new);
862875
tcf_unbind_filter(tp, &n->res);
863876
call_rcu(&n->rcu, u32_delete_key_rcu);
864-
u32_replace_hw_knode(tp, new);
877+
u32_replace_hw_knode(tp, new, flags);
865878
return 0;
866879
}
867880

@@ -889,7 +902,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
889902
rcu_assign_pointer(tp_c->hlist, ht);
890903
*arg = (unsigned long)ht;
891904

892-
u32_replace_hw_hnode(tp, ht);
905+
u32_replace_hw_hnode(tp, ht, flags);
893906
return 0;
894907
}
895908

@@ -940,6 +953,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
940953
RCU_INIT_POINTER(n->ht_up, ht);
941954
n->handle = handle;
942955
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
956+
n->flags = flags;
943957
tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
944958
n->tp = tp;
945959

@@ -972,7 +986,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
972986

973987
RCU_INIT_POINTER(n->next, pins);
974988
rcu_assign_pointer(*ins, n);
975-
u32_replace_hw_knode(tp, n);
989+
u32_replace_hw_knode(tp, n, flags);
976990
*arg = (unsigned long)n;
977991
return 0;
978992
}
@@ -1077,6 +1091,9 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
10771091
nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
10781092
goto nla_put_failure;
10791093

1094+
if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1095+
goto nla_put_failure;
1096+
10801097
#ifdef CONFIG_CLS_U32_MARK
10811098
if ((n->val || n->mask)) {
10821099
struct tc_u32_mark mark = {.val = n->val,

0 commit comments

Comments
 (0)