Skip to content

Commit

Permalink
Merge branch 'tc-qevents' into next
Browse files Browse the repository at this point in the history
Petr Machata  says:

====================

To allow configuring user-defined actions as a result of inner workings of
a qdisc, a concept of qevents was recently introduced to the kernel.
Qevents are attach points for TC blocks, where filters can be put that are
executed as the packet hits well-defined points in the qdisc algorithms.
The attached blocks can be shared, in a manner similar to clsact ingress
and egress blocks, arbitrary classifiers with arbitrary actions can be put
on them, etc.

For example:

 # tc qdisc add dev eth0 root handle 1: \
	red limit 500K avpkt 1K qevent early_drop block 10
 # tc filter add block 10 \
	matchall action mirred egress mirror dev eth1

This patch set introduces the corresponding iproute2 support. Patch #1 adds
the new netlink attribute enumerators. Patch #2 adds a set of helpers to
implement qevents, and patch #3 adds generic documentation to tc.8. Patch #4
then adds two new qevents to the RED qdisc: mark and early_drop.

====================

Signed-off-by: David Ahern <dsahern@kernel.org>
  • Loading branch information
dsahern committed Jul 5, 2020
2 parents bc4d9f9 + d0e4504 commit 79ea019
Show file tree
Hide file tree
Showing 6 changed files with 315 additions and 4 deletions.
18 changes: 17 additions & 1 deletion man/man8/tc-red.8
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ packets
rate
.B ] [ probability
chance
.B ] [ adaptive ]
.B ] [ adaptive ] [ qevent early_drop block
index
.B ] [ qevent mark block
index
.B ]

.SH DESCRIPTION
Random Early Detection is a classless qdisc which manages its queue size
Expand Down Expand Up @@ -134,6 +138,18 @@ Goal of Adaptive RED is to make 'probability' dynamic value between 1% and 50% t
.B (max - min) / 2
.fi

.SH QEVENTS
See
.BR tc (8)
for some general notes about qevents. The RED qdisc supports the
following qevents:

.TP
early_drop
The associated block is executed when packets are early-dropped. This includes
non-ECT packets in ECN mode.
.TP
mark
The associated block is executed when packets are marked in ECN mode.

.SH EXAMPLE

.P
Expand Down
19 changes: 19 additions & 0 deletions man/man8/tc.8
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,25 @@ Traffic control filter that matches every packet. See
.BR tc-matchall (8)
for details.

.SH QEVENTS
Qdiscs may invoke user-configured actions when certain interesting events
take place in the qdisc. Each qevent can either be unused, or can have a
block attached to it. To this block are then attached filters using the "tc
block BLOCK_IDX" syntax. The block is executed when the qevent associated
with the attachment point takes place. For example, a packet could be
dropped, or delayed, etc., depending on the qdisc and the qevent in
question.

For example:
.PP
.RS
tc qdisc add dev eth0 root handle 1: red limit 500K avpkt 1K \\
qevent early_drop block 10
.RE
.RS
tc filter add block 10 matchall action mirred egress mirror dev eth1
.RE

.SH CLASSLESS QDISCS
The classless qdiscs are:
.TP
Expand Down
1 change: 1 addition & 0 deletions tc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ TCLIB += tc_red.o
TCLIB += tc_cbq.o
TCLIB += tc_estimator.o
TCLIB += tc_stab.o
TCLIB += tc_qevent.o

CFLAGS += -DCONFIG_GACT -DCONFIG_GACT_PROB
ifneq ($(IPT_LIB_DIR),)
Expand Down
30 changes: 27 additions & 3 deletions tc/q_red.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "utils.h"
#include "tc_util.h"
#include "tc_qevent.h"

#include "tc_red.h"

Expand All @@ -30,11 +31,20 @@ static void explain(void)
fprintf(stderr,
"Usage: ... red limit BYTES [min BYTES] [max BYTES] avpkt BYTES [burst PACKETS]\n"
" [adaptive] [probability PROBABILITY] [bandwidth KBPS]\n"
" [ecn] [harddrop] [nodrop]\n");
" [ecn] [harddrop] [nodrop]\n"
" [qevent early_drop block IDX] [qevent mark block IDX]\n");
}

#define RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

/*
 * Qevents understood by the RED qdisc.
 *
 * Each qevent has its own statically allocated state object, which is handed
 * to the QEVENT() table entry together with the qevent name and the netlink
 * attribute that carries its block index. The names match the
 * "qevent early_drop" / "qevent mark" keywords shown in the usage text.
 * NOTE(review): QEVENT() is defined in tc_qevent.h, not visible here --
 * confirm how it wires the state object and the "plain" callbacks.
 */
static struct qevent_plain qe_early_drop = {};
static struct qevent_plain qe_mark = {};
static struct qevent_util qevents[] = {
QEVENT("early_drop", plain, &qe_early_drop, TCA_RED_EARLY_DROP_BLOCK),
QEVENT("mark", plain, &qe_mark, TCA_RED_MARK_BLOCK),
/* Empty entry terminates the table. */
{},
};

static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv,
struct nlmsghdr *n, const char *dev)
{
Expand All @@ -51,6 +61,8 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv,
__u32 max_P;
struct rtattr *tail;

qevents_init(qevents);

while (argc > 0) {
if (strcmp(*argv, "limit") == 0) {
NEXT_ARG();
Expand Down Expand Up @@ -109,6 +121,11 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv,
flags_bf.value |= TC_RED_ADAPTATIVE;
} else if (strcmp(*argv, "adaptive") == 0) {
flags_bf.value |= TC_RED_ADAPTATIVE;
} else if (matches(*argv, "qevent") == 0) {
NEXT_ARG();
if (qevent_parse(qevents, &argc, &argv))
return -1;
continue;
} else if (strcmp(*argv, "help") == 0) {
explain();
return -1;
Expand Down Expand Up @@ -162,6 +179,8 @@ static int red_parse_opt(struct qdisc_util *qu, int argc, char **argv,
max_P = probability * pow(2, 32);
addattr_l(n, 1024, TCA_RED_MAX_P, &max_P, sizeof(max_P));
addattr_l(n, 1024, TCA_RED_FLAGS, &flags_bf, sizeof(flags_bf));
if (qevents_dump(qevents, n))
return -1;
addattr_nest_end(n, tail);
return 0;
}
Expand Down Expand Up @@ -203,12 +222,12 @@ static int red_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
print_uint(PRINT_JSON, "min", NULL, qopt->qth_min);
print_string(PRINT_FP, NULL, "min %s ", sprint_size(qopt->qth_min, b2));
print_uint(PRINT_JSON, "max", NULL, qopt->qth_max);
print_string(PRINT_FP, NULL, "max %s ", sprint_size(qopt->qth_max, b3));
print_string(PRINT_FP, NULL, "max %s", sprint_size(qopt->qth_max, b3));

tc_red_print_flags(qopt->flags);

if (show_details) {
print_uint(PRINT_ANY, "ewma", "ewma %u ", qopt->Wlog);
print_uint(PRINT_ANY, "ewma", " ewma %u ", qopt->Wlog);
if (max_P)
print_float(PRINT_ANY, "probability",
"probability %lg ", max_P / pow(2, 32));
Expand All @@ -217,6 +236,11 @@ static int red_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
print_uint(PRINT_ANY, "Scell_log", "Scell_log %u",
qopt->Scell_log);
}

qevents_init(qevents);
if (qevents_read(qevents, tb))
return -1;
qevents_print(qevents, f);
return 0;
}

Expand Down
202 changes: 202 additions & 0 deletions tc/tc_qevent.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/*
* Helpers for handling qevents.
*/

#include <stdio.h>
#include <string.h>

#include "tc_qevent.h"
#include "utils.h"

/*
 * Reset the per-qevent state of every entry in a qevent table.
 *
 * The table is walked until the terminating entry (the one whose id is
 * unset), and data_size bytes of each entry's data buffer are zeroed.
 * A NULL table is accepted and ignored.
 */
void qevents_init(struct qevent_util *qevents)
{
	struct qevent_util *qe = qevents;

	if (qe == NULL)
		return;

	while (qe->id) {
		memset(qe->data, 0, qe->data_size);
		qe++;
	}
}

/*
 * Parse one qevent from the command line.
 *
 * On entry, **p_argv is the qevent name. The table is searched for an entry
 * with exactly that id; if one is found, the argument cursor is advanced past
 * the name and the entry's parse_qevent callback consumes the rest.
 *
 * On success the caller's argc/argv are updated to the position where the
 * callback stopped and 0 is returned. If the callback fails, its error is
 * returned and the caller's cursor is left untouched. If the table is NULL
 * or no entry matches, an error is printed to stderr and -1 is returned.
 */
int qevent_parse(struct qevent_util *qevents, int *p_argc, char ***p_argv)
{
	/* NEXT_ARG() operates on locals named argc and argv. */
	char **argv = *p_argv;
	int argc = *p_argc;
	const char *name = *argv;
	struct qevent_util *qe;
	int err;

	for (qe = qevents; qe && qe->id; qe++) {
		if (strcmp(name, qe->id) != 0)
			continue;

		NEXT_ARG();
		err = qe->parse_qevent(qe, &argc, &argv);
		if (err)
			return err;

		/* Publish the advanced cursor only once parsing succeeded. */
		*p_argc = argc;
		*p_argv = argv;
		return 0;
	}

	fprintf(stderr, "Unknown qevent `%s'\n", name);
	return -1;
}

/*
 * Load qevent state from a parsed netlink attribute table.
 *
 * For every table entry whose attribute is present in tb, the entry's
 * read_qevent callback is invoked. The first non-zero callback result
 * aborts the walk and is returned; otherwise 0 is returned. A NULL
 * table yields 0.
 */
int qevents_read(struct qevent_util *qevents, struct rtattr **tb)
{
	struct qevent_util *qe;

	if (!qevents)
		return 0;

	for (qe = qevents; qe->id; qe++) {
		int rc;

		/* Attribute absent: this qevent is not configured. */
		if (!tb[qe->attr])
			continue;

		rc = qe->read_qevent(qe, tb);
		if (rc)
			return rc;
	}

	return 0;
}

/*
 * Print every configured qevent of a table.
 *
 * An entry counts as configured when its block index is non-zero. The JSON
 * "qevents" array is opened lazily, right before the first configured entry,
 * and closed only if it was opened, so nothing is emitted when no qevent is
 * in use. Each configured entry is printed as one object holding its kind
 * followed by whatever the entry's print_qevent callback outputs.
 */
void qevents_print(struct qevent_util *qevents, FILE *f)
{
	struct qevent_util *qe;
	int array_open = false;

	if (!qevents)
		return;

	for (qe = qevents; qe->id; qe++) {
		struct qevent_base *base = qe->data;

		if (!base->block_idx)
			continue;

		if (!array_open) {
			open_json_array(PRINT_JSON, "qevents");
			array_open = true;
		}

		open_json_object(NULL);
		print_string(PRINT_ANY, "kind", " qevent %s", qe->id);
		qe->print_qevent(qe, f);
		close_json_object();
	}

	if (array_open)
		close_json_array(PRINT_ANY, "");
}

/*
 * Emit every configured qevent of a table into a netlink message.
 *
 * Entries with a zero block index are skipped. For the others, the entry's
 * dump_qevent callback is invoked with the message; the first non-zero
 * result stops the walk and is returned. Returns 0 when all callbacks
 * succeed or when the table is NULL.
 */
int qevents_dump(struct qevent_util *qevents, struct nlmsghdr *n)
{
	struct qevent_util *qe;

	if (!qevents)
		return 0;

	for (qe = qevents; qe->id; qe++) {
		struct qevent_base *base = qe->data;
		int rc;

		if (!base->block_idx)
			continue;

		rc = qe->dump_qevent(qe, n);
		if (rc)
			return rc;
	}

	return 0;
}

/*
 * Parse a block index given on the command line into qeb->block_idx.
 *
 * Fails with -1 (after printing a message to stderr) when an index has
 * already been stored, when get_unsigned() rejects the string, or when the
 * parsed value is 0. Returns 0 on success.
 * NOTE(review): the literal 10 is presumably the numeric base expected by
 * get_unsigned() -- confirm against utils.h.
 */
static int parse_block_idx(const char *arg, struct qevent_base *qeb)
{
	if (qeb->block_idx != 0) {
		fprintf(stderr, "Qevent block index already specified\n");
		return -1;
	}

	/* Accept only a successfully parsed, non-zero index. */
	if (get_unsigned(&qeb->block_idx, arg, 10) == 0 && qeb->block_idx != 0)
		return 0;

	fprintf(stderr, "Illegal qevent block index\n");
	return -1;
}

/*
 * Store the block index carried by a netlink attribute in qeb->block_idx.
 *
 * The attribute payload is read with rta_getattr_u32(). Fails with -1
 * (after printing a message to stderr) when an index has already been
 * stored or when the value read is 0. Returns 0 on success.
 */
static int read_block_idx(struct rtattr *attr, struct qevent_base *qeb)
{
	if (qeb->block_idx != 0) {
		fprintf(stderr, "Qevent block index already specified\n");
		return -1;
	}

	qeb->block_idx = rta_getattr_u32(attr);
	if (qeb->block_idx != 0)
		return 0;

	fprintf(stderr, "Illegal qevent block index\n");
	return -1;
}

/*
 * Print a block index under the "block" key.
 * The stream argument f is not used by this helper.
 */
static void print_block_idx(FILE *f, __u32 block_idx)
{
	print_uint(PRINT_ANY,
		   "block", " block %u",
		   block_idx);
}

/*
 * Command-line parser for a "plain" qevent, whose only parameter is
 * "block IDX".
 *
 * Consumes consecutive "block IDX" pairs starting at **p_argv and stops at
 * the first argument that is not "block". A block index is mandatory.
 *
 * Returns 0 and updates the caller's argc/argv on success. Returns -1,
 * after printing a message to stderr, when this qevent already has a block
 * index on entry, when the index is invalid or repeated, or when none was
 * given.
 */
int qevent_parse_plain(struct qevent_util *qu, int *p_argc, char ***p_argv)
{
	/* NEXT_ARG()/NEXT_ARG_FWD() operate on locals named argc and argv. */
	struct qevent_plain *plain = qu->data;
	char **argv = *p_argv;
	int argc = *p_argc;

	if (plain->base.block_idx) {
		fprintf(stderr, "Duplicate qevent\n");
		return -1;
	}

	while (argc > 0 && strcmp(*argv, "block") == 0) {
		NEXT_ARG();
		if (parse_block_idx(*argv, &plain->base))
			return -1;
		NEXT_ARG_FWD();
	}

	if (!plain->base.block_idx) {
		fprintf(stderr, "Unspecified qevent block index\n");
		return -1;
	}

	*p_argc = argc;
	*p_argv = argv;
	return 0;
}

/*
 * Netlink reader for a "plain" qevent: takes the attribute selected by
 * qu->attr from tb and stores its block index in the qevent's state.
 * Returns the result of read_block_idx().
 */
int qevent_read_plain(struct qevent_util *qu, struct rtattr **tb)
{
	struct qevent_plain *plain = qu->data;
	struct rtattr *attr = tb[qu->attr];

	return read_block_idx(attr, &plain->base);
}

void qevent_print_plain(struct qevent_util *qu, FILE *f)
{
struct qevent_plain *qe = qu->data;

print_block_idx(f, qe->base.block_idx);
}

int qevent_dump_plain(struct qevent_util *qu, struct nlmsghdr *n)
{
struct qevent_plain *qe = qu->data;

return addattr32(n, 1024, qu->attr, qe->base.block_idx);
}
Loading

0 comments on commit 79ea019

Please sign in to comment.