From c95b2b16a69186faa54fc902abf996c8c28b451c Mon Sep 17 00:00:00 2001 From: Evgenii Shatokhin Date: Wed, 30 Sep 2015 22:03:51 +0300 Subject: [PATCH] examples: Added an example of a problematic patch with an explanation examples/tcp_cubic-better-follow-cubic-curve-original.patch is the original patch, combined from two mainline commits (see the description in the patch). It cannot be used with Kpatch as it is because the change is in the initialization of a global structure. examples/tcp_cubic-better-follow-cubic-curve-converted.patch is a modification of the patch that Kpatch can process. Still, this modification has its issues, see the description there. Signed-off-by: Evgenii Shatokhin --- ...-better-follow-cubic-curve-converted.patch | 105 ++++++++++++++++++ ...c-better-follow-cubic-curve-original.patch | 58 ++++++++++ 2 files changed, 163 insertions(+) create mode 100644 examples/tcp_cubic-better-follow-cubic-curve-converted.patch create mode 100644 examples/tcp_cubic-better-follow-cubic-curve-original.patch diff --git a/examples/tcp_cubic-better-follow-cubic-curve-converted.patch b/examples/tcp_cubic-better-follow-cubic-curve-converted.patch new file mode 100644 index 000000000..f16e5ff11 --- /dev/null +++ b/examples/tcp_cubic-better-follow-cubic-curve-converted.patch @@ -0,0 +1,105 @@ +The original patch changes the initialization of 'cubictcp' instance of +struct tcp_congestion_ops ('cubictcp.cwnd_event' field). Kpatch +intentionally rejects to process such changes. + +This modification of the patch uses Kpatch load/unload hooks to set +'cubictcp.cwnd_event' when the binary patch is loaded and reset it to NULL +when the patch is unloaded. + +It is still needed to check if changing that field could be problematic +due to concurrency issues, etc. + +'cwnd_event' callback is used only via tcp_ca_event() function. + +include/net/tcp.h: + +static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cwnd_event) + icsk->icsk_ca_ops->cwnd_event(sk, event); +} + +In turn, tcp_ca_event() is called in a number of places in +net/ipv4/tcp_output.c and net/ipv4/tcp_input.c. + +One problem with this modification of the patch is that it may not be safe +to unload it. If it is possible for tcp_ca_event() to run concurrently with +the unloading of the patch, it may happen that 'icsk->icsk_ca_ops->cwnd_event' +is the address of bictcp_cwnd_event() when tcp_ca_event() checks it but is +set to NULL right after. So 'icsk->icsk_ca_ops->cwnd_event(sk, event)' would +result in a kernel oops. + +Whether such scenario is possible or not, it should be analyzed. If it is, +then at least, the body of tcp_ca_event() should be made atomic w.r.t. +changing 'cwnd_event' in the patch somehow. Perhaps, RCU could be suitable +for that: a read-side critical section for the body of tcp_ca_event() with +a single read of icsk->icsk_ca_ops->cwnd_event pointer with rcu_dereference(). +The pointer could be set by the patch with rcu_assign_pointer(). + +An alternative suggested by Josh Poimboeuf would be to patch the functions +that call 'cwnd_event' callback (tcp_ca_event() in this case) so that they +call bictcp_cwnd_event() directly when they detect the cubictcp struct [1]. + +Note that tcp_ca_event() is inlined in a number of places, so the binary +patch will provide replacements for all of the corresponding functions +rather than for just one. It is still needed to check if replacing these +functions in runtime is safe. + +References: +[1] https://www.redhat.com/archives/kpatch/2015-September/msg00005.html + +diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c +index 894b7ce..9bff8a0 100644 +--- a/net/ipv4/tcp_cubic.c ++++ b/net/ipv4/tcp_cubic.c +@@ -153,6 +153,27 @@ static void bictcp_init(struct sock *sk) + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; + } + ++static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) ++{ ++ if (event == CA_EVENT_TX_START) { ++ struct bictcp *ca = inet_csk_ca(sk); ++ u32 now = tcp_time_stamp; ++ s32 delta; ++ ++ delta = now - tcp_sk(sk)->lsndtime; ++ ++ /* We were application limited (idle) for a while. ++ * Shift epoch_start to keep cwnd growth to cubic curve. ++ */ ++ if (ca->epoch_start && delta > 0) { ++ ca->epoch_start += delta; ++ if (after(ca->epoch_start, now)) ++ ca->epoch_start = now; ++ } ++ return; ++ } ++} ++ + /* calculate the cubic root of x using a table lookup followed by one + * Newton-Raphson iteration. + * Avg err ~= 0.195% +@@ -444,6 +465,20 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { + .name = "cubic", + }; + ++void kpatch_load_cubictcp_cwnd_event(void) ++{ ++ cubictcp.cwnd_event = bictcp_cwnd_event; ++} ++ ++void kpatch_unload_cubictcp_cwnd_event(void) ++{ ++ cubictcp.cwnd_event = NULL; ++} ++ ++#include "kpatch-macros.h" ++KPATCH_LOAD_HOOK(kpatch_load_cubictcp_cwnd_event); ++KPATCH_UNLOAD_HOOK(kpatch_unload_cubictcp_cwnd_event); ++ + static int __init cubictcp_register(void) + { + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); diff --git a/examples/tcp_cubic-better-follow-cubic-curve-original.patch b/examples/tcp_cubic-better-follow-cubic-curve-original.patch new file mode 100644 index 000000000..dacc89f52 --- /dev/null +++ b/examples/tcp_cubic-better-follow-cubic-curve-original.patch @@ -0,0 +1,58 @@ +This patch is for 3.10.x. +It combines the following commits from the mainline: + +commit 30927520dbae297182990bb21d08762bcc35ce1d +Author: Eric Dumazet +Date: Wed Sep 9 21:55:07 2015 -0700 + + tcp_cubic: better follow cubic curve after idle period + +commit c2e7204d180f8efc80f27959ca9cf16fa17f67db +Author: Eric Dumazet +Date: Thu Sep 17 08:38:00 2015 -0700 + + tcp_cubic: do not set epoch_start in the future + +References: +http://www.phoronix.com/scan.php?page=news_item&px=Google-Fixes-TCP-Linux + +diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c +index 894b7ce..872b3a0 100644 +--- a/net/ipv4/tcp_cubic.c ++++ b/net/ipv4/tcp_cubic.c +@@ -153,6 +153,27 @@ static void bictcp_init(struct sock *sk) + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; + } + ++static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) ++{ ++ if (event == CA_EVENT_TX_START) { ++ struct bictcp *ca = inet_csk_ca(sk); ++ u32 now = tcp_time_stamp; ++ s32 delta; ++ ++ delta = now - tcp_sk(sk)->lsndtime; ++ ++ /* We were application limited (idle) for a while. ++ * Shift epoch_start to keep cwnd growth to cubic curve. ++ */ ++ if (ca->epoch_start && delta > 0) { ++ ca->epoch_start += delta; ++ if (after(ca->epoch_start, now)) ++ ca->epoch_start = now; ++ } ++ return; ++ } ++} ++ + /* calculate the cubic root of x using a table lookup followed by one + * Newton-Raphson iteration. + * Avg err ~= 0.195% +@@ -439,6 +460,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { + .cong_avoid = bictcp_cong_avoid, + .set_state = bictcp_state, + .undo_cwnd = bictcp_undo_cwnd, ++ .cwnd_event = bictcp_cwnd_event, + .pkts_acked = bictcp_acked, + .owner = THIS_MODULE, + .name = "cubic",