Skip to content

Commit 38e3bfa

Browse files
committed
Merge branch 'mptcp-improve-backup-subflows'
Mat Martineau says: ==================== mptcp: Improve use of backup subflows Multipath TCP combines multiple TCP subflows in to one stream, and the MPTCP-level socket must decide which subflow to use when sending (or resending) chunks of data. The choice of the "best" subflow to transmit on can vary depending on the priority (normal or backup) for each subflow and how well the subflow is performing. In order to improve MPTCP performance when some subflows are failing, this patch set changes how backup subflows are utilized and introduces tracking of "stale" subflows that are still connected but not making progress. Patch 1 adjusts MPTCP-level retransmit timeouts to use data from all subflows. Patch 2 makes MPTCP-level retransmissions less aggressive to avoid resending data that's still queued at the TCP level. Patch 3 changes the way pending data is handled when subflows are closed. Unacked MPTCP-level data still in the subflow tx queue is immediately moved to another subflow for transmission instead of waiting for MPTCP-level timeouts to trigger retransmission. Patch 4 has some sysctl code cleanup. Patches 5 and 6 add tracking of "stale" subflows, so only underlying TCP subflow connections that appear to be making progress are considered when selecting a subflow to (re)transmit data. How fast a subflow goes stale is configurable with a per-namespace sysctl. Related MIBS are added too. Patch 7 makes sure the backup flag is always correctly recorded when the MP_JOIN SYN/ACK is received for an added subflow. Patch 8 adds more test cases for backup subflows and stale subflows. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents e5f3155 + 7d1e6f1 commit 38e3bfa

File tree

11 files changed

+464
-88
lines changed

11 files changed

+464
-88
lines changed

Documentation/networking/mptcp-sysctl.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,15 @@ allow_join_initial_addr_port - BOOLEAN
4545
This is a per-namespace sysctl.
4646

4747
Default: 1
48+
49+
stale_loss_cnt - INTEGER
50+
The number of MPTCP-level retransmission intervals with no traffic and
51+
pending outstanding data on a given subflow required to declare it stale.
52+
The packet scheduler ignores stale subflows.
53+
A low stale_loss_cnt value allows for fast active-backup switch-over,
54+
an high value maximize links utilization on edge scenarios e.g. lossy
55+
link with high BER or peer pausing the data processing.
56+
57+
This is a per-namespace sysctl.
58+
59+
Default: 4

net/mptcp/ctrl.c

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,43 +21,50 @@ struct mptcp_pernet {
2121
struct ctl_table_header *ctl_table_hdr;
2222
#endif
2323

24-
u8 mptcp_enabled;
2524
unsigned int add_addr_timeout;
25+
unsigned int stale_loss_cnt;
26+
u8 mptcp_enabled;
2627
u8 checksum_enabled;
2728
u8 allow_join_initial_addr_port;
2829
};
2930

30-
static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
31+
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
3132
{
3233
return net_generic(net, mptcp_pernet_id);
3334
}
3435

35-
int mptcp_is_enabled(struct net *net)
36+
int mptcp_is_enabled(const struct net *net)
3637
{
3738
return mptcp_get_pernet(net)->mptcp_enabled;
3839
}
3940

40-
unsigned int mptcp_get_add_addr_timeout(struct net *net)
41+
unsigned int mptcp_get_add_addr_timeout(const struct net *net)
4142
{
4243
return mptcp_get_pernet(net)->add_addr_timeout;
4344
}
4445

45-
int mptcp_is_checksum_enabled(struct net *net)
46+
int mptcp_is_checksum_enabled(const struct net *net)
4647
{
4748
return mptcp_get_pernet(net)->checksum_enabled;
4849
}
4950

50-
int mptcp_allow_join_id0(struct net *net)
51+
int mptcp_allow_join_id0(const struct net *net)
5152
{
5253
return mptcp_get_pernet(net)->allow_join_initial_addr_port;
5354
}
5455

56+
unsigned int mptcp_stale_loss_cnt(const struct net *net)
57+
{
58+
return mptcp_get_pernet(net)->stale_loss_cnt;
59+
}
60+
5561
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
5662
{
5763
pernet->mptcp_enabled = 1;
5864
pernet->add_addr_timeout = TCP_RTO_MAX;
5965
pernet->checksum_enabled = 0;
6066
pernet->allow_join_initial_addr_port = 1;
67+
pernet->stale_loss_cnt = 4;
6168
}
6269

6370
#ifdef CONFIG_SYSCTL
@@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
95102
.extra1 = SYSCTL_ZERO,
96103
.extra2 = SYSCTL_ONE
97104
},
105+
{
106+
.procname = "stale_loss_cnt",
107+
.maxlen = sizeof(unsigned int),
108+
.mode = 0644,
109+
.proc_handler = proc_douintvec_minmax,
110+
},
98111
{}
99112
};
100113

@@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
114127
table[1].data = &pernet->add_addr_timeout;
115128
table[2].data = &pernet->checksum_enabled;
116129
table[3].data = &pernet->allow_join_initial_addr_port;
130+
table[4].data = &pernet->stale_loss_cnt;
117131

118132
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
119133
if (!hdr)

net/mptcp/mib.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
4545
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
4646
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
4747
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
48+
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
49+
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
4850
SNMP_MIB_SENTINEL
4951
};
5052

net/mptcp/mib.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ enum linux_mptcp_mib_field {
3838
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
3939
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
4040
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
41+
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
42+
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
4143
__MPTCP_MIB_MAX
4244
};
4345

net/mptcp/options.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -975,9 +975,11 @@ static void ack_update_msk(struct mptcp_sock *msk,
975975
old_snd_una = msk->snd_una;
976976
new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
977977

978-
/* ACK for data not even sent yet? Ignore. */
979-
if (after64(new_snd_una, snd_nxt))
980-
new_snd_una = old_snd_una;
978+
/* ACK for data not even sent yet and even above recovery bound? Ignore.*/
979+
if (unlikely(after64(new_snd_una, snd_nxt))) {
980+
if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
981+
new_snd_una = old_snd_una;
982+
}
981983

982984
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
983985

net/mptcp/pm.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include <net/mptcp.h>
1111
#include "protocol.h"
1212

13+
#include "mib.h"
14+
1315
/* path manager command handlers */
1416

1517
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
@@ -308,6 +310,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
308310
return mptcp_pm_nl_get_local_id(msk, skc);
309311
}
310312

313+
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
314+
{
315+
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
316+
u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);
317+
318+
/* keep track of rtx periods with no progress */
319+
if (!subflow->stale_count) {
320+
subflow->stale_rcv_tstamp = rcv_tstamp;
321+
subflow->stale_count++;
322+
} else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
323+
if (subflow->stale_count < U8_MAX)
324+
subflow->stale_count++;
325+
mptcp_pm_nl_subflow_chk_stale(msk, ssk);
326+
} else {
327+
subflow->stale_count = 0;
328+
mptcp_subflow_set_active(subflow);
329+
}
330+
}
331+
311332
void mptcp_pm_data_init(struct mptcp_sock *msk)
312333
{
313334
msk->pm.add_addr_signaled = 0;

net/mptcp/pm_netlink.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct pm_nl_pernet {
4646
spinlock_t lock;
4747
struct list_head local_addr_list;
4848
unsigned int addrs;
49+
unsigned int stale_loss_cnt;
4950
unsigned int add_addr_signal_max;
5051
unsigned int add_addr_accept_max;
5152
unsigned int local_addr_max;
@@ -899,6 +900,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
899900
[MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
900901
};
901902

903+
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
904+
{
905+
struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
906+
struct sock *sk = (struct sock *)msk;
907+
unsigned int active_max_loss_cnt;
908+
struct net *net = sock_net(sk);
909+
unsigned int stale_loss_cnt;
910+
bool slow;
911+
912+
stale_loss_cnt = mptcp_stale_loss_cnt(net);
913+
if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
914+
return;
915+
916+
/* look for another available subflow not in loss state */
917+
active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
918+
mptcp_for_each_subflow(msk, iter) {
919+
if (iter != subflow && mptcp_subflow_active(iter) &&
920+
iter->stale_count < active_max_loss_cnt) {
921+
/* we have some alternatives, try to mark this subflow as idle ...*/
922+
slow = lock_sock_fast(ssk);
923+
if (!tcp_rtx_and_write_queues_empty(ssk)) {
924+
subflow->stale = 1;
925+
__mptcp_retransmit_pending_data(sk);
926+
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
927+
}
928+
unlock_sock_fast(ssk, slow);
929+
930+
/* always try to push the pending data regarless of re-injections:
931+
* we can possibly use backup subflows now, and subflow selection
932+
* is cheap under the msk socket lock
933+
*/
934+
__mptcp_push_pending(sk, 0);
935+
return;
936+
}
937+
}
938+
}
939+
902940
static int mptcp_pm_family_to_addr(int family)
903941
{
904942
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1922,6 +1960,7 @@ static int __net_init pm_nl_init_net(struct net *net)
19221960

19231961
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
19241962
pernet->next_id = 1;
1963+
pernet->stale_loss_cnt = 4;
19251964
spin_lock_init(&pernet->lock);
19261965

19271966
/* No need to initialize other pernet fields, the struct is zeroed at

0 commit comments

Comments
 (0)