Skip to content

Commit 988998a

Browse files
committed
Merge branch 'mptcp-tcp-fallback'
Mat Martineau says:

====================
mptcp: TCP fallback for established connections

RFC 8684 allows some MPTCP connections to fall back to regular TCP when the MPTCP DSS checksum detects middlebox interference, there is only a single subflow, and there is no unacknowledged out-of-sequence data. When this condition is detected, the stack sends a MPTCP DSS option with an "infinite mapping" to signal that a fallback is happening, and the peers will stop sending MPTCP options in their TCP headers. The Linux MPTCP stack has not yet supported this type of fallback, instead closing the connection when the MPTCP checksum fails.

This series adds support for fallback to regular TCP in a more limited scenario, for only MPTCP connections that have never connected additional subflows or transmitted out-of-sequence data.

The selftests are also updated to check new MIBs that track infinite mappings.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 31693d0 + 8bd03be commit 988998a

File tree

10 files changed

+121
-31
lines changed

10 files changed

+121
-31
lines changed

include/net/mptcp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ struct mptcp_ext {
3535
frozen:1,
3636
reset_transient:1;
3737
u8 reset_reason:4,
38-
csum_reqd:1;
38+
csum_reqd:1,
39+
infinite_map:1;
3940
};
4041

4142
#define MPTCP_RM_IDS_MAX 8

include/trace/events/mptcp.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
8484
__field(u8, reset_transient)
8585
__field(u8, reset_reason)
8686
__field(u8, csum_reqd)
87+
__field(u8, infinite_map)
8788
),
8889

8990
TP_fast_assign(
@@ -102,17 +103,18 @@ DECLARE_EVENT_CLASS(mptcp_dump_mpext,
102103
__entry->reset_transient = mpext->reset_transient;
103104
__entry->reset_reason = mpext->reset_reason;
104105
__entry->csum_reqd = mpext->csum_reqd;
106+
__entry->infinite_map = mpext->infinite_map;
105107
),
106108

107-
TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u",
109+
TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u infinite_map=%u",
108110
__entry->data_ack, __entry->data_seq,
109111
__entry->subflow_seq, __entry->data_len,
110112
__entry->csum, __entry->use_map,
111113
__entry->dsn64, __entry->data_fin,
112114
__entry->use_ack, __entry->ack64,
113115
__entry->mpc_map, __entry->frozen,
114116
__entry->reset_transient, __entry->reset_reason,
115-
__entry->csum_reqd)
117+
__entry->csum_reqd, __entry->infinite_map)
116118
);
117119

118120
DEFINE_EVENT(mptcp_dump_mpext, mptcp_sendmsg_frag,

net/mptcp/mib.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
2424
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
2525
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
2626
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
27+
SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
2728
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
2829
SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
2930
SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),

net/mptcp/mib.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ enum linux_mptcp_mib_field {
1717
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
1818
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
1919
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
20+
MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
2021
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
2122
MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
2223
MPTCP_MIB_DATACSUMERR, /* The data checksum fail */

net/mptcp/options.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
825825

826826
opts->suboptions = 0;
827827

828-
if (unlikely(__mptcp_check_fallback(msk)))
828+
if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb)))
829829
return false;
830830

831831
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
@@ -1340,8 +1340,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
13401340
put_unaligned_be32(mpext->subflow_seq, ptr);
13411341
ptr += 1;
13421342
if (opts->csum_reqd) {
1343+
/* data_len == 0 is reserved for the infinite mapping,
1344+
* the checksum will also be set to 0.
1345+
*/
13431346
put_unaligned_be32(mpext->data_len << 16 |
1344-
mptcp_make_csum(mpext), ptr);
1347+
(mpext->data_len ? mptcp_make_csum(mpext) : 0),
1348+
ptr);
13451349
} else {
13461350
put_unaligned_be32(mpext->data_len << 16 |
13471351
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);

net/mptcp/pm.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,13 @@ void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
285285

286286
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
287287
{
288+
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
289+
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
290+
288291
pr_debug("fail_seq=%llu", fail_seq);
292+
293+
if (!mptcp_has_another_subflow(sk) && READ_ONCE(msk->allow_infinite_fallback))
294+
subflow->send_infinite_map = 1;
289295
}
290296

291297
/* path manager helpers */

net/mptcp/protocol.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,6 +1229,22 @@ static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
12291229
mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
12301230
}
12311231

1232+
static void mptcp_update_infinite_map(struct mptcp_sock *msk,
1233+
struct sock *ssk,
1234+
struct mptcp_ext *mpext)
1235+
{
1236+
if (!mpext)
1237+
return;
1238+
1239+
mpext->infinite_map = 1;
1240+
mpext->data_len = 0;
1241+
1242+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
1243+
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
1244+
pr_fallback(msk);
1245+
__mptcp_do_fallback(msk);
1246+
}
1247+
12321248
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
12331249
struct mptcp_data_frag *dfrag,
12341250
struct mptcp_sendmsg_info *info)
@@ -1360,6 +1376,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
13601376
out:
13611377
if (READ_ONCE(msk->csum_enabled))
13621378
mptcp_update_data_checksum(skb, copy);
1379+
if (mptcp_subflow_ctx(ssk)->send_infinite_map)
1380+
mptcp_update_infinite_map(msk, ssk, mpext);
13631381
trace_mptcp_sendmsg_frag(mpext);
13641382
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
13651383
return copy;
@@ -2465,6 +2483,7 @@ static void __mptcp_retrans(struct sock *sk)
24652483
dfrag->already_sent = max(dfrag->already_sent, info.sent);
24662484
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
24672485
info.size_goal);
2486+
WRITE_ONCE(msk->allow_infinite_fallback, false);
24682487
}
24692488

24702489
release_sock(ssk);
@@ -2539,6 +2558,7 @@ static int __mptcp_init_sock(struct sock *sk)
25392558
msk->first = NULL;
25402559
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
25412560
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
2561+
WRITE_ONCE(msk->allow_infinite_fallback, true);
25422562
msk->recovery = false;
25432563

25442564
mptcp_pm_data_init(msk);
@@ -3275,6 +3295,7 @@ bool mptcp_finish_join(struct sock *ssk)
32753295
}
32763296

32773297
subflow->map_seq = READ_ONCE(msk->ack_seq);
3298+
WRITE_ONCE(msk->allow_infinite_fallback, false);
32783299

32793300
out:
32803301
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);

net/mptcp/protocol.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ struct mptcp_sock {
263263
bool rcv_fastclose;
264264
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
265265
bool csum_enabled;
266+
bool allow_infinite_fallback;
266267
u8 recvmsg_inq:1,
267268
cork:1,
268269
nodelay:1;
@@ -440,6 +441,7 @@ struct mptcp_subflow_context {
440441
send_mp_prio : 1,
441442
send_mp_fail : 1,
442443
send_fastclose : 1,
444+
send_infinite_map : 1,
443445
rx_eof : 1,
444446
can_ack : 1, /* only after processing the remote a key */
445447
disposable : 1, /* ctx can be free at ulp release time */
@@ -876,6 +878,17 @@ static inline void mptcp_do_fallback(struct sock *sk)
876878

877879
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
878880

881+
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
882+
{
883+
struct mptcp_ext *mpext;
884+
885+
mpext = skb ? mptcp_get_ext(skb) : NULL;
886+
if (mpext && mpext->infinite_map)
887+
return true;
888+
889+
return false;
890+
}
891+
879892
static inline bool subflow_simultaneous_connect(struct sock *sk)
880893
{
881894
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

net/mptcp/subflow.c

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,7 +1006,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
10061006

10071007
data_len = mpext->data_len;
10081008
if (data_len == 0) {
1009+
pr_debug("infinite mapping received");
10091010
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
1011+
subflow->map_data_len = 0;
10101012
return MAPPING_INVALID;
10111013
}
10121014

@@ -1203,35 +1205,39 @@ static bool subflow_check_data_avail(struct sock *ssk)
12031205
return false;
12041206

12051207
fallback:
1206-
/* RFC 8684 section 3.7. */
1207-
if (subflow->send_mp_fail) {
1208-
if (mptcp_has_another_subflow(ssk)) {
1209-
while ((skb = skb_peek(&ssk->sk_receive_queue)))
1210-
sk_eat_skb(ssk, skb);
1208+
if (!__mptcp_check_fallback(msk)) {
1209+
/* RFC 8684 section 3.7. */
1210+
if (subflow->send_mp_fail) {
1211+
if (mptcp_has_another_subflow(ssk) ||
1212+
!READ_ONCE(msk->allow_infinite_fallback)) {
1213+
ssk->sk_err = EBADMSG;
1214+
tcp_set_state(ssk, TCP_CLOSE);
1215+
subflow->reset_transient = 0;
1216+
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
1217+
tcp_send_active_reset(ssk, GFP_ATOMIC);
1218+
while ((skb = skb_peek(&ssk->sk_receive_queue)))
1219+
sk_eat_skb(ssk, skb);
1220+
}
1221+
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
1222+
return true;
12111223
}
1212-
ssk->sk_err = EBADMSG;
1213-
tcp_set_state(ssk, TCP_CLOSE);
1214-
subflow->reset_transient = 0;
1215-
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
1216-
tcp_send_active_reset(ssk, GFP_ATOMIC);
1217-
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
1218-
return true;
1219-
}
12201224

1221-
if (subflow->mp_join || subflow->fully_established) {
1222-
/* fatal protocol error, close the socket.
1223-
* subflow_error_report() will introduce the appropriate barriers
1224-
*/
1225-
ssk->sk_err = EBADMSG;
1226-
tcp_set_state(ssk, TCP_CLOSE);
1227-
subflow->reset_transient = 0;
1228-
subflow->reset_reason = MPTCP_RST_EMPTCP;
1229-
tcp_send_active_reset(ssk, GFP_ATOMIC);
1230-
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
1231-
return false;
1225+
if ((subflow->mp_join || subflow->fully_established) && subflow->map_data_len) {
1226+
/* fatal protocol error, close the socket.
1227+
* subflow_error_report() will introduce the appropriate barriers
1228+
*/
1229+
ssk->sk_err = EBADMSG;
1230+
tcp_set_state(ssk, TCP_CLOSE);
1231+
subflow->reset_transient = 0;
1232+
subflow->reset_reason = MPTCP_RST_EMPTCP;
1233+
tcp_send_active_reset(ssk, GFP_ATOMIC);
1234+
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
1235+
return false;
1236+
}
1237+
1238+
__mptcp_do_fallback(msk);
12321239
}
12331240

1234-
__mptcp_do_fallback(msk);
12351241
skb = skb_peek(&ssk->sk_receive_queue);
12361242
subflow->map_valid = 1;
12371243
subflow->map_seq = READ_ONCE(msk->ack_seq);
@@ -1483,6 +1489,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
14831489
/* discard the subflow socket */
14841490
mptcp_sock_graft(ssk, sk->sk_socket);
14851491
iput(SOCK_INODE(sf));
1492+
WRITE_ONCE(msk->allow_infinite_fallback, false);
14861493
return err;
14871494

14881495
failed_unlink:

tools/testing/selftests/net/mptcp/mptcp_join.sh

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,38 @@ chk_rst_nr()
11061106
echo "$extra_msg"
11071107
}
11081108

1109+
chk_infi_nr()
1110+
{
1111+
local infi_tx=$1
1112+
local infi_rx=$2
1113+
local count
1114+
local dump_stats
1115+
1116+
printf "%-${nr_blank}s %s" " " "itx"
1117+
count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}')
1118+
[ -z "$count" ] && count=0
1119+
if [ "$count" != "$infi_tx" ]; then
1120+
echo "[fail] got $count infinite map[s] TX expected $infi_tx"
1121+
fail_test
1122+
dump_stats=1
1123+
else
1124+
echo -n "[ ok ]"
1125+
fi
1126+
1127+
echo -n " - infirx"
1128+
count=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}')
1129+
[ -z "$count" ] && count=0
1130+
if [ "$count" != "$infi_rx" ]; then
1131+
echo "[fail] got $count infinite map[s] RX expected $infi_rx"
1132+
fail_test
1133+
dump_stats=1
1134+
else
1135+
echo "[ ok ]"
1136+
fi
1137+
1138+
[ "${dump_stats}" = 1 ] && dump_stats
1139+
}
1140+
11091141
chk_join_nr()
11101142
{
11111143
local syn_nr=$1
@@ -1115,7 +1147,8 @@ chk_join_nr()
11151147
local csum_ns2=${5:-0}
11161148
local fail_nr=${6:-0}
11171149
local rst_nr=${7:-0}
1118-
local corrupted_pkts=${8:-0}
1150+
local infi_nr=${8:-0}
1151+
local corrupted_pkts=${9:-0}
11191152
local count
11201153
local dump_stats
11211154
local with_cookie
@@ -1170,6 +1203,7 @@ chk_join_nr()
11701203
chk_csum_nr $csum_ns1 $csum_ns2
11711204
chk_fail_nr $fail_nr $fail_nr
11721205
chk_rst_nr $rst_nr $rst_nr
1206+
chk_infi_nr $infi_nr $infi_nr
11731207
fi
11741208
}
11751209

0 commit comments

Comments
 (0)