Skip to content

Commit

Permalink
mptcp: Add handling of incoming MP_JOIN requests
Browse files Browse the repository at this point in the history
Process the MP_JOIN option in a SYN packet with the same flow
as MP_CAPABLE but when the third ACK is received add the
subflow to the MPTCP socket subflow list instead of adding it to
the TCP socket accept queue.

The subflow is added at the end of the subflow list so it will not
interfere with the existing subflows operation and no data is
expected to be transmitted on it.

Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Peter Krystad authored and davem330 committed Mar 30, 2020
1 parent 1b1c7a0 commit f296234
Show file tree
Hide file tree
Showing 8 changed files with 390 additions and 46 deletions.
8 changes: 7 additions & 1 deletion include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,13 @@ struct mptcp_options_received {
add_addr : 1,
rm_addr : 1,
family : 4,
echo : 1;
echo : 1,
backup : 1;
u32 token;
u32 nonce;
u64 thmac;
u8 hmac[20];
u8 join_id;
u8 use_map:1,
dsn64:1,
data_fin:1,
Expand Down
11 changes: 11 additions & 0 deletions include/net/mptcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ struct mptcp_out_options {
u8 addr_id;
u64 ahmac;
u8 rm_id;
u8 join_id;
u8 backup;
u32 nonce;
u64 thmac;
struct mptcp_ext ext_copy;
#endif
};
Expand Down Expand Up @@ -115,6 +119,8 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
skb_ext_find(from, SKB_EXT_MPTCP));
}

bool mptcp_sk_is_subflow(const struct sock *sk);

#else

static inline void mptcp_init(void)
Expand Down Expand Up @@ -181,6 +187,11 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
return true;
}

static inline bool mptcp_sk_is_subflow(const struct sock *sk)
{
return false;
}

#endif /* CONFIG_MPTCP */

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
Expand Down
6 changes: 6 additions & 0 deletions net/ipv4/tcp_minisocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!child)
goto listen_overflow;

if (own_req && sk_is_mptcp(child) && mptcp_sk_is_subflow(child)) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
inet_csk_reqsk_queue_drop_and_put(sk, req);
return child;
}

sock_rps_save_rxhash(child, skb);
tcp_synack_rtt_meas(child, req);
*req_stolen = !own_req;
Expand Down
107 changes: 95 additions & 12 deletions net/mptcp/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,38 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
mp_opt->rcvr_key, mp_opt->data_len);
break;

case MPTCPOPT_MP_JOIN:
mp_opt->mp_join = 1;
if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
mp_opt->token = get_unaligned_be32(ptr);
ptr += 4;
mp_opt->nonce = get_unaligned_be32(ptr);
ptr += 4;
pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
mp_opt->backup, mp_opt->join_id,
mp_opt->token, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
mp_opt->thmac = get_unaligned_be64(ptr);
ptr += 8;
mp_opt->nonce = get_unaligned_be32(ptr);
ptr += 4;
pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
mp_opt->backup, mp_opt->join_id,
mp_opt->thmac, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
ptr += 2;
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
pr_debug("MP_JOIN hmac");
} else {
pr_warn("MP_JOIN bad option size");
mp_opt->mp_join = 0;
}
break;

case MPTCPOPT_DSS:
pr_debug("DSS");
ptr++;
Expand Down Expand Up @@ -572,37 +604,80 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
return true;
} else if (subflow_req->mp_join) {
opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
opts->backup = subflow_req->backup;
opts->join_id = subflow_req->local_id;
opts->thmac = subflow_req->thmac;
opts->nonce = subflow_req->local_nonce;
pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
subflow_req, opts->backup, opts->join_id,
opts->thmac, opts->nonce);
*size = TCPOLEN_MPTCP_MPJ_SYNACK;
return true;
}
return false;
}

static bool check_fully_established(struct mptcp_subflow_context *subflow,
static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
struct mptcp_subflow_context *subflow,
struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
/* here we can process OoO, in-window pkts, only in-sequence 4th ack
* are relevant
* will make the subflow fully established
*/
if (likely(subflow->fully_established ||
TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1))
return true;
if (likely(subflow->fully_established)) {
/* on passive sockets, check for 3rd ack retransmission
* note that msk is always set by subflow_syn_recv_sock()
* for mp_join subflows
*/
if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && mp_opt->mp_join &&
READ_ONCE(msk->pm.server_side))
tcp_send_ack(sk);
goto fully_established;
}

/* we should process OoO packets before the first subflow is fully
* established, but not expected for MP_JOIN subflows
*/
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
return subflow->mp_capable;

if (mp_opt->use_ack)
if (mp_opt->use_ack) {
/* subflows are fully established as soon as we get any
* additional ack.
*/
subflow->fully_established = 1;
goto fully_established;
}

if (subflow->can_ack)
return true;
WARN_ON_ONCE(subflow->can_ack);

/* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP
*/
if (!mp_opt->mp_capable) {
subflow->mp_capable = 0;
tcp_sk(mptcp_subflow_tcp_sock(subflow))->is_mptcp = 0;
tcp_sk(sk)->is_mptcp = 0;
return false;
}

subflow->fully_established = 1;
subflow->remote_key = mp_opt->sndr_key;
subflow->can_ack = 1;

fully_established:
if (likely(subflow->pm_notified))
return true;

subflow->pm_notified = 1;
if (subflow->mp_join)
mptcp_pm_subflow_established(msk, subflow);
else
mptcp_pm_fully_established(msk);
return true;
}

Expand Down Expand Up @@ -641,7 +716,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_ext *mpext;

mp_opt = &opt_rx->mptcp;
if (!check_fully_established(subflow, skb, mp_opt))
if (!check_fully_established(msk, sk, subflow, skb, mp_opt))
return;

if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) {
Expand Down Expand Up @@ -700,8 +775,6 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
}

mpext->data_fin = mp_opt->data_fin;

mptcp_pm_fully_established(msk);
}

void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
Expand Down Expand Up @@ -787,6 +860,16 @@ void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
0, opts->rm_id);
}

if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYNACK,
opts->backup, opts->join_id);
put_unaligned_be64(opts->thmac, ptr);
ptr += 2;
put_unaligned_be32(opts->nonce, ptr);
ptr += 1;
}

if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
struct mptcp_ext *mpext = &opts->ext_copy;
u8 len = TCPOLEN_MPTCP_DSS_BASE;
Expand Down
96 changes: 79 additions & 17 deletions net/mptcp/protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,19 +104,6 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
return ssock;
}

static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;

sock_owned_by_me((const struct sock *)msk);

mptcp_for_each_subflow(msk, subflow) {
return mptcp_subflow_tcp_sock(subflow);
}

return NULL;
}

static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
struct sk_buff *skb,
unsigned int offset, size_t copy_len)
Expand Down Expand Up @@ -391,6 +378,43 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
return ret;
}

static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
struct sock *backup = NULL;

sock_owned_by_me((const struct sock *)msk);

mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

if (!sk_stream_memory_free(ssk)) {
struct socket *sock = ssk->sk_socket;

if (sock) {
clear_bit(MPTCP_SEND_SPACE, &msk->flags);
smp_mb__after_atomic();

/* enables sk->write_space() callbacks */
set_bit(SOCK_NOSPACE, &sock->flags);
}

return NULL;
}

if (subflow->backup) {
if (!backup)
backup = ssk;

continue;
}

return ssk;
}

return backup;
}

static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
{
struct socket *sock;
Expand Down Expand Up @@ -438,10 +462,17 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return ret >= 0 ? ret + copied : (copied ? copied : ret);
}

ssk = mptcp_subflow_get(msk);
if (!ssk) {
release_sock(sk);
return -ENOTCONN;
ssk = mptcp_subflow_get_send(msk);
while (!sk_stream_memory_free(sk) || !ssk) {
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
goto out;

ssk = mptcp_subflow_get_send(msk);
if (list_empty(&msk->conn_list)) {
ret = -ENOTCONN;
goto out;
}
}

pr_debug("conn_list->subflow=%p", ssk);
Expand Down Expand Up @@ -1070,6 +1101,37 @@ static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
write_unlock_bh(&sk->sk_callback_lock);
}

bool mptcp_finish_join(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct sock *parent = (void *)msk;
struct socket *parent_sock;

pr_debug("msk=%p, subflow=%p", msk, subflow);

/* mptcp socket already closing? */
if (inet_sk_state_load(parent) != TCP_ESTABLISHED)
return false;

if (!msk->pm.server_side)
return true;

/* passive connection, attach to msk socket */
parent_sock = READ_ONCE(parent->sk_socket);
if (parent_sock && !sk->sk_socket)
mptcp_sock_graft(sk, parent_sock);

return mptcp_pm_allow_new_subflow(msk);
}

bool mptcp_sk_is_subflow(const struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

return subflow->mp_join == 1;
}

static bool mptcp_memory_free(const struct sock *sk, int wake)
{
struct mptcp_sock *msk = mptcp_sk(sk);
Expand Down

0 comments on commit f296234

Please sign in to comment.