@@ -316,15 +316,15 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
316316 return NULL ;
317317}
318318
319- static inline bool mptcp_skb_can_collapse_to (const struct mptcp_sock * msk ,
320- const struct sk_buff * skb ,
321- const struct mptcp_ext * mpext )
319+ static bool mptcp_skb_can_collapse_to (u64 write_seq ,
320+ const struct sk_buff * skb ,
321+ const struct mptcp_ext * mpext )
322322{
323323 if (!tcp_skb_can_collapse_to (skb ))
324324 return false;
325325
326326 /* can collapse only if MPTCP level sequence is in order */
327- return mpext && mpext -> data_seq + mpext -> data_len == msk -> write_seq ;
327+ return mpext && mpext -> data_seq + mpext -> data_len == write_seq ;
328328}
329329
330330static bool mptcp_frag_can_collapse_to (const struct mptcp_sock * msk ,
@@ -417,23 +417,28 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
417417}
418418
419419static int mptcp_sendmsg_frag (struct sock * sk , struct sock * ssk ,
420- struct msghdr * msg , long * timeo , int * pmss_now ,
420+ struct msghdr * msg , struct mptcp_data_frag * dfrag ,
421+ long * timeo , int * pmss_now ,
421422 int * ps_goal )
422423{
423424 int mss_now , avail_size , size_goal , offset , ret , frag_truesize = 0 ;
424425 bool dfrag_collapsed , can_collapse = false;
425426 struct mptcp_sock * msk = mptcp_sk (sk );
426427 struct mptcp_ext * mpext = NULL ;
427- struct mptcp_data_frag * dfrag ;
428+ bool retransmission = !! dfrag ;
428429 struct sk_buff * skb , * tail ;
429430 struct page_frag * pfrag ;
431+ struct page * page ;
432+ u64 * write_seq ;
430433 size_t psize ;
431434
432435 /* use the mptcp page cache so that we can easily move the data
433436 * from one substream to another, but do per subflow memory accounting
437+	 * Note: pfrag is used only when !retransmission, but the compiler is
438+	 * fooled into a warning if we don't init here
434439 */
435440 pfrag = sk_page_frag (sk );
436- while (! mptcp_page_frag_refill (ssk , pfrag ) ||
441+ while ((! retransmission && ! mptcp_page_frag_refill (ssk , pfrag ) ) ||
437442 !mptcp_ext_cache_refill (msk )) {
438443 ret = sk_stream_wait_memory (ssk , timeo );
439444 if (ret )
@@ -447,6 +452,13 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
447452 if (unlikely (__mptcp_needs_tcp_fallback (msk )))
448453 return 0 ;
449454 }
455+ if (!retransmission ) {
456+ write_seq = & msk -> write_seq ;
457+ page = pfrag -> page ;
458+ } else {
459+ write_seq = & dfrag -> data_seq ;
460+ page = dfrag -> page ;
461+ }
450462
451463 /* compute copy limit */
452464 mss_now = tcp_send_mss (ssk , & size_goal , msg -> msg_flags );
@@ -464,63 +476,74 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
464476 * SSN association set here
465477 */
466478 can_collapse = (size_goal - skb -> len > 0 ) &&
467- mptcp_skb_can_collapse_to (msk , skb , mpext );
479+ mptcp_skb_can_collapse_to (* write_seq , skb , mpext );
468480 if (!can_collapse )
469481 TCP_SKB_CB (skb )-> eor = 1 ;
470482 else
471483 avail_size = size_goal - skb -> len ;
472484 }
473485
474- /* reuse tail pfrag, if possible, or carve a new one from the page
475- * allocator
476- */
477- dfrag = mptcp_rtx_tail (sk );
478- offset = pfrag -> offset ;
479- dfrag_collapsed = mptcp_frag_can_collapse_to (msk , pfrag , dfrag );
480- if (!dfrag_collapsed ) {
481- dfrag = mptcp_carve_data_frag (msk , pfrag , offset );
486+ if (!retransmission ) {
487+ /* reuse tail pfrag, if possible, or carve a new one from the
488+ * page allocator
489+ */
490+ dfrag = mptcp_rtx_tail (sk );
491+ offset = pfrag -> offset ;
492+ dfrag_collapsed = mptcp_frag_can_collapse_to (msk , pfrag , dfrag );
493+ if (!dfrag_collapsed ) {
494+ dfrag = mptcp_carve_data_frag (msk , pfrag , offset );
495+ offset = dfrag -> offset ;
496+ frag_truesize = dfrag -> overhead ;
497+ }
498+ psize = min_t (size_t , pfrag -> size - offset , avail_size );
499+
500+ /* Copy to page */
501+ pr_debug ("left=%zu" , msg_data_left (msg ));
502+ psize = copy_page_from_iter (pfrag -> page , offset ,
503+ min_t (size_t , msg_data_left (msg ),
504+ psize ),
505+ & msg -> msg_iter );
506+ pr_debug ("left=%zu" , msg_data_left (msg ));
507+ if (!psize )
508+ return - EINVAL ;
509+
510+ if (!sk_wmem_schedule (sk , psize + dfrag -> overhead ))
511+ return - ENOMEM ;
512+ } else {
482513 offset = dfrag -> offset ;
483- frag_truesize = dfrag -> overhead ;
514+ psize = min_t ( size_t , dfrag -> data_len , avail_size ) ;
484515 }
485- psize = min_t (size_t , pfrag -> size - offset , avail_size );
486-
487- /* Copy to page */
488- pr_debug ("left=%zu" , msg_data_left (msg ));
489- psize = copy_page_from_iter (pfrag -> page , offset ,
490- min_t (size_t , msg_data_left (msg ), psize ),
491- & msg -> msg_iter );
492- pr_debug ("left=%zu" , msg_data_left (msg ));
493- if (!psize )
494- return - EINVAL ;
495-
496- if (!sk_wmem_schedule (sk , psize + dfrag -> overhead ))
497- return - ENOMEM ;
498516
499517 /* tell the TCP stack to delay the push so that we can safely
500518 * access the skb after the sendpages call
501519 */
502- ret = do_tcp_sendpages (ssk , pfrag -> page , offset , psize ,
520+ ret = do_tcp_sendpages (ssk , page , offset , psize ,
503521 msg -> msg_flags | MSG_SENDPAGE_NOTLAST );
504522 if (ret <= 0 )
505523 return ret ;
506524
507525 frag_truesize += ret ;
508- if (unlikely (ret < psize ))
509- iov_iter_revert (& msg -> msg_iter , psize - ret );
526+ if (!retransmission ) {
527+ if (unlikely (ret < psize ))
528+ iov_iter_revert (& msg -> msg_iter , psize - ret );
510529
511- /* send successful, keep track of sent data for mptcp-level
512- * retransmission
513- */
514- dfrag -> data_len += ret ;
515- if (!dfrag_collapsed ) {
516- get_page (dfrag -> page );
517- list_add_tail (& dfrag -> list , & msk -> rtx_queue );
518- }
530+ /* send successful, keep track of sent data for mptcp-level
531+ * retransmission
532+ */
533+ dfrag -> data_len += ret ;
534+ if (!dfrag_collapsed ) {
535+ get_page (dfrag -> page );
536+ list_add_tail (& dfrag -> list , & msk -> rtx_queue );
537+ sk_wmem_queued_add (sk , frag_truesize );
538+ } else {
539+ sk_wmem_queued_add (sk , ret );
540+ }
519541
520- /* charge data on mptcp rtx queue to the master socket
521- * Note: we charge such data both to sk and ssk
522- */
523- sk -> sk_forward_alloc -= frag_truesize ;
542+ /* charge data on mptcp rtx queue to the master socket
543+ * Note: we charge such data both to sk and ssk
544+ */
545+ sk -> sk_forward_alloc -= frag_truesize ;
546+ }
524547
525548 /* if the tail skb extension is still the cached one, collapsing
526549 * really happened. Note: we can't check for 'same skb' as the sk_buff
@@ -539,7 +562,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
539562 msk -> cached_ext = NULL ;
540563
541564 memset (mpext , 0 , sizeof (* mpext ));
542- mpext -> data_seq = msk -> write_seq ;
565+ mpext -> data_seq = * write_seq ;
543566 mpext -> subflow_seq = mptcp_subflow_ctx (ssk )-> rel_write_seq ;
544567 mpext -> data_len = ret ;
545568 mpext -> use_map = 1 ;
@@ -550,8 +573,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
550573 mpext -> dsn64 );
551574
552575out :
553- pfrag -> offset += frag_truesize ;
554- msk -> write_seq += ret ;
576+ if (!retransmission )
577+ pfrag -> offset += frag_truesize ;
578+ * write_seq += ret ;
555579 mptcp_subflow_ctx (ssk )-> rel_write_seq += ret ;
556580
557581 return ret ;
@@ -663,7 +687,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
663687
664688 lock_sock (ssk );
665689 while (msg_data_left (msg )) {
666- ret = mptcp_sendmsg_frag (sk , ssk , msg , & timeo , & mss_now ,
690+ ret = mptcp_sendmsg_frag (sk , ssk , msg , NULL , & timeo , & mss_now ,
667691 & size_goal );
668692 if (ret < 0 )
669693 break ;
@@ -974,6 +998,7 @@ static int mptcp_init_sock(struct sock *sk)
974998 return ret ;
975999
9761000 sk_sockets_allocated_inc (sk );
1001+ sk -> sk_sndbuf = sock_net (sk )-> ipv4 .sysctl_tcp_wmem [2 ];
9771002
9781003 if (!mptcp_is_enabled (sock_net (sk )))
9791004 return - ENOPROTOOPT ;
0 commit comments