Skip to content

Commit 4890b68

Browse files
edumazet authored and kuba-moo committed
net: keep sk->sk_forward_alloc as small as possible
Currently, tcp_memory_allocated can hit tcp_mem[] limits quite fast.

Each TCP socket can forward allocate up to 2 MB of memory, even after
the flow became less active.

10,000 sockets can have reserved 20 GB of memory, and we have no
shrinker in place to reclaim that.

Instead of trying to reclaim the extra allocations in some places,
just keep sk->sk_forward_alloc values as small as possible.

This should not impact performance too much now that we have per-cpu
reserves: changes to tcp_memory_allocated should not be too frequent.

For sockets not using SO_RESERVE_MEM:
- idle sockets (no packets in tx/rx queues) have zero forward alloc.
- non-idle sockets have a forward alloc smaller than one page.

Note:
- Removal of SK_RECLAIM_CHUNK and SK_RECLAIM_THRESHOLD is left to
  MPTCP maintainers as a follow-up.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 7c80b03 commit 4890b68

File tree

11 files changed

+7
-72
lines changed

11 files changed

+7
-72
lines changed

include/net/sock.h

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1627,49 +1627,24 @@ static inline void sk_mem_reclaim_final(struct sock *sk)
16271627
sk_mem_reclaim(sk);
16281628
}
16291629

1630-
static inline void sk_mem_reclaim_partial(struct sock *sk)
1631-
{
1632-
int reclaimable;
1633-
1634-
if (!sk_has_account(sk))
1635-
return;
1636-
1637-
reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
1638-
1639-
if (reclaimable > (int)PAGE_SIZE)
1640-
__sk_mem_reclaim(sk, reclaimable - 1);
1641-
}
1642-
16431630
static inline void sk_mem_charge(struct sock *sk, int size)
16441631
{
16451632
if (!sk_has_account(sk))
16461633
return;
16471634
sk->sk_forward_alloc -= size;
16481635
}
16491636

1650-
/* the following macros control memory reclaiming in sk_mem_uncharge()
1637+
/* the following macros control memory reclaiming in mptcp_rmem_uncharge()
16511638
*/
16521639
#define SK_RECLAIM_THRESHOLD (1 << 21)
16531640
#define SK_RECLAIM_CHUNK (1 << 20)
16541641

16551642
static inline void sk_mem_uncharge(struct sock *sk, int size)
16561643
{
1657-
int reclaimable;
1658-
16591644
if (!sk_has_account(sk))
16601645
return;
16611646
sk->sk_forward_alloc += size;
1662-
reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
1663-
1664-
/* Avoid a possible overflow.
1665-
* TCP send queues can make this happen, if sk_mem_reclaim()
1666-
* is not called and more than 2 GBytes are released at once.
1667-
*
1668-
* If we reach 2 MBytes, reclaim 1 MBytes right now, there is
1669-
* no need to hold that much forward allocation anyway.
1670-
*/
1671-
if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
1672-
__sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
1647+
sk_mem_reclaim(sk);
16731648
}
16741649

16751650
/*

net/core/datagram.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,6 @@ EXPORT_SYMBOL(skb_recv_datagram);
320320
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
321321
{
322322
consume_skb(skb);
323-
sk_mem_reclaim_partial(sk);
324323
}
325324
EXPORT_SYMBOL(skb_free_datagram);
326325

@@ -336,7 +335,6 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
336335
slow = lock_sock_fast(sk);
337336
sk_peek_offset_bwd(sk, len);
338337
skb_orphan(skb);
339-
sk_mem_reclaim_partial(sk);
340338
unlock_sock_fast(sk, slow);
341339

342340
/* skb is now orphaned, can be freed outside of locked section */
@@ -396,7 +394,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
396394
NULL);
397395

398396
kfree_skb(skb);
399-
sk_mem_reclaim_partial(sk);
400397
return err;
401398
}
402399
EXPORT_SYMBOL(skb_kill_datagram);

net/ipv4/tcp.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -858,9 +858,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
858858
{
859859
struct sk_buff *skb;
860860

861-
if (unlikely(tcp_under_memory_pressure(sk)))
862-
sk_mem_reclaim_partial(sk);
863-
864861
skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
865862
if (likely(skb)) {
866863
bool mem_scheduled;
@@ -2764,8 +2761,6 @@ void __tcp_close(struct sock *sk, long timeout)
27642761
__kfree_skb(skb);
27652762
}
27662763

2767-
sk_mem_reclaim(sk);
2768-
27692764
/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
27702765
if (sk->sk_state == TCP_CLOSE)
27712766
goto adjudge_to_death;
@@ -2873,7 +2868,6 @@ void __tcp_close(struct sock *sk, long timeout)
28732868
}
28742869
}
28752870
if (sk->sk_state != TCP_CLOSE) {
2876-
sk_mem_reclaim(sk);
28772871
if (tcp_check_oom(sk, 0)) {
28782872
tcp_set_state(sk, TCP_CLOSE);
28792873
tcp_send_active_reset(sk, GFP_ATOMIC);
@@ -2951,7 +2945,6 @@ void tcp_write_queue_purge(struct sock *sk)
29512945
}
29522946
tcp_rtx_queue_purge(sk);
29532947
INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
2954-
sk_mem_reclaim(sk);
29552948
tcp_clear_all_retrans_hints(tcp_sk(sk));
29562949
tcp_sk(sk)->packets_out = 0;
29572950
inet_csk(sk)->icsk_backoff = 0;

net/ipv4/tcp_input.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -805,7 +805,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
805805
* restart window, so that we send ACKs quickly.
806806
*/
807807
tcp_incr_quickack(sk, TCP_MAX_QUICKACKS);
808-
sk_mem_reclaim(sk);
809808
}
810809
}
811810
icsk->icsk_ack.lrcvtime = now;
@@ -4390,7 +4389,6 @@ void tcp_fin(struct sock *sk)
43904389
skb_rbtree_purge(&tp->out_of_order_queue);
43914390
if (tcp_is_sack(tp))
43924391
tcp_sack_reset(&tp->rx_opt);
4393-
sk_mem_reclaim(sk);
43944392

43954393
if (!sock_flag(sk, SOCK_DEAD)) {
43964394
sk->sk_state_change(sk);
@@ -5336,7 +5334,6 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
53365334
tcp_drop_reason(sk, rb_to_skb(node),
53375335
SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
53385336
if (!prev || goal <= 0) {
5339-
sk_mem_reclaim(sk);
53405337
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
53415338
!tcp_under_memory_pressure(sk))
53425339
break;
@@ -5383,7 +5380,6 @@ static int tcp_prune_queue(struct sock *sk)
53835380
skb_peek(&sk->sk_receive_queue),
53845381
NULL,
53855382
tp->copied_seq, tp->rcv_nxt);
5386-
sk_mem_reclaim(sk);
53875383

53885384
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
53895385
return 0;

net/ipv4/tcp_timer.c

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -290,15 +290,13 @@ void tcp_delack_timer_handler(struct sock *sk)
290290
{
291291
struct inet_connection_sock *icsk = inet_csk(sk);
292292

293-
sk_mem_reclaim_partial(sk);
294-
295293
if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
296294
!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
297-
goto out;
295+
return;
298296

299297
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
300298
sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
301-
goto out;
299+
return;
302300
}
303301
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
304302

@@ -317,10 +315,6 @@ void tcp_delack_timer_handler(struct sock *sk)
317315
tcp_send_ack(sk);
318316
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
319317
}
320-
321-
out:
322-
if (tcp_under_memory_pressure(sk))
323-
sk_mem_reclaim(sk);
324318
}
325319

326320

@@ -600,11 +594,11 @@ void tcp_write_timer_handler(struct sock *sk)
600594

601595
if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
602596
!icsk->icsk_pending)
603-
goto out;
597+
return;
604598

605599
if (time_after(icsk->icsk_timeout, jiffies)) {
606600
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
607-
goto out;
601+
return;
608602
}
609603

610604
tcp_mstamp_refresh(tcp_sk(sk));
@@ -626,9 +620,6 @@ void tcp_write_timer_handler(struct sock *sk)
626620
tcp_probe_timer(sk);
627621
break;
628622
}
629-
630-
out:
631-
sk_mem_reclaim(sk);
632623
}
633624

634625
static void tcp_write_timer(struct timer_list *t)
@@ -743,8 +734,6 @@ static void tcp_keepalive_timer (struct timer_list *t)
743734
elapsed = keepalive_time_when(tp) - elapsed;
744735
}
745736

746-
sk_mem_reclaim(sk);
747-
748737
resched:
749738
inet_csk_reset_keepalive_timer (sk, elapsed);
750739
goto out;

net/iucv/af_iucv.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,6 @@ static void iucv_sock_destruct(struct sock *sk)
278278
skb_queue_purge(&sk->sk_receive_queue);
279279
skb_queue_purge(&sk->sk_error_queue);
280280

281-
sk_mem_reclaim(sk);
282-
283281
if (!sock_flag(sk, SOCK_DEAD)) {
284282
pr_err("Attempt to release alive iucv socket %p\n", sk);
285283
return;

net/mptcp/protocol.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -975,7 +975,7 @@ static void __mptcp_mem_reclaim_partial(struct sock *sk)
975975
if (reclaimable > (int)PAGE_SIZE)
976976
__mptcp_rmem_reclaim(sk, reclaimable - 1);
977977

978-
sk_mem_reclaim_partial(sk);
978+
sk_mem_reclaim(sk);
979979
}
980980

981981
static void mptcp_mem_reclaim_partial(struct sock *sk)

net/sctp/sm_statefuns.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6590,8 +6590,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
65906590
pr_debug("%s: under pressure, reneging for tsn:%u\n",
65916591
__func__, tsn);
65926592
deliver = SCTP_CMD_RENEGE;
6593-
} else {
6594-
sk_mem_reclaim(sk);
65956593
}
65966594
}
65976595

net/sctp/socket.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1824,9 +1824,6 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
18241824
if (sctp_wspace(asoc) < (int)msg_len)
18251825
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
18261826

1827-
if (sk_under_memory_pressure(sk))
1828-
sk_mem_reclaim(sk);
1829-
18301827
if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
18311828
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
18321829
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
@@ -9195,8 +9192,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
91959192
goto do_error;
91969193
if (signal_pending(current))
91979194
goto do_interrupted;
9198-
if (sk_under_memory_pressure(sk))
9199-
sk_mem_reclaim(sk);
92009195
if ((int)msg_len <= sctp_wspace(asoc) &&
92019196
sk_wmem_schedule(sk, msg_len))
92029197
break;

net/sctp/stream_interleave.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -979,8 +979,6 @@ static void sctp_renege_events(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
979979

980980
if (freed >= needed && sctp_ulpevent_idata(ulpq, chunk, gfp) <= 0)
981981
sctp_intl_start_pd(ulpq, gfp);
982-
983-
sk_mem_reclaim(asoc->base.sk);
984982
}
985983

986984
static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid,

0 commit comments

Comments (0)