Skip to content

Commit 6b88af8

Browse files
Dust Li authored and davem330 committed
net/smc: don't send in the BH context if sock_owned_by_user
Sending data all the way down to the RDMA device is a time-consuming operation (getting a new slot, possibly doing an RDMA Write and sending a CDC, etc.). Moving those operations from BH to user context is good for performance. If the sock_lock is held by the user, we don't try to send data out in the BH context, but just mark that we should send. Since the user will release the sock_lock soon, we can do the sending there. Add smc_release_cb(), which will be called in release_sock() and will try to send in the callback if needed. This patch moves the sending part out of BH if the sock lock is held by the user. In my testing environment, this saves about 20% softirq in the qperf 4K tcp_bw test on the sender side with no noticeable throughput drop. Signed-off-by: Dust Li <dust.li@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent a505cce commit 6b88af8

File tree

3 files changed

+34
-5
lines changed

3 files changed

+34
-5
lines changed

net/smc/af_smc.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,27 @@ void smc_unhash_sk(struct sock *sk)
193193
}
194194
EXPORT_SYMBOL_GPL(smc_unhash_sk);
195195

196+
/* This will be called before user really release sock_lock. So do the
197+
* work which we didn't do because of user hold the sock_lock in the
198+
* BH context
199+
*/
200+
static void smc_release_cb(struct sock *sk)
201+
{
202+
struct smc_sock *smc = smc_sk(sk);
203+
204+
if (smc->conn.tx_in_release_sock) {
205+
smc_tx_pending(&smc->conn);
206+
smc->conn.tx_in_release_sock = false;
207+
}
208+
}
209+
196210
struct proto smc_proto = {
197211
.name = "SMC",
198212
.owner = THIS_MODULE,
199213
.keepalive = smc_set_keepalive,
200214
.hash = smc_hash_sk,
201215
.unhash = smc_unhash_sk,
216+
.release_cb = smc_release_cb,
202217
.obj_size = sizeof(struct smc_sock),
203218
.h.smc_hash = &smc_v4_hashinfo,
204219
.slab_flags = SLAB_TYPESAFE_BY_RCU,
@@ -211,6 +226,7 @@ struct proto smc_proto6 = {
211226
.keepalive = smc_set_keepalive,
212227
.hash = smc_hash_sk,
213228
.unhash = smc_unhash_sk,
229+
.release_cb = smc_release_cb,
214230
.obj_size = sizeof(struct smc_sock),
215231
.h.smc_hash = &smc_v6_hashinfo,
216232
.slab_flags = SLAB_TYPESAFE_BY_RCU,

net/smc/smc.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,10 @@ struct smc_connection {
213213
* data still pending
214214
*/
215215
char urg_rx_byte; /* urgent byte */
216+
bool tx_in_release_sock;
217+
/* flush pending tx data in
218+
* sock release_cb()
219+
*/
216220
atomic_t bytes_to_rcv; /* arrived data,
217221
* not yet received
218222
*/

net/smc/smc_cdc.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
4949
}
5050

5151
if (atomic_dec_and_test(&conn->cdc_pend_tx_wr)) {
52-
/* If this is the last pending WR complete, we must push to
53-
* prevent hang when autocork enabled.
52+
/* If user owns the sock_lock, mark the connection need sending.
53+
* User context will later try to send when it release sock_lock
54+
* in smc_release_cb()
5455
*/
55-
smc_tx_sndbuf_nonempty(conn);
56+
if (sock_owned_by_user(&smc->sk))
57+
conn->tx_in_release_sock = true;
58+
else
59+
smc_tx_pending(conn);
60+
5661
if (unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq)))
5762
wake_up(&conn->cdc_pend_tx_wq);
5863
}
@@ -355,8 +360,12 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
355360
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
356361
if ((diff_cons && smc_tx_prepared_sends(conn)) ||
357362
conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
358-
conn->local_rx_ctrl.prod_flags.urg_data_pending)
359-
smc_tx_sndbuf_nonempty(conn);
363+
conn->local_rx_ctrl.prod_flags.urg_data_pending) {
364+
if (!sock_owned_by_user(&smc->sk))
365+
smc_tx_pending(conn);
366+
else
367+
conn->tx_in_release_sock = true;
368+
}
360369

361370
if (diff_cons && conn->urg_tx_pend &&
362371
atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {

0 commit comments

Comments
 (0)