Skip to content

Commit ac383f5

Browse files
sunilmut authored and davem330 committed
hv_sock: perf: Allow the socket buffer size options to influence the actual socket buffers
Currently, the hv_sock buffer size is static and can't scale to the
bandwidth requirements of the application. This change allows the
applications to influence the socket buffer sizes using the SO_SNDBUF and
the SO_RCVBUF socket options.

A few interesting points to note:
1. Since the VMBUS does not allow a resize operation of the ring size, the
   socket buffer size option should be set prior to establishing the
   connection for it to take effect.
2. Setting the socket option comes with the cost of that much memory being
   reserved/allocated by the kernel, for the lifetime of the connection.

Perf data:
Total Data Transfer: 1GB
Single threaded reader/writer
Results below are summarized over 10 iterations.

Linux hvsocket writer + Windows hvsocket reader:
|---------------------------------------------------------------------------------------------|
| Packet size ->  | 128B            | 1KB             | 4KB             | 64KB                |
|---------------------------------------------------------------------------------------------|
| SO_SNDBUF size  | Throughput in MB/s (min/max/avg/median):                                  |
|        v        |                                                                           |
|---------------------------------------------------------------------------------------------|
| Default         | 109/118/114/116 | 636/774/701/700 | 435/507/480/476 | 410/491/462/470     |
| 16KB            | 110/116/112/111 | 575/705/662/671 | 749/900/854/869 | 592/824/692/676     |
| 32KB            | 108/120/115/115 | 703/823/767/772 | 718/878/850/866 | 1593/2124/2000/2085 |
| 64KB            | 108/119/114/114 | 592/732/683/688 | 805/934/903/911 | 1784/1943/1862/1843 |
|---------------------------------------------------------------------------------------------|

Windows hvsocket writer + Linux hvsocket reader:
|-------------------------------------------------------------------------------------------------|
| Packet size ->  | 128B            | 1KB             | 4KB                 | 64KB                |
|-------------------------------------------------------------------------------------------------|
| SO_RCVBUF size  | Throughput in MB/s (min/max/avg/median):                                      |
|        v        |                                                                               |
|-------------------------------------------------------------------------------------------------|
| Default         | 69/82/75/73     | 313/343/333/336 | 418/477/446/445     | 659/701/676/678     |
| 16KB            | 69/83/76/77     | 350/401/375/382 | 506/548/517/516     | 602/624/615/615     |
| 32KB            | 62/83/73/73     | 471/529/496/494 | 830/1046/935/939    | 944/1180/1070/1100  |
| 64KB            | 64/70/68/69     | 467/533/501/497 | 1260/1590/1430/1431 | 1605/1819/1670/1660 |
|-------------------------------------------------------------------------------------------------|

Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 0db355d commit ac383f5

File tree

1 file changed

+40
-10
lines changed

1 file changed

+40
-10
lines changed

net/vmw_vsock/hyperv_transport.c

Lines changed: 40 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -23,14 +23,14 @@
2323
#include <net/sock.h>
2424
#include <net/af_vsock.h>
2525

26-
/* The host side's design of the feature requires 6 exact 4KB pages for
27-
* recv/send rings respectively -- this is suboptimal considering memory
28-
* consumption, however unluckily we have to live with it, before the
29-
* host comes up with a better design in the future.
26+
/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
27+
* stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
28+
* hosts don't have this limitation; but, keep the defaults the same for compat.
3029
*/
3130
#define PAGE_SIZE_4K 4096
3231
#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
3332
#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
33+
#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
3434

3535
/* The MTU is 16KB per the host side's design */
3636
#define HVS_MTU_SIZE (1024 * 16)
@@ -344,9 +344,12 @@ static void hvs_open_connection(struct vmbus_channel *chan)
344344

345345
struct sockaddr_vm addr;
346346
struct sock *sk, *new = NULL;
347-
struct vsock_sock *vnew;
348-
struct hvsock *hvs, *hvs_new;
347+
struct vsock_sock *vnew = NULL;
348+
struct hvsock *hvs = NULL;
349+
struct hvsock *hvs_new = NULL;
350+
int rcvbuf;
349351
int ret;
352+
int sndbuf;
350353

351354
if_type = &chan->offermsg.offer.if_type;
352355
if_instance = &chan->offermsg.offer.if_instance;
@@ -388,9 +391,34 @@ static void hvs_open_connection(struct vmbus_channel *chan)
388391
}
389392

390393
set_channel_read_mode(chan, HV_CALL_DIRECT);
391-
ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
392-
RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
393-
hvs_channel_cb, conn_from_host ? new : sk);
394+
395+
/* Use the socket buffer sizes as hints for the VMBUS ring size. For
396+
* server side sockets, 'sk' is the parent socket and thus, this will
397+
* allow the child sockets to inherit the size from the parent. Keep
398+
* the mins to the default value and align to page size as per VMBUS
399+
* requirements.
400+
* For the max, the socket core library will limit the socket buffer
401+
* size that can be set by the user, but, since currently, the hv_sock
402+
* VMBUS ring buffer is physically contiguous allocation, restrict it
403+
* further.
404+
* Older versions of hv_sock host side code cannot handle bigger VMBUS
405+
* ring buffer size. Use the version number to limit the change to newer
406+
* versions.
407+
*/
408+
if (vmbus_proto_version < VERSION_WIN10_V5) {
409+
sndbuf = RINGBUFFER_HVS_SND_SIZE;
410+
rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
411+
} else {
412+
sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
413+
sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
414+
sndbuf = ALIGN(sndbuf, PAGE_SIZE);
415+
rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
416+
rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
417+
rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
418+
}
419+
420+
ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
421+
conn_from_host ? new : sk);
394422
if (ret != 0) {
395423
if (conn_from_host) {
396424
hvs_new->chan = NULL;
@@ -441,14 +469,16 @@ static u32 hvs_get_local_cid(void)
441469
static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
442470
{
443471
struct hvsock *hvs;
472+
struct sock *sk = sk_vsock(vsk);
444473

445474
hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
446475
if (!hvs)
447476
return -ENOMEM;
448477

449478
vsk->trans = hvs;
450479
hvs->vsk = vsk;
451-
480+
sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
481+
sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
452482
return 0;
453483
}
454484

0 commit comments

Comments
 (0)