Skip to content

Commit

Permalink
vduse: Support registering userspace memory for IOTLB
Browse files Browse the repository at this point in the history
Introduce two ioctls, VDUSE_IOTLB_REG_UMEM and
VDUSE_IOTLB_DEREG_UMEM, to support registering
and de-registering userspace memory for the IOTLB
in the virtio-vdpa case.

For now, userspace memory can only be registered
as the IOTLB bounce buffer.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
  • Loading branch information
YongjiXie authored and intel-lab-lkp committed Jul 6, 2022
1 parent 8d9ff6a commit 9be6992
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 0 deletions.
139 changes: 139 additions & 0 deletions drivers/vdpa/vdpa_user/vduse_dev.c
Expand Up @@ -21,6 +21,7 @@
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
Expand Down Expand Up @@ -64,6 +65,13 @@ struct vduse_vdpa {
struct vduse_dev *dev;
};

/*
 * Bookkeeping for the userspace memory currently registered for the IOTLB
 * of a VDUSE device. An instance is allocated and filled in by
 * vduse_dev_reg_iotlb_mem() and torn down by vduse_dev_dereg_iotlb_mem().
 */
struct vduse_iotlb_mem {
/* IOVA the memory was registered at */
unsigned long iova;
/* number of entries in @pages, i.e. number of pinned pages */
unsigned long npages;
/* array of the pinned user pages backing the registration */
struct page **pages;
/*
 * mm of the task that registered the memory; a reference is taken with
 * mmgrab() so that its pinned_vm counter can be adjusted at
 * de-registration time
 */
struct mm_struct *mm;
};

struct vduse_dev {
struct vduse_vdpa *vdev;
struct device *dev;
Expand Down Expand Up @@ -95,6 +103,8 @@ struct vduse_dev {
u8 status;
u32 vq_num;
u32 vq_align;
struct vduse_iotlb_mem *iotlb_mem;
struct mutex mem_lock;
};

struct vduse_dev_msg {
Expand Down Expand Up @@ -917,6 +927,101 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
return ret;
}

/*
 * De-register the userspace memory previously registered for the IOTLB.
 *
 * @dev:  VDUSE device the memory was registered on
 * @iova: IOVA given at registration time
 * @size: size of the region; must equal the domain's bounce size
 *
 * Runs entirely under dev->mem_lock.
 *
 * Returns 0 on success, -ENOENT if no memory is currently registered, or
 * -EINVAL if @iova / @size do not describe the registered region.
 */
static int vduse_dev_dereg_iotlb_mem(struct vduse_dev *dev,
				     u64 iova, u64 size)
{
	int err = 0;

	mutex_lock(&dev->mem_lock);

	if (!dev->iotlb_mem) {
		err = -ENOENT;
	} else if (dev->iotlb_mem->iova != iova ||
		   size != dev->domain->bounce_size) {
		err = -EINVAL;
	} else {
		/* Detach the pages from the domain before dropping the pins. */
		vduse_domain_remove_user_bounce_pages(dev->domain);
		unpin_user_pages_dirty_lock(dev->iotlb_mem->pages,
					    dev->iotlb_mem->npages, true);

		/* Undo the pinned_vm accounting on the mm saved at registration. */
		atomic64_sub(dev->iotlb_mem->npages,
			     &dev->iotlb_mem->mm->pinned_vm);
		mmdrop(dev->iotlb_mem->mm);

		vfree(dev->iotlb_mem->pages);
		kfree(dev->iotlb_mem);
		dev->iotlb_mem = NULL;
	}

	mutex_unlock(&dev->mem_lock);
	return err;
}

/*
 * Register a range of the calling task's memory for the IOTLB of @dev.
 *
 * @dev:   VDUSE device to register the memory on
 * @iova:  IOVA of the region; only 0 is accepted
 * @uaddr: userspace start address; must be page-aligned
 * @size:  size of the region; must equal the domain's bounce size
 *
 * The user pages are pinned long-term, handed to the domain via
 * vduse_domain_add_user_bounce_pages() and accounted in the pinned_vm
 * counter of current->mm. Only one registration may exist at a time.
 *
 * Returns 0 on success, -EINVAL on bad arguments, -EEXIST if memory is
 * already registered, -ENOMEM on allocation failure, when the
 * RLIMIT_MEMLOCK limit would be exceeded or when fewer pages than requested
 * could be pinned, or the error returned by pin_user_pages() /
 * vduse_domain_add_user_bounce_pages().
 */
static int vduse_dev_reg_iotlb_mem(struct vduse_dev *dev,
				   u64 iova, u64 uaddr, u64 size)
{
	struct vduse_iotlb_mem *entry = NULL;
	struct page **pages = NULL;
	unsigned long count, limit;
	long got = 0;
	int err;

	/* Argument checks, evaluated in the same order as a single || chain. */
	if (size != dev->domain->bounce_size)
		return -EINVAL;
	if (iova != 0)
		return -EINVAL;
	if (uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);

	if (dev->iotlb_mem) {
		err = -EEXIST;
		/* Nothing allocated yet; the frees below accept NULL. */
		goto out_free;
	}

	count = size >> PAGE_SHIFT;
	pages = __vmalloc(array_size(count, sizeof(struct page *)),
			  GFP_KERNEL_ACCOUNT);
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!pages || !entry) {
		err = -ENOMEM;
		goto out_free;
	}

	mmap_read_lock(current->mm);

	/* Refuse to go past the caller's locked-memory limit. */
	limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (count + atomic64_read(&current->mm->pinned_vm) > limit) {
		err = -ENOMEM;
		goto out_mmap_unlock;
	}

	got = pin_user_pages(uaddr, count, FOLL_LONGTERM | FOLL_WRITE,
			     pages, NULL);
	if (got != count) {
		/* Either a hard error or a partial pin. */
		err = got < 0 ? got : -ENOMEM;
		goto out_unpin;
	}

	err = vduse_domain_add_user_bounce_pages(dev->domain, pages, got);
	if (err)
		goto out_unpin;

	atomic64_add(count, &current->mm->pinned_vm);

	entry->pages = pages;
	entry->npages = got;
	entry->iova = iova;
	entry->mm = current->mm;
	/* Keep the mm_struct around for the accounting done at dereg time. */
	mmgrab(current->mm);

	dev->iotlb_mem = entry;

	mmap_read_unlock(current->mm);
	mutex_unlock(&dev->mem_lock);
	return 0;

out_unpin:
	if (got > 0)
		unpin_user_pages(pages, got);
out_mmap_unlock:
	mmap_read_unlock(current->mm);
out_free:
	vfree(pages);
	kfree(entry);
	mutex_unlock(&dev->mem_lock);
	return err;
}

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
Expand All @@ -943,6 +1048,16 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
if (entry.start > entry.last)
break;

if (domain->bounce_map && dev->iotlb_mem) {
ret = -EEXIST;
if (entry.start >= 0 &&
entry.last < domain->bounce_size)
break;

if (entry.start < domain->bounce_size)
entry.start = domain->bounce_size;
}

spin_lock(&domain->iotlb_lock);
map = vhost_iotlb_itree_first(domain->iotlb,
entry.start, entry.last);
Expand Down Expand Up @@ -1102,6 +1217,28 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
ret = 0;
break;
}
case VDUSE_IOTLB_REG_UMEM: {
struct vduse_iotlb_umem umem;

ret = -EFAULT;
if (copy_from_user(&umem, argp, sizeof(umem)))
break;

ret = vduse_dev_reg_iotlb_mem(dev, umem.iova,
umem.uaddr, umem.size);
break;
}
case VDUSE_IOTLB_DEREG_UMEM: {
struct vduse_iotlb_umem umem;

ret = -EFAULT;
if (copy_from_user(&umem, argp, sizeof(umem)))
break;

ret = vduse_dev_dereg_iotlb_mem(dev, umem.iova,
umem.size);
break;
}
default:
ret = -ENOIOCTLCMD;
break;
Expand All @@ -1114,6 +1251,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{
struct vduse_dev *dev = file->private_data;

vduse_dev_dereg_iotlb_mem(dev, 0, dev->domain->bounce_size);
spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can be processed after reconnection */
list_splice_init(&dev->recv_list, &dev->send_list);
Expand Down Expand Up @@ -1176,6 +1314,7 @@ static struct vduse_dev *vduse_dev_create(void)
return NULL;

mutex_init(&dev->lock);
mutex_init(&dev->mem_lock);
spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list);
Expand Down
28 changes: 28 additions & 0 deletions include/uapi/linux/vduse.h
Expand Up @@ -227,6 +227,34 @@ struct vduse_iotlb_info {
/* Get IOTLB information, e.g. bounce buffer size */
#define VDUSE_IOTLB_GET_INFO _IOR(VDUSE_BASE, 0x18, struct vduse_iotlb_info)

/**
 * struct vduse_iotlb_umem - userspace memory configuration
 * @uaddr: start address of the userspace memory; must be page-aligned
 * @iova: IOVA of the userspace memory; currently must be equal to the
 *        bounce IOVA returned by VDUSE_IOTLB_GET_INFO
 * @size: size of the userspace memory; currently must be equal to the
 *        bounce size returned by VDUSE_IOTLB_GET_INFO
 * @reserved: for future use, needs to be initialized to zero
 *
 * Structure used by the VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
 * ioctls to register/de-register userspace memory for the IOTLB.
 */
struct vduse_iotlb_umem {
__u64 uaddr;
__u64 iova;
__u64 size;
__u64 reserved[3];
};

/*
 * Register userspace memory for the IOTLB. Currently, userspace memory
 * can only be registered as the bounce buffer.
 */
#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iotlb_umem)

/* De-register the userspace memory. The caller should set the iova and size fields. */
#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x1a, struct vduse_iotlb_umem)

/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */

/**
Expand Down

0 comments on commit 9be6992

Please sign in to comment.