KVM: guest_mem: Delay binding of memslot to guest_mem file
This patch uses the functions kvm_gmem_bind() and
kvm_gmem_init_memslot() introduced in the previous patch.

The pointer to kvm is not stored when the guest_mem file is
created. Instead, binding (storing a reference to the memslot that is
using certain ranges in the file) is deferred until pages in the
memslot are requested (kvm_gmem_get_pfn()).

When memslots with private memory are requested by userspace, the
memslot's gmem fields are initialized using kvm_gmem_init_memslot(),
and when the memslot is freed in kvm_free_memslot(),
kvm_gmem_destroy_memslot() is used to clean up the gmem fields.
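
The resulting lifecycle, summarized as an illustrative call-flow sketch
(kvm_gmem_init_memslot() and kvm_gmem_bind() come from the previous
patch; the behavior noted for each step is inferred from this diff, not
quoted from the helpers themselves):

/*
 * KVM_SET_USER_MEMORY_REGION with KVM_MEM_PRIVATE
 *   -> kvm_gmem_init_memslot(new, mem->gmem_fd, mem->gmem_offset)
 *      records the file and offset in slot->gmem; nothing is bound yet,
 *      and the guest_mem file holds no reference to the struct kvm.
 *
 * First kvm_gmem_get_pfn() on the slot
 *   -> kvm_gmem_bind(kvm, slot)
 *      binds on first use: sets gmem->kvm and inserts the slot into
 *      gmem->bindings for its range of the file.
 *
 * Memslot freed via kvm_free_memslot()
 *   -> kvm_gmem_destroy_memslot(slot)
 *      tears down the binding and cleans up the slot's gmem fields.
 */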
Ackerley Tng committed Jul 11, 2023
1 parent 93b31a0 commit dd5ac5e
Showing 2 changed files with 27 additions and 71 deletions.
virt/kvm/guest_mem.c: 24 additions, 65 deletions
@@ -155,19 +155,24 @@ static long kvm_gmem_punch_hole(struct file *file, loff_t offset, loff_t len)
struct kvm_gmem *gmem = file->private_data;
pgoff_t start = offset >> PAGE_SHIFT;
pgoff_t end = (offset + len) >> PAGE_SHIFT;
struct kvm *kvm = gmem->kvm;

/*
* Bindings must be stable across invalidation to ensure the start+end
* are balanced.
*/
filemap_invalidate_lock(file->f_mapping);

kvm_gmem_invalidate_begin(kvm, gmem, start, end);
/*
* If gmem->kvm hasn't been set, nothing has been bound to this file
* and the backing memory hasn't been used before - there is no need
* to invalidate anything
*/
if (gmem->kvm)
kvm_gmem_invalidate_begin(gmem->kvm, gmem, start, end);

truncate_inode_pages_range(file->f_mapping, offset, offset + len - 1);

kvm_gmem_invalidate_end(kvm, gmem, start, end);
if (gmem->kvm)
kvm_gmem_invalidate_end(gmem->kvm, gmem, start, end);

filemap_invalidate_unlock(file->f_mapping);

@@ -258,20 +263,6 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
return 0;
}

static struct file *kvm_gmem_get_file(struct kvm_memory_slot *slot)
{
struct file *file;

rcu_read_lock();

file = rcu_dereference(slot->gmem.file);
if (file && !get_file_rcu(file))
file = NULL;
rcu_read_unlock();

return file;
}

static const struct file_operations kvm_gmem_fops = {
.open = generic_file_open,
.release = kvm_gmem_release,
@@ -346,8 +337,7 @@ static const struct inode_operations kvm_gmem_iops = {
.setattr = kvm_gmem_setattr,
};

static struct inode *kvm_gmem_create_inode(struct kvm *kvm, loff_t size, u64 flags,
struct vfsmount *mnt)
static struct inode *kvm_gmem_create_inode(loff_t size, u64 flags, struct vfsmount *mnt)
{
int err;
struct inode *inode;
@@ -370,8 +360,6 @@ static struct inode *kvm_gmem_create_inode(struct kvm *kvm, loff_t size, u64 flags,

xa_init(&gmem->bindings);

kvm_get_kvm(kvm);
gmem->kvm = kvm;
gmem->flags = flags;

inode->i_op = &kvm_gmem_iops;
@@ -391,13 +379,12 @@ static struct inode *kvm_gmem_create_inode(struct kvm *kvm, loff_t size, u64 flags,
}


static struct file *kvm_gmem_create_file(struct kvm *kvm, loff_t size, u64 flags,
struct vfsmount *mnt)
static struct file *kvm_gmem_create_file(loff_t size, u64 flags, struct vfsmount *mnt)
{
struct file *file;
struct inode *inode;

inode = kvm_gmem_create_inode(kvm, size, flags, mnt);
inode = kvm_gmem_create_inode(size, flags, mnt);
if (IS_ERR(inode))
return ERR_CAST(inode);

@@ -440,7 +427,7 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *gmem)
if (fd < 0)
return fd;

file = kvm_gmem_create_file(kvm, size, flags, kvm_gmem_mnt);
file = kvm_gmem_create_file(size, flags, kvm_gmem_mnt);
if (IS_ERR(file)) {
put_unused_fd(fd);
return PTR_ERR(file);
@@ -605,36 +592,30 @@ void kvm_gmem_destroy_memslot(struct kvm_memory_slot *slot)
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, int *order)
{
int ret;
pgoff_t index = gfn - slot->base_gfn + slot->gmem.index;
struct kvm_gmem *gmem;
struct folio *folio;
struct page *page;
struct file *file;

file = kvm_gmem_get_file(slot);
ret = kvm_gmem_bind(kvm, slot);
if (ret)
return ret;

file = slot->gmem.file;
if (!file)
return -EFAULT;

gmem = file->private_data;

if (WARN_ON_ONCE(xa_load(&gmem->bindings, index) != slot)) {
fput(file);
return -EIO;
}

folio = kvm_gmem_get_folio(file, index);
if (!folio) {
fput(file);
if (!folio)
return -ENOMEM;
}

page = folio_file_page(folio, index);

*pfn = page_to_pfn(page);
*order = thp_order(compound_head(page));

folio_unlock(folio);
fput(file);

return 0;
}
@@ -643,9 +624,6 @@ EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
static void kvm_gmem_evict_inode(struct inode *inode)
{
struct kvm_gmem *gmem = inode->i_mapping->private_data;
struct kvm_memory_slot *slot;
struct kvm *kvm;
unsigned long index;

/*
* If iput() was called before inode is completely set up due to some
@@ -654,40 +632,21 @@ static void kvm_gmem_evict_inode(struct inode *inode)
if (!gmem)
goto basic_cleanup;

kvm = gmem->kvm;

/*
* Prevent concurrent attempts to *unbind* a memslot. This is the last
* reference to the file and thus no new bindings can be created, but
* dereferencing the slot for existing bindings needs to be protected
* against memslot updates, specifically so that unbind doesn't race
* and free the memslot (kvm_gmem_get_file() will return NULL).
* Because each memslot holds a reference to the file, if we get here,
* kvm_gmem_destroy_memslot() would have been called on each
* memslot. This means there should no longer be any bindings, and so
* there is no need for any invalidation on the KVM side.
*/
mutex_lock(&kvm->slots_lock);

xa_for_each(&gmem->bindings, index, slot)
rcu_assign_pointer(slot->gmem.file, NULL);

synchronize_rcu();
BUG_ON(!xa_empty(&gmem->bindings));

/*
* All in-flight operations are gone and new bindings can be created.
* Free the backing memory, and more importantly, zap all SPTEs that
* pointed at this file.
*/
kvm_gmem_invalidate_begin(kvm, gmem, 0, -1ul);
truncate_inode_pages_final(inode->i_mapping);
kvm_gmem_invalidate_end(kvm, gmem, 0, -1ul);

mutex_unlock(&kvm->slots_lock);

WARN_ON_ONCE(!(mapping_empty(inode->i_mapping)));

xa_destroy(&gmem->bindings);
kfree(gmem);

kvm_put_kvm(kvm);

basic_cleanup:
clear_inode(inode);
}
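Since kvm_gmem_get_pfn() now calls kvm_gmem_bind() on every page
request, the helper from the previous patch presumably returns early
once the slot is bound. A minimal sketch of that shape, assuming the
gmem->kvm, gmem->bindings and slot->gmem fields seen in this diff (the
body is an illustration, not the actual helper; locking is elided):

/* Sketch only: an idempotent bind, cheap when already bound. */
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot)
{
	struct kvm_gmem *gmem = slot->gmem.file->private_data;
	pgoff_t start = slot->gmem.index;
	pgoff_t end = start + slot->npages - 1;

	/* Fast path: an earlier kvm_gmem_get_pfn() already bound us. */
	if (xa_load(&gmem->bindings, start) == slot)
		return 0;

	/*
	 * First use of the file: take the kvm reference that used to be
	 * taken at file creation time.
	 */
	if (!gmem->kvm) {
		kvm_get_kvm(kvm);
		gmem->kvm = kvm;
	}

	return xa_err(xa_store_range(&gmem->bindings, start, end, slot,
				     GFP_KERNEL));
}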
virt/kvm/kvm_main.c: 3 additions, 6 deletions
@@ -981,7 +981,7 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
if (slot->flags & KVM_MEM_PRIVATE)
kvm_gmem_unbind(slot);
kvm_gmem_destroy_memslot(slot);

kvm_destroy_dirty_bitmap(slot);

@@ -2061,20 +2061,17 @@ int __kvm_set_memory_region(struct kvm *kvm,
new->flags = mem->flags;
new->userspace_addr = mem->userspace_addr;
if (mem->flags & KVM_MEM_PRIVATE) {
r = kvm_gmem_bind(kvm, new, mem->gmem_fd, mem->gmem_offset);
r = kvm_gmem_init_memslot(new, mem->gmem_fd, mem->gmem_offset);
if (r)
goto out;
}

r = kvm_set_memslot(kvm, old, new, change);
if (r)
goto out_restricted;
goto out;

return 0;

out_restricted:
if (mem->flags & KVM_MEM_PRIVATE)
kvm_gmem_unbind(new);
out:
kfree(new);
return r;
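From userspace nothing changes: the guest_mem fd is created first and
handed to the memslot as before; only the kernel-internal binding is
deferred. A hypothetical flow is sketched below (ioctl and field names
follow what is visible in this series - kvm_create_guest_memfd,
gmem_fd, gmem_offset - but the exact uAPI struct layout at this
revision is an assumption, and shared_backing stands in for the slot's
usual shared mapping):

/* Hypothetical userspace usage; struct layout is assumed. */
struct kvm_create_guest_memfd gmem = {
	.size = 2 * 1024 * 1024,
	.flags = 0,
};
int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);

struct kvm_userspace_memory_region region = {
	.slot = 0,
	.flags = KVM_MEM_PRIVATE,
	.guest_phys_addr = 0,
	.memory_size = 2 * 1024 * 1024,
	.userspace_addr = (unsigned long)shared_backing,
	.gmem_fd = gmem_fd,
	.gmem_offset = 0,
};

/* No binding happens here: kvm_gmem_init_memslot() only records the
 * fd and offset. The first private fault binds via kvm_gmem_get_pfn(). */
ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);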
