Skip to content

Commit

Permalink
KVM: Implement dirty quota-based throttling of vcpus
Browse files Browse the repository at this point in the history
Define variables to track and throttle memory dirtying for every vcpu.

dirty_count:    Number of pages the vcpu has dirtied since its creation,
                while dirty logging is enabled.
dirty_quota:    Number of pages the vcpu is allowed to dirty. To dirty
                more, it needs to request more quota by exiting to
                userspace.

Implement the flow for throttling based on dirty quota.

i) Increment dirty_count for the vcpu whenever it dirties a page.
ii) Exit to userspace whenever the dirty quota is exhausted (i.e. dirty
count equals/exceeds dirty quota) to request more dirty quota.

Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com>
Suggested-by: Manish Mishra <manish.mishra@nutanix.com>
Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com>
Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com>
  • Loading branch information
shivam-kumar1 authored and intel-lab-lkp committed Sep 12, 2022
1 parent 372d070 commit 772b4ec
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 4 deletions.
32 changes: 32 additions & 0 deletions Documentation/virt/kvm/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6614,6 +6614,24 @@ array field represents return values. The userspace should update the return
values of the SBI call before resuming the VCPU. For more details on the RISC-V
SBI spec, refer to https://github.com/riscv/riscv-sbi-doc.

::

/* KVM_EXIT_DIRTY_QUOTA_EXHAUSTED */
struct {
__u64 count;
__u64 quota;
} dirty_quota_exit;
If exit reason is KVM_EXIT_DIRTY_QUOTA_EXHAUSTED, it indicates that the VCPU has
exhausted its dirty quota. The 'dirty_quota_exit' member of kvm_run structure
makes the following information available to the userspace:
'count' field: the current count of pages dirtied by the VCPU. The count can
be skewed by the size of the pages accessed by each VCPU.
'quota' field: the observed dirty quota just before the exit to userspace.
The userspace can design a strategy to allocate the overall scope of dirtying
for the VM among the vcpus. Based on the strategy and the current state of dirty
quota throttling, the userspace can make a decision to either update (increase)
the quota or to put the VCPU to sleep for some time.

::

/* KVM_EXIT_NOTIFY */
Expand Down Expand Up @@ -6668,6 +6686,20 @@ values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.

::

/*
* Number of pages the vCPU is allowed to have dirtied over its entire
* lifetime. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if the quota
* is reached/exceeded.
*/
__u64 dirty_quota;
Please note that enforcing the quota is best effort, as the guest may dirty
multiple pages before KVM can recheck the quota. However, unless KVM is using
a hardware-based dirty ring buffer, e.g. Intel's Page Modification Logging,
KVM will detect quota exhaustion within a handful of dirtied pages. If a
hardware ring buffer is used, the overrun is bounded by the size of the buffer
(512 entries for PML).

::
};


Expand Down
20 changes: 19 additions & 1 deletion include/linux/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,13 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQUEST_NO_ACTION BIT(10)
/*
* Architecture-independent vcpu->requests bit members
* Bits 4-7 are reserved for more arch-independent bits.
* Bits 5-7 are reserved for more arch-independent bits.
*/
#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
#define KVM_REQ_DIRTY_QUOTA_EXIT 4
#define KVM_REQUEST_ARCH_BASE 8

/*
Expand Down Expand Up @@ -380,6 +381,8 @@ struct kvm_vcpu {
*/
struct kvm_memory_slot *last_used_slot;
u64 last_used_slot_gen;

u64 dirty_quota;
};

/*
Expand Down Expand Up @@ -542,6 +545,21 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}

static inline int kvm_vcpu_check_dirty_quota(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u64 dirty_quota = READ_ONCE(run->dirty_quota);
u64 pages_dirtied = vcpu->stat.generic.pages_dirtied;

if (!dirty_quota || (pages_dirtied < dirty_quota))
return 1;

run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED;
run->dirty_quota_exit.count = pages_dirtied;
run->dirty_quota_exit.quota = dirty_quota;
return 0;
}

/*
* Some of the bitops functions do not support too long bitmaps.
* This number must be determined not to exceed such limits.
Expand Down
1 change: 1 addition & 0 deletions include/linux/kvm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ struct kvm_vcpu_stat_generic {
u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT];
u64 halt_wait_hist[HALT_POLL_HIST_COUNT];
u64 blocking;
u64 pages_dirtied;
};

#define KVM_STATS_NAME_SIZE 48
Expand Down
12 changes: 12 additions & 0 deletions include/uapi/linux/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_RISCV_SBI 35
#define KVM_EXIT_RISCV_CSR 36
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_DIRTY_QUOTA_EXHAUSTED 38

/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
Expand Down Expand Up @@ -510,6 +511,11 @@ struct kvm_run {
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
__u32 flags;
} notify;
/* KVM_EXIT_DIRTY_QUOTA_EXHAUSTED */
struct {
__u64 count;
__u64 quota;
} dirty_quota_exit;
/* Fix the size of the union. */
char padding[256];
};
Expand All @@ -531,6 +537,12 @@ struct kvm_run {
struct kvm_sync_regs regs;
char padding[SYNC_REGS_SIZE_BYTES];
} s;
/*
* Number of pages the vCPU is allowed to have dirtied over its entire
* lifetime. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if the
* quota is reached/exceeded.
*/
__u64 dirty_quota;
};

/* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
Expand Down
26 changes: 23 additions & 3 deletions virt/kvm/kvm_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3298,18 +3298,36 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);

static void kvm_vcpu_is_dirty_quota_exhausted(struct kvm_vcpu *vcpu)
{
u64 dirty_quota = READ_ONCE(vcpu->run->dirty_quota);

if (!dirty_quota || (vcpu->stat.generic.pages_dirtied < dirty_quota))
return;

/*
* Snapshot the quota to report it to userspace. The dirty count will be
* captured when the request is processed.
*/
vcpu->dirty_quota = dirty_quota;
kvm_make_request(KVM_REQ_DIRTY_QUOTA_EXIT, vcpu);
}

void mark_page_dirty_in_slot(struct kvm *kvm,
const struct kvm_memory_slot *memslot,
gfn_t gfn)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

#ifdef CONFIG_HAVE_KVM_DIRTY_RING
if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
return;
#endif

if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
if (!memslot)
return;

WARN_ON_ONCE(!vcpu->stat.generic.pages_dirtied++);

if (kvm_slot_dirty_track_enabled(memslot)) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
u32 slot = (memslot->as_id << 16) | memslot->id;

Expand All @@ -3318,6 +3336,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
slot, rel_gfn);
else
set_bit_le(rel_gfn, memslot->dirty_bitmap);

kvm_vcpu_is_dirty_quota_exhausted(vcpu);
}
}
EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
Expand Down

0 comments on commit 772b4ec

Please sign in to comment.