@@ -65,6 +65,16 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
6565/* Whether to overlap the regions of memory vCPUs access. */
6666static bool overlap_memory_access ;
6767
68+ /*
69+ * Whether the test should only warn, rather than fail, when too many pages
70+ * are still idle (i.e., when having too many idle pages is expected).
71+ * -1: Not yet set.
72+ * 0: We do not expect too many idle pages, so FAIL if too many idle pages.
73+ * 1: Having too many idle pages is expected, so merely print a warning if
74+ * too many idle pages are found.
75+ */
76+ static int idle_pages_warn_only = -1 ;
77+
6878struct test_params {
6979 /* The backing source for the region of memory. */
7080 enum vm_mem_backing_src_type backing_src ;
@@ -177,18 +187,12 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
177187 * arbitrary; high enough that we ensure most memory access went through
178188 * access tracking but low enough as to not make the test too brittle
179189 * over time and across architectures.
180- *
181- * When running the guest as a nested VM, "warn" instead of asserting
182- * as the TLB size is effectively unlimited and the KVM doesn't
183- * explicitly flush the TLB when aging SPTEs. As a result, more pages
184- * are cached and the guest won't see the "idle" bit cleared.
185190 */
186191 if (still_idle >= pages / 10 ) {
187- #ifdef __x86_64__
188- TEST_ASSERT (this_cpu_has (X86_FEATURE_HYPERVISOR ),
192+ TEST_ASSERT (idle_pages_warn_only ,
189193 "vCPU%d: Too many pages still idle (%lu out of %lu)" ,
190194 vcpu_idx , still_idle , pages );
191- #endif
195+
192196 printf ("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
193197 "this will affect performance results.\n" ,
194198 vcpu_idx , still_idle , pages );
@@ -328,6 +332,32 @@ static void run_test(enum vm_guest_mode mode, void *arg)
328332 memstress_destroy_vm (vm );
329333}
330334
335+ static int access_tracking_unreliable (void )
336+ {
337+ #ifdef __x86_64__
338+ /*
339+ * When running nested, the TLB size may be effectively unlimited (for
340+ * example, this is the case when running on KVM L0), and KVM doesn't
341+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
342+ * are cached and the guest won't see the "idle" bit cleared.
343+ */
344+ if (this_cpu_has (X86_FEATURE_HYPERVISOR )) {
345+ puts ("Skipping idle page count sanity check, because the test is run nested" );
346+ return 1 ;
347+ }
348+ #endif
349+ /*
350+ * When NUMA balancing is enabled, guest memory will be unmapped to get
351+ * NUMA faults, dropping the Accessed bits.
352+ */
353+ if (is_numa_balancing_enabled ()) {
354+ puts ("Skipping idle page count sanity check, because NUMA balancing is enabled" );
355+ return 1 ;
356+ }
357+
358+ return 0 ;
359+ }
360+
331361static void help (char * name )
332362{
333363 puts ("" );
@@ -342,6 +372,12 @@ static void help(char *name)
342372 printf (" -v: specify the number of vCPUs to run.\n" );
343373 printf (" -o: Overlap guest memory accesses instead of partitioning\n"
344374 " them into a separate region of memory for each vCPU.\n" );
375+ printf (" -w: Control whether the test warns or fails if more than 10%%\n"
376+ " of pages are still seen as idle/old after accessing guest\n"
377+ " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
378+ " mode, the test fails by default, but switches to warn only\n"
379+ " if NUMA balancing is enabled or the test detects it's running\n"
380+ " in a VM.\n" );
345381 backing_src_help ("-s" );
346382 puts ("" );
347383 exit (0 );
@@ -359,7 +395,7 @@ int main(int argc, char *argv[])
359395
360396 guest_modes_append_default ();
361397
362- while ((opt = getopt (argc , argv , "hm:b:v:os:" )) != -1 ) {
398+ while ((opt = getopt (argc , argv , "hm:b:v:os:w: " )) != -1 ) {
363399 switch (opt ) {
364400 case 'm' :
365401 guest_modes_cmdline (optarg );
@@ -376,6 +412,11 @@ int main(int argc, char *argv[])
376412 case 's' :
377413 params .backing_src = parse_backing_src_type (optarg );
378414 break ;
415+ case 'w' :
416+ idle_pages_warn_only =
417+ atoi_non_negative ("Idle pages warning" ,
418+ optarg );
419+ break ;
379420 case 'h' :
380421 default :
381422 help (argv [0 ]);
@@ -388,6 +429,9 @@ int main(int argc, char *argv[])
388429 "CONFIG_IDLE_PAGE_TRACKING is not enabled" );
389430 close (page_idle_fd );
390431
432+ if (idle_pages_warn_only == -1 )
433+ idle_pages_warn_only = access_tracking_unreliable ();
434+
391435 for_each_guest_mode (run_test , & params );
392436
393437 return 0 ;
0 commit comments