/*
* Copyright (c) 2000-2009 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
/*
* Mach Operating System
* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
/*
*/
/*
* File: vm_fault.c
* Author: Avadis Tevanian, Jr., Michael Wayne Young
*
* Page fault handling module.
*/
#include <mach_cluster_stats.h>
#include <mach_pagemap.h>
#include <libkern/OSAtomic.h>
#include <mach/mach_types.h>
#include <mach/kern_return.h>
#include <mach/message.h> /* for error codes */
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
/* For memory_object_data_{request,unlock} */
#include <mach/sdt.h>
#include <kern/kern_types.h>
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <kern/xpr.h>
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/policy_internal.h>
#include <vm/vm_compressor.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_external.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */
#include <vm/vm_shared_region.h>
#include <sys/codesign.h>
#include <sys/reason.h>
#include <sys/signalvar.h>
#include <san/kasan.h>
#define VM_FAULT_CLASSIFY 0
#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */
unsigned int vm_object_pagein_throttle = 16;
/*
* We apply a hard throttle to the demand zero rate of tasks that we believe are running out of control; it
* kicks in when swap space runs out. 64-bit programs have massive address spaces and can leak enormous amounts
* of memory if they're buggy and can run the system completely out of swap space. If this happens, we
* impose a hard throttle on them to prevent them from taking the last bit of memory left. This helps
* keep the UI active so that the user has a chance to kill the offending task before the system
* completely hangs.
*
* The hard throttle is only applied when the system is nearly completely out of swap space and is only applied
* to tasks that appear to be bloated. When swap runs out, any task using more than vm_hard_throttle_threshold
* will be throttled. The throttling is done by giving the thread that's trying to demand zero a page a
* delay of HARD_THROTTLE_DELAY microseconds before being allowed to try the page fault again.
*/
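/*
* An illustrative, simplified restatement of how vm_fault_check() (below)
* applies this delay on the zero-fill path; see that routine for the
* actual logic:
*
*	if ((throttle_delay = vm_page_throttled(FALSE))) {
*		delay(throttle_delay);	// HARD_ or SOFT_THROTTLE_DELAY, in usecs
*		return VM_FAULT_MEMORY_SHORTAGE;	// caller retries the fault
*	}
*/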
extern void throttle_lowpri_io(int);
extern struct vnode *vnode_pager_lookup_vnode(memory_object_t);
uint64_t vm_hard_throttle_threshold;
#define NEED_TO_HARD_THROTTLE_THIS_TASK() (vm_wants_task_throttled(current_task()) || \
((vm_page_free_count < vm_page_throttle_limit || \
HARD_THROTTLE_LIMIT_REACHED()) && \
proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO) >= THROTTLE_LEVEL_THROTTLED))
#define HARD_THROTTLE_DELAY 10000 /* 10000 us == 10 ms */
#define SOFT_THROTTLE_DELAY 200 /* 200 us == .2 ms */
#define VM_PAGE_CREATION_THROTTLE_PERIOD_SECS 6
#define VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC 20000
boolean_t current_thread_aborted(void);
/* Forward declarations of internal routines. */
static kern_return_t vm_fault_wire_fast(
vm_map_t map,
vm_map_offset_t va,
vm_prot_t prot,
vm_tag_t wire_tag,
vm_map_entry_t entry,
pmap_t pmap,
vm_map_offset_t pmap_addr,
ppnum_t *physpage_p);
static kern_return_t vm_fault_internal(
vm_map_t map,
vm_map_offset_t vaddr,
vm_prot_t caller_prot,
boolean_t change_wiring,
vm_tag_t wire_tag,
int interruptible,
pmap_t pmap,
vm_map_offset_t pmap_addr,
ppnum_t *physpage_p);
static void vm_fault_copy_cleanup(
vm_page_t page,
vm_page_t top_page);
static void vm_fault_copy_dst_cleanup(
vm_page_t page);
#if VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t object,
vm_object_offset_t offset,
vm_prot_t fault_type);
extern void vm_fault_classify_init(void);
#endif
unsigned long vm_pmap_enter_blocked = 0;
unsigned long vm_pmap_enter_retried = 0;
unsigned long vm_cs_validates = 0;
unsigned long vm_cs_revalidates = 0;
unsigned long vm_cs_query_modified = 0;
unsigned long vm_cs_validated_dirtied = 0;
unsigned long vm_cs_bitmap_validated = 0;
#if PMAP_CS
uint64_t vm_cs_defer_to_pmap_cs = 0;
uint64_t vm_cs_defer_to_pmap_cs_not = 0;
#endif /* PMAP_CS */
void vm_pre_fault(vm_map_offset_t);
extern char *kdp_compressor_decompressed_page;
extern addr64_t kdp_compressor_decompressed_page_paddr;
extern ppnum_t kdp_compressor_decompressed_page_ppnum;
struct vmrtfr {
int vmrtfr_maxi;
int vmrtfr_curi;
int64_t vmrtf_total;
vm_rtfault_record_t *vm_rtf_records;
} vmrtfrs;
#define VMRTF_DEFAULT_BUFSIZE (4096)
#define VMRTF_NUM_RECORDS_DEFAULT (VMRTF_DEFAULT_BUFSIZE / sizeof(vm_rtfault_record_t))
int vmrtf_num_records = VMRTF_NUM_RECORDS_DEFAULT;
static void vm_rtfrecord_lock(void);
static void vm_rtfrecord_unlock(void);
static void vm_record_rtfault(thread_t, uint64_t, vm_map_offset_t, int);
lck_spin_t vm_rtfr_slock;
extern lck_grp_t vm_page_lck_grp_bucket;
extern lck_attr_t vm_page_lck_attr;
/*
* Routine: vm_fault_init
* Purpose:
* Initialize our private data structures.
*/
void
vm_fault_init(void)
{
int i, vm_compressor_temp;
boolean_t need_default_val = TRUE;
/*
* Choose a value for the hard throttle threshold based on the amount of ram. The threshold is
* computed as a percentage of available memory, and the percentage used is scaled inversely with
* the amount of memory. The percentage runs between 10% and 35%. We use 35% for small memory systems
* and reduce the value down to 10% for very large memory configurations. This helps give us a
* definition of a memory hog that makes more sense relative to the amount of ram in the machine.
* The formula here simply uses the number of gigabytes of ram to adjust the percentage.
*/
vm_hard_throttle_threshold = sane_size * (35 - MIN((int)(sane_size / (1024*1024*1024)), 25)) / 100;
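/*
* Worked example (illustrative): a 16 GB machine gets 35 - MIN(16, 25) = 19%,
* so the threshold is roughly 16 GB * 19 / 100, i.e. about 3 GB. A 4 GB
* machine gets 35 - 4 = 31% (about 1.24 GB), while anything with 25 GB or
* more of ram bottoms out at the 10% floor.
*/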
/*
* Configure compressed pager behavior. A boot arg takes precedence over a device tree entry.
*/
if (PE_parse_boot_argn("vm_compressor", &vm_compressor_temp, sizeof (vm_compressor_temp))) {
for ( i = 0; i < VM_PAGER_MAX_MODES; i++) {
if (vm_compressor_temp > 0 &&
((vm_compressor_temp & ( 1 << i)) == vm_compressor_temp)) {
need_default_val = FALSE;
vm_compressor_mode = vm_compressor_temp;
break;
}
}
if (need_default_val)
printf("Ignoring \"vm_compressor\" boot arg %d\n", vm_compressor_temp);
}
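/*
* For example (illustrative): booting with vm_compressor=4 selects mode 4,
* since exactly one mode bit is set; a value such as vm_compressor=5 sets
* two bits, fails the test above for every i, and falls through to the
* default below.
*/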
if (need_default_val) {
/* If no boot arg or incorrect boot arg, try device tree. */
PE_get_default("kern.vm_compressor", &vm_compressor_mode, sizeof(vm_compressor_mode));
}
printf("\"vm_compressor_mode\" is %d\n", vm_compressor_mode);
}
void
vm_rtfault_record_init(void)
{
PE_parse_boot_argn("vm_rtfault_records", &vmrtf_num_records, sizeof(vmrtf_num_records));
assert(vmrtf_num_records >= 1);
vmrtf_num_records = MAX(vmrtf_num_records, 1);
size_t kallocsz = vmrtf_num_records * sizeof(vm_rtfault_record_t);
vmrtfrs.vm_rtf_records = kalloc(kallocsz);
bzero(vmrtfrs.vm_rtf_records, kallocsz);
vmrtfrs.vmrtfr_maxi = vmrtf_num_records - 1;
lck_spin_init(&vm_rtfr_slock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
}
/*
* Routine: vm_fault_cleanup
* Purpose:
* Clean up the result of vm_fault_page.
* Results:
* The paging reference for "object" is released.
* "object" is unlocked.
* If "top_page" is not null, "top_page" is
* freed and the paging reference for the object
* containing it is released.
*
* In/out conditions:
* "object" must be locked.
*/
void
vm_fault_cleanup(
vm_object_t object,
vm_page_t top_page)
{
vm_object_paging_end(object);
vm_object_unlock(object);
if (top_page != VM_PAGE_NULL) {
object = VM_PAGE_OBJECT(top_page);
vm_object_lock(object);
VM_PAGE_FREE(top_page);
vm_object_paging_end(object);
vm_object_unlock(object);
}
}
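/*
* Sketch of the usual pairing with vm_fault_page() (arguments abbreviated,
* error handling elided; see vm_fault_page()'s block comment below for the
* exact object/page ownership rules):
*
*	kr = vm_fault_page(object, offset, fault_type, FALSE, FALSE,
*	    &prot, &result_page, &top_page, NULL, &error_code,
*	    FALSE, FALSE, &fault_info);
*	if (kr == VM_FAULT_SUCCESS) {
*		// ... consume result_page (still busy, object locked) ...
*		PAGE_WAKEUP_DONE(result_page);
*		vm_fault_cleanup(VM_PAGE_OBJECT(result_page), top_page);
*	}
*/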
#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)
boolean_t vm_page_deactivate_behind = TRUE;
/*
* default sizes given VM_BEHAVIOR_DEFAULT reference behavior
*/
#define VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW 128
#define VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER 16 /* don't make this too big... */
/* we use it to size an array on the stack */
int vm_default_behind = VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW;
#define MAX_SEQUENTIAL_RUN (1024 * 1024 * 1024)
/*
* vm_fault_is_sequential
*
* Determine if sequential access is in progress
* in accordance with the behavior specified.
* Update state to indicate current access pattern.
*
* object must have at least the shared lock held
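*
* Example (illustrative): under VM_BEHAVIOR_DEFAULT, faults at offsets
* 0x0000, 0x1000, 0x2000 with 4K pages leave object->sequential at 0,
* then PAGE_SIZE, then 2 * PAGE_SIZE; a descending access pattern drives
* the counter negative instead.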
*/
static
void
vm_fault_is_sequential(
vm_object_t object,
vm_object_offset_t offset,
vm_behavior_t behavior)
{
vm_object_offset_t last_alloc;
int sequential;
int orig_sequential;
last_alloc = object->last_alloc;
sequential = object->sequential;
orig_sequential = sequential;
switch (behavior) {
case VM_BEHAVIOR_RANDOM:
/*
* reset indicator of sequential behavior
*/
sequential = 0;
break;
case VM_BEHAVIOR_SEQUENTIAL:
if (offset && last_alloc == offset - PAGE_SIZE_64) {
/*
* advance indicator of sequential behavior
*/
if (sequential < MAX_SEQUENTIAL_RUN)
sequential += PAGE_SIZE;
} else {
/*
* reset indicator of sequential behavior
*/
sequential = 0;
}
break;
case VM_BEHAVIOR_RSEQNTL:
if (last_alloc && last_alloc == offset + PAGE_SIZE_64) {
/*
* advance indicator of sequential behavior
*/
if (sequential > -MAX_SEQUENTIAL_RUN)
sequential -= PAGE_SIZE;
} else {
/*
* reset indicator of sequential behavior
*/
sequential = 0;
}
break;
case VM_BEHAVIOR_DEFAULT:
default:
if (offset && last_alloc == (offset - PAGE_SIZE_64)) {
/*
* advance indicator of sequential behavior
*/
if (sequential < 0)
sequential = 0;
if (sequential < MAX_SEQUENTIAL_RUN)
sequential += PAGE_SIZE;
} else if (last_alloc && last_alloc == (offset + PAGE_SIZE_64)) {
/*
* advance indicator of sequential behavior
*/
if (sequential > 0)
sequential = 0;
if (sequential > -MAX_SEQUENTIAL_RUN)
sequential -= PAGE_SIZE;
} else {
/*
* reset indicator of sequential behavior
*/
sequential = 0;
}
break;
}
if (sequential != orig_sequential) {
if (!OSCompareAndSwap(orig_sequential, sequential, (UInt32 *)&object->sequential)) {
/*
* if someone else has already updated object->sequential
* don't bother trying to update it or object->last_alloc
*/
return;
}
}
/*
* I'd like to do this with a OSCompareAndSwap64, but that
* doesn't exist for PPC... however, it shouldn't matter
* that much... last_alloc is maintained so that we can determine
* if a sequential access pattern is taking place... if only
* one thread is banging on this object, no problem with the unprotected
* update... if 2 or more threads are banging away, we run the risk of
* someone seeing a mangled update... however, in the face of multiple
* accesses, no sequential access pattern can develop anyway, so we
* haven't lost any real info.
*/
object->last_alloc = offset;
}
int vm_page_deactivate_behind_count = 0;
/*
* vm_fault_deactivate_behind
*
* Determine if sequential access is in progress
* in accordance with the behavior specified. If
* so, compute a potential page to deactivate and
* deactivate it.
*
* object must be locked.
*
* return TRUE if we actually deactivate a page
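*
* Example (illustrative): with vm_default_behind = 128 pages and the
* 16-page cluster size, once a forward VM_BEHAVIOR_DEFAULT run reaches
* 128 pages, every 16th additional page faulted deactivates the 16-page
* cluster sitting 128 pages behind the current offset.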
*/
static
boolean_t
vm_fault_deactivate_behind(
vm_object_t object,
vm_object_offset_t offset,
vm_behavior_t behavior)
{
int n;
int pages_in_run = 0;
int max_pages_in_run = 0;
int sequential_run;
int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
vm_object_offset_t run_offset = 0;
vm_object_offset_t pg_offset = 0;
vm_page_t m;
vm_page_t page_run[VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER];
pages_in_run = 0;
#if TRACEFAULTPAGE
dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */
#endif
if (object == kernel_object || vm_page_deactivate_behind == FALSE) {
/*
* Do not deactivate pages from the kernel object: they
* are not intended to become pageable. Also do nothing
* if the deactivate-behind mechanism has been disabled.
*/
return FALSE;
}
if ((sequential_run = object->sequential)) {
if (sequential_run < 0) {
sequential_behavior = VM_BEHAVIOR_RSEQNTL;
sequential_run = 0 - sequential_run;
} else {
sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
}
}
switch (behavior) {
case VM_BEHAVIOR_RANDOM:
break;
case VM_BEHAVIOR_SEQUENTIAL:
if (sequential_run >= (int)PAGE_SIZE) {
run_offset = 0 - PAGE_SIZE_64;
max_pages_in_run = 1;
}
break;
case VM_BEHAVIOR_RSEQNTL:
if (sequential_run >= (int)PAGE_SIZE) {
run_offset = PAGE_SIZE_64;
max_pages_in_run = 1;
}
break;
case VM_BEHAVIOR_DEFAULT:
default:
{ vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;
/*
* determine if the run of sequential access has been
* long enough on an object with default access behavior
* to consider it for deactivation
*/
if ((uint64_t)sequential_run >= behind && (sequential_run % (VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER * PAGE_SIZE)) == 0) {
/*
* the comparisons between offset and behind are done
* in this kind of odd fashion in order to prevent wrap around
* at the end points
*/
if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) {
if (offset >= behind) {
run_offset = 0 - behind;
pg_offset = PAGE_SIZE_64;
max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER;
}
} else {
if (offset < -behind) {
run_offset = behind;
pg_offset = 0 - PAGE_SIZE_64;
max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER;
}
}
}
break;
}
}
for (n = 0; n < max_pages_in_run; n++) {
m = vm_page_lookup(object, offset + run_offset + (n * pg_offset));
if (m && !m->vmp_laundry && !m->vmp_busy && !m->vmp_no_cache && (m->vmp_q_state != VM_PAGE_ON_THROTTLED_Q) && !m->vmp_fictitious && !m->vmp_absent) {
page_run[pages_in_run++] = m;
/*
* by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
*
* a TLB flush isn't really needed here since at worst we'll miss the reference bit being
* updated in the PTE if a remote processor still has this mapping cached in its TLB when the
* new reference happens. If no further references happen on the page after that remote TLB flushes
* we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
* by pageout_scan, which is just fine since the last reference would have happened quite far
* in the past (TLB caches don't hang around for very long), and of course could just as easily
* have happened before we did the deactivate_behind.
*/
pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
}
}
if (pages_in_run) {
vm_page_lockspin_queues();
for (n = 0; n < pages_in_run; n++) {
m = page_run[n];
vm_page_deactivate_internal(m, FALSE);
vm_page_deactivate_behind_count++;
#if TRACEFAULTPAGE
dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */
#endif
}
vm_page_unlock_queues();
return TRUE;
}
return FALSE;
}
#if (DEVELOPMENT || DEBUG)
uint32_t vm_page_creation_throttled_hard = 0;
uint32_t vm_page_creation_throttled_soft = 0;
uint64_t vm_page_creation_throttle_avoided = 0;
#endif /* DEVELOPMENT || DEBUG */
static int
vm_page_throttled(boolean_t page_kept)
{
clock_sec_t elapsed_sec;
clock_sec_t tv_sec;
clock_usec_t tv_usec;
thread_t thread = current_thread();
if (thread->options & TH_OPT_VMPRIV)
return (0);
if (thread->t_page_creation_throttled) {
thread->t_page_creation_throttled = 0;
if (page_kept == FALSE)
goto no_throttle;
}
if (NEED_TO_HARD_THROTTLE_THIS_TASK()) {
#if (DEVELOPMENT || DEBUG)
thread->t_page_creation_throttled_hard++;
OSAddAtomic(1, &vm_page_creation_throttled_hard);
#endif /* DEVELOPMENT || DEBUG */
return (HARD_THROTTLE_DELAY);
}
if ((vm_page_free_count < vm_page_throttle_limit || (VM_CONFIG_COMPRESSOR_IS_PRESENT && SWAPPER_NEEDS_TO_UNTHROTTLE())) &&
thread->t_page_creation_count > (VM_PAGE_CREATION_THROTTLE_PERIOD_SECS * VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC)) {
if (vm_page_free_wanted == 0 && vm_page_free_wanted_privileged == 0) {
#if (DEVELOPMENT || DEBUG)
OSAddAtomic64(1, &vm_page_creation_throttle_avoided);
#endif
goto no_throttle;
}
clock_get_system_microtime(&tv_sec, &tv_usec);
elapsed_sec = tv_sec - thread->t_page_creation_time;
if (elapsed_sec <= VM_PAGE_CREATION_THROTTLE_PERIOD_SECS ||
(thread->t_page_creation_count / elapsed_sec) >= VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC) {
if (elapsed_sec >= (3 * VM_PAGE_CREATION_THROTTLE_PERIOD_SECS)) {
/*
* we'll reset our stats to give a well behaved app
* that was unlucky enough to accumulate a bunch of pages
* over a long period of time a chance to get out of
* the throttled state... we reset the counter and timestamp
* so that if it stays under the rate limit for the next second
* it will be back in our good graces... if it exceeds it, it
* will remain in the throttled state
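*
* (worked out: the counter is reset to 20000 * 5 = 100000, which leaves
* 20000 page creations of headroom before the 6 * 20000 = 120000
* threshold tested above re-arms the throttle)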
*/
thread->t_page_creation_time = tv_sec;
thread->t_page_creation_count = VM_PAGE_CREATION_THROTTLE_RATE_PER_SEC * (VM_PAGE_CREATION_THROTTLE_PERIOD_SECS - 1);
}
VM_PAGEOUT_DEBUG(vm_page_throttle_count, 1);
thread->t_page_creation_throttled = 1;
if (VM_CONFIG_COMPRESSOR_IS_PRESENT && HARD_THROTTLE_LIMIT_REACHED()) {
#if (DEVELOPMENT || DEBUG)
thread->t_page_creation_throttled_hard++;
OSAddAtomic(1, &vm_page_creation_throttled_hard);
#endif /* DEVELOPMENT || DEBUG */
return (HARD_THROTTLE_DELAY);
} else {
#if (DEVELOPMENT || DEBUG)
thread->t_page_creation_throttled_soft++;
OSAddAtomic(1, &vm_page_creation_throttled_soft);
#endif /* DEVELOPMENT || DEBUG */
return (SOFT_THROTTLE_DELAY);
}
}
thread->t_page_creation_time = tv_sec;
thread->t_page_creation_count = 0;
}
no_throttle:
thread->t_page_creation_count++;
return (0);
}
/*
* check for various conditions that would
* prevent us from creating a ZF (zero-fill) page...
* cleanup is based on being called from vm_fault_page
*
* object must be locked
* object == m->vmp_object
*/
static vm_fault_return_t
vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, wait_interrupt_t interruptible_state, boolean_t page_throttle)
{
int throttle_delay;
if (object->shadow_severed ||
VM_OBJECT_PURGEABLE_FAULT_ERROR(object)) {
/*
* Either:
* 1. the shadow chain was severed,
* 2. the purgeable object is volatile or empty and is marked
* to fault on access while volatile.
* Just have to return an error at this point
*/
if (m != VM_PAGE_NULL)
VM_PAGE_FREE(m);
vm_fault_cleanup(object, first_m);
thread_interrupt_level(interruptible_state);
return (VM_FAULT_MEMORY_ERROR);
}
if (page_throttle == TRUE) {
if ((throttle_delay = vm_page_throttled(FALSE))) {
/*
* we're throttling zero-fills...
* treat this as if we couldn't grab a page
*/
if (m != VM_PAGE_NULL)
VM_PAGE_FREE(m);
vm_fault_cleanup(object, first_m);
VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
delay(throttle_delay);
if (current_thread_aborted()) {
thread_interrupt_level(interruptible_state);
return VM_FAULT_INTERRUPTED;
}
thread_interrupt_level(interruptible_state);
return (VM_FAULT_MEMORY_SHORTAGE);
}
}
return (VM_FAULT_SUCCESS);
}
/*
* do the work to zero fill a page and
* inject it into the correct paging queue
*
* m->vmp_object must be locked
* page queue lock must NOT be held
*/
static int
vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
{
int my_fault = DBG_ZERO_FILL_FAULT;
vm_object_t object;
object = VM_PAGE_OBJECT(m);
/*
* This is a zero-fill page fault...
*
* Checking the page lock is a waste of
* time; this page was absent, so
* it can't be page locked by a pager.
*
* we also consider it undefined
* with respect to instruction
* execution. i.e. it is the responsibility
* of higher layers to call for an instruction
* sync after changing the contents and before
* sending a program into this area. We
* choose this approach for performance
*/
m->vmp_pmapped = TRUE;
m->vmp_cs_validated = FALSE;
m->vmp_cs_tainted = FALSE;
m->vmp_cs_nx = FALSE;
if (no_zero_fill == TRUE) {
my_fault = DBG_NZF_PAGE_FAULT;
if (m->vmp_absent && m->vmp_busy)
return (my_fault);
} else {
vm_page_zero_fill(m);
VM_STAT_INCR(zero_fill_count);
DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL);
}
assert(!m->vmp_laundry);
assert(object != kernel_object);
//assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
if (!VM_DYNAMIC_PAGING_ENABLED() &&
(object->purgable == VM_PURGABLE_DENY ||
object->purgable == VM_PURGABLE_NONVOLATILE ||
object->purgable == VM_PURGABLE_VOLATILE )) {
vm_page_lockspin_queues();
if (!VM_DYNAMIC_PAGING_ENABLED()) {
assert(!VM_PAGE_WIRED(m));
/*
* can't be on the pageout queue since we don't
* have a pager to try and clean to
*/
vm_page_queues_remove(m, TRUE);
vm_page_check_pageable_safe(m);
vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, vmp_pageq);
m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
vm_page_throttled_count++;
}
vm_page_unlock_queues();
}
return (my_fault);
}
/*
* Routine: vm_fault_page
* Purpose:
* Find the resident page for the virtual memory
* specified by the given virtual memory object
* and offset.
* Additional arguments:
* The required permissions for the page are given
* in "fault_type". Desired permissions are included
* in "protection".
* fault_info is passed along to determine pagein cluster
* limits... it contains the expected reference pattern,
* cluster size if available, etc...
*
* If the desired page is known to be resident (for
* example, because it was previously wired down), asserting
* the "unwiring" parameter will speed the search.
*
* If the operation can be interrupted (by thread_abort
* or thread_terminate), then the "interruptible"
* parameter should be asserted.
*
* Results:
* The page containing the proper data is returned
* in "result_page".
*
* In/out conditions:
* The source object must be locked and referenced,
* and must donate one paging reference. The reference
* is not affected. The paging reference and lock are
* consumed.
*
* If the call succeeds, the object in which "result_page"
* resides is left locked and holding a paging reference.
* If this is not the original object, a busy page in the
* original object is returned in "top_page", to prevent other
* callers from pursuing this same data, along with a paging
* reference for the original object. The "top_page" should
* be destroyed when this guarantee is no longer required.
* The "result_page" is also left busy. It is not removed
* from the pageout queues.
* Special Case:
* A return value of VM_FAULT_SUCCESS_NO_PAGE means that the
* fault succeeded but there's no VM page (i.e. the VM object
* does not actually hold VM pages, but device memory or
* large pages). The object is still locked and we still hold a
* paging_in_progress reference.
*/
unsigned int vm_fault_page_blocked_access = 0;
unsigned int vm_fault_page_forced_retry = 0;
vm_fault_return_t
vm_fault_page(
/* Arguments: */
vm_object_t first_object, /* Object to begin search */
vm_object_offset_t first_offset, /* Offset into object */
vm_prot_t fault_type, /* What access is requested */
boolean_t must_be_resident,/* Must page be resident? */
boolean_t caller_lookup, /* caller looked up page */
/* Modifies in place: */
vm_prot_t *protection, /* Protection for mapping */
vm_page_t *result_page, /* Page found, if successful */
/* Returns: */
vm_page_t *top_page, /* Page in top object, if
* not result_page. */
int *type_of_fault, /* if non-null, fill in with type of fault
* COW, zero-fill, etc... returned in trace point */
/* More arguments: */
kern_return_t *error_code, /* code if page is in error */
boolean_t no_zero_fill, /* don't zero fill absent pages */
boolean_t data_supply, /* treat as data_supply if
* it is a write fault and a full
* page is provided */
vm_object_fault_info_t fault_info)
{
vm_page_t m;
vm_object_t object;
vm_object_offset_t offset;
vm_page_t first_m;
vm_object_t next_object;
vm_object_t copy_object;
boolean_t look_for_page;
boolean_t force_fault_retry = FALSE;
vm_prot_t access_required = fault_type;
vm_prot_t wants_copy_flag;
kern_return_t wait_result;
wait_interrupt_t interruptible_state;
boolean_t data_already_requested = FALSE;
vm_behavior_t orig_behavior;
vm_size_t orig_cluster_size;
vm_fault_return_t error;
int my_fault;
uint32_t try_failed_count;
int interruptible; /* how may the fault be interrupted? */
int external_state = VM_EXTERNAL_STATE_UNKNOWN;
memory_object_t pager;
vm_fault_return_t retval;
int grab_options;
/*
* MUST_ASK_PAGER() evaluates to TRUE if the page specified by object/offset is
* marked as paged out in the compressor pager or the pager doesn't exist.
* Note also that if the pager for an internal object
* has not been created, the pager is not invoked regardless of the value
* of MUST_ASK_PAGER().
*
* PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
* is marked as paged out in the compressor pager.
* PAGED_OUT() is used to determine if a page has already been pushed
* into a copy object in order to avoid a redundant page out operation.
*/
#define MUST_ASK_PAGER(o, f, s) \
((s = VM_COMPRESSOR_PAGER_STATE_GET((o), (f))) != VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) \
(VM_COMPRESSOR_PAGER_STATE_GET((o), (f)) == VM_EXTERNAL_STATE_EXISTS)
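/*
* Illustrative use of MUST_ASK_PAGER(), a simplified restatement of the
* decision made later in this routine (locals as declared above):
*
*	look_for_page = (object->pager_created &&
*	    MUST_ASK_PAGER(object, offset, external_state) &&
*	    !data_supply);
*/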
/*
* Recovery actions
*/
#define RELEASE_PAGE(m) \
MACRO_BEGIN \
PAGE_WAKEUP_DONE(m); \
if ( !VM_PAGE_PAGEABLE(m)) { \
vm_page_lockspin_queues(); \
if ( !VM_PAGE_PAGEABLE(m)) { \
if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) \
vm_page_deactivate(m); \
else \
vm_page_activate(m); \
} \
vm_page_unlock_queues(); \
} \
MACRO_END
#if TRACEFAULTPAGE
dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */
#endif
interruptible = fault_info->interruptible;
interruptible_state = thread_interrupt_level(interruptible);
/*
* INVARIANTS (through entire routine):
*
* 1) At all times, we must either have the object
* lock or a busy page in some object to prevent
* some other thread from trying to bring in
* the same page.
*
* Note that we cannot hold any locks during the
* pager access or when waiting for memory, so
* we use a busy page then.
*
* 2) To prevent another thread from racing us down the
* shadow chain and entering a new page in the top
* object before we do, we must keep a busy page in
* the top object while following the shadow chain.
*
* 3) We must increment paging_in_progress on any object
* for which we have a busy page before dropping
* the object lock
*
* 4) We leave busy pages on the pageout queues.
* If the pageout daemon comes across a busy page,
* it will remove the page from the pageout queues.
*/
object = first_object;
offset = first_offset;
first_m = VM_PAGE_NULL;
access_required = fault_type;
XPR(XPR_VM_FAULT,
"vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
object, offset, fault_type, *protection, 0);
/*
* default type of fault
*/
my_fault = DBG_CACHE_HIT_FAULT;
while (TRUE) {
#if TRACEFAULTPAGE
dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */
#endif
grab_options = 0;
#if CONFIG_SECLUDED_MEMORY
if (object->can_grab_secluded) {
grab_options |= VM_PAGE_GRAB_SECLUDED;