/*
* Copyright (c) 2011-2021 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#include <string.h>
#include <stdlib.h>
#include <mach_assert.h>
#include <mach_ldebug.h>
#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>
#include <mach/boolean.h>
#include <kern/bits.h>
#include <kern/ecc.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/zalloc_internal.h>
#include <kern/kalloc.h>
#include <kern/spl.h>
#include <kern/startup.h>
#include <kern/trustcache.h>
#include <os/overflow.h>
#include <vm/pmap.h>
#include <vm/pmap_cs.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/cpm.h>
#include <libkern/img4/interface.h>
#include <libkern/amfi/amfi.h>
#include <libkern/section_keywords.h>
#include <sys/errno.h>
#include <sys/code_signing.h>
#include <sys/trust_caches.h>
#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>
#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>
#include <arm/pmap/pmap_internal.h>
#include <arm/trap.h>
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>
#include <arm64/ppl/sart.h>
#include <arm64/ppl/uat.h>
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#include <arm64/amcc_rorgn.h>
#endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#include <pexpert/device_tree.h>
#include <san/kasan.h>
#include <sys/cdefs.h>
#if defined(HAS_APPLE_PAC)
#include <ptrauth.h>
#endif
#ifdef CONFIG_XNUPOST
#include <tests/xnupost.h>
#endif
#if HIBERNATION
#include <IOKit/IOHibernatePrivate.h>
#endif /* HIBERNATION */
#ifdef __ARM64_PMAP_SUBPAGE_L1__
#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
#else
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
#endif
#if __ARM_VMSA__ != 8
#error Unknown __ARM_VMSA__
#endif
#define ARRAY_LEN(x) (sizeof (x) / sizeof (x[0]))
extern u_int32_t random(void); /* from <libkern/libkern.h> */
static bool alloc_asid(pmap_t pmap);
static void free_asid(pmap_t pmap);
static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap, bool last_level_only);
static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
static pt_entry_t wimg_to_pte(unsigned int wimg, pmap_paddr_t pa);
const struct page_table_ops native_pt_ops =
{
.alloc_id = alloc_asid,
.free_id = free_asid,
.flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
.flush_tlb_async = flush_mmu_tlb_full_asid_async,
.wimg_to_pte = wimg_to_pte,
};
const struct page_table_level_info pmap_table_level_info_16k[] =
{
[0] = {
.size = ARM_16K_TT_L0_SIZE,
.offmask = ARM_16K_TT_L0_OFFMASK,
.shift = ARM_16K_TT_L0_SHIFT,
.index_mask = ARM_16K_TT_L0_INDEX_MASK,
.valid_mask = ARM_TTE_VALID,
.type_mask = ARM_TTE_TYPE_MASK,
.type_block = ARM_TTE_TYPE_BLOCK
},
[1] = {
.size = ARM_16K_TT_L1_SIZE,
.offmask = ARM_16K_TT_L1_OFFMASK,
.shift = ARM_16K_TT_L1_SHIFT,
.index_mask = ARM_16K_TT_L1_INDEX_MASK,
.valid_mask = ARM_TTE_VALID,
.type_mask = ARM_TTE_TYPE_MASK,
.type_block = ARM_TTE_TYPE_BLOCK
},
[2] = {
.size = ARM_16K_TT_L2_SIZE,
.offmask = ARM_16K_TT_L2_OFFMASK,
.shift = ARM_16K_TT_L2_SHIFT,
.index_mask = ARM_16K_TT_L2_INDEX_MASK,
.valid_mask = ARM_TTE_VALID,
.type_mask = ARM_TTE_TYPE_MASK,
.type_block = ARM_TTE_TYPE_BLOCK
},
[3] = {
.size = ARM_16K_TT_L3_SIZE,
.offmask = ARM_16K_TT_L3_OFFMASK,
.shift = ARM_16K_TT_L3_SHIFT,
.index_mask = ARM_16K_TT_L3_INDEX_MASK,
.valid_mask = ARM_PTE_TYPE_VALID,
.type_mask = ARM_PTE_TYPE_MASK,
.type_block = ARM_TTE_TYPE_L3BLOCK
}
};
const struct page_table_level_info pmap_table_level_info_4k[] =
{
[0] = {
.size = ARM_4K_TT_L0_SIZE,
.offmask = ARM_4K_TT_L0_OFFMASK,
.shift = ARM_4K_TT_L0_SHIFT,
.index_mask = ARM_4K_TT_L0_INDEX_MASK,
.valid_mask = ARM_TTE_VALID,
.type_mask = ARM_TTE_TYPE_MASK,
.type_block = ARM_TTE_TYPE_BLOCK
},
[1] = {
.size = ARM_4K_TT_L1_SIZE,
.offmask = ARM_4K_TT_L1_OFFMASK,
.shift = ARM_4K_TT_L1_SHIFT,
.index_mask = ARM_4K_TT_L1_INDEX_MASK,
.valid_mask = ARM_TTE_VALID,
.type_mask = ARM_TTE_TYPE_MASK,
.type_block = ARM_TTE_TYPE_BLOCK
},
[2] = {
.size = ARM_4K_TT_L2_SIZE,
.offmask = ARM_4K_TT_L2_OFFMASK,
.shift = ARM_4K_TT_L2_SHIFT,
.index_mask = ARM_4K_TT_L2_INDEX_MASK,
.valid_mask = ARM_TTE_VALID,
.type_mask = ARM_TTE_TYPE_MASK,
.type_block = ARM_TTE_TYPE_BLOCK
},
[3] = {
.size = ARM_4K_TT_L3_SIZE,
.offmask = ARM_4K_TT_L3_OFFMASK,
.shift = ARM_4K_TT_L3_SHIFT,
.index_mask = ARM_4K_TT_L3_INDEX_MASK,
.valid_mask = ARM_PTE_TYPE_VALID,
.type_mask = ARM_PTE_TYPE_MASK,
.type_block = ARM_TTE_TYPE_L3BLOCK
}
};
const struct page_table_attr pmap_pt_attr_4k = {
.pta_level_info = pmap_table_level_info_4k,
.pta_root_level = (T0SZ_BOOT - 16) / 9,
#if __ARM_MIXED_PAGE_SIZE__
.pta_commpage_level = PMAP_TT_L2_LEVEL,
#else /* __ARM_MIXED_PAGE_SIZE__ */
#if __ARM_16K_PG__
.pta_commpage_level = PMAP_TT_L2_LEVEL,
#else /* __ARM_16K_PG__ */
.pta_commpage_level = PMAP_TT_L1_LEVEL,
#endif /* __ARM_16K_PG__ */
#endif /* __ARM_MIXED_PAGE_SIZE__ */
.pta_max_level = PMAP_TT_L3_LEVEL,
.pta_ops = &native_pt_ops,
.ap_ro = ARM_PTE_AP(AP_RORO),
.ap_rw = ARM_PTE_AP(AP_RWRW),
.ap_rona = ARM_PTE_AP(AP_RONA),
.ap_rwna = ARM_PTE_AP(AP_RWNA),
.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
.ap_x = ARM_PTE_PNX,
#if __ARM_MIXED_PAGE_SIZE__
.pta_tcr_value = TCR_EL1_4KB,
#endif /* __ARM_MIXED_PAGE_SIZE__ */
.pta_page_size = 4096,
.pta_page_shift = 12,
};
const struct page_table_attr pmap_pt_attr_16k = {
.pta_level_info = pmap_table_level_info_16k,
.pta_root_level = PMAP_TT_L1_LEVEL,
.pta_commpage_level = PMAP_TT_L2_LEVEL,
.pta_max_level = PMAP_TT_L3_LEVEL,
.pta_ops = &native_pt_ops,
.ap_ro = ARM_PTE_AP(AP_RORO),
.ap_rw = ARM_PTE_AP(AP_RWRW),
.ap_rona = ARM_PTE_AP(AP_RONA),
.ap_rwna = ARM_PTE_AP(AP_RWNA),
.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
.ap_x = ARM_PTE_PNX,
#if __ARM_MIXED_PAGE_SIZE__
.pta_tcr_value = TCR_EL1_16KB,
#endif /* __ARM_MIXED_PAGE_SIZE__ */
.pta_page_size = 16384,
.pta_page_shift = 14,
};
#if __ARM_16K_PG__
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
#else /* !__ARM_16K_PG__ */
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
#endif /* !__ARM_16K_PG__ */
#if MACH_ASSERT
int vm_footprint_suspend_allowed = 1;
extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;
#endif /* MACH_ASSERT */
#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) \
(current_thread()->pmap_footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */
/*
* Represents a TLB range that will be flushed before exiting
* the PPL.
* Used by phys_attribute_clear_range to defer flushing pages in
* this range until the end of the operation.
*/
typedef struct pmap_tlb_flush_range {
pmap_t ptfr_pmap;
vm_map_address_t ptfr_start;
vm_map_address_t ptfr_end;
bool ptfr_flush_needed;
} pmap_tlb_flush_range_t;
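/*
 * Illustrative sketch (editor's note, not part of the original source): a
 * batched physical-attribute operation can accumulate a deferred flush in a
 * pmap_tlb_flush_range_t and issue one ranged TLB invalidate at the end,
 * rather than flushing per page. The helpers used below are real, but this
 * exact driver sequence is an assumption.
 */
#if 0 /* example only */
static void
pmap_tlb_flush_range_example(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	pmap_tlb_flush_range_t range = {
		.ptfr_pmap = pmap,
		.ptfr_start = start,
		.ptfr_end = end,
		.ptfr_flush_needed = false,
	};
	/* ... per-page work sets range.ptfr_flush_needed instead of flushing ... */
	if (range.ptfr_flush_needed) {
		/* One ranged, last-level-only flush covers the whole batch. */
		pmap_get_pt_ops(pmap)->flush_tlb_region_async(range.ptfr_start,
		    (size_t)(range.ptfr_end - range.ptfr_start), pmap, true);
		arm64_sync_tlb(false);
	}
}
#endif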
#if XNU_MONITOR
/*
* PPL External References.
*/
extern vm_offset_t segPPLDATAB;
extern unsigned long segSizePPLDATA;
extern vm_offset_t segPPLTEXTB;
extern unsigned long segSizePPLTEXT;
extern vm_offset_t segPPLDATACONSTB;
extern unsigned long segSizePPLDATACONST;
/*
* PPL Global Variables
*/
#if (DEVELOPMENT || DEBUG) || CONFIG_CSR_FROM_DT
/* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
#else
const boolean_t pmap_ppl_disable = FALSE;
#endif
/*
* Indicates if the PPL has started applying APRR.
* This variable is accessed from various assembly trampolines, so be sure to change
* those if you change the size or layout of this variable.
*/
boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
extern void *pmap_stacks_start;
extern void *pmap_stacks_end;
#endif /* XNU_MONITOR */
/* Virtual memory region for early allocation */
#define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
#define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
extern uint8_t bootstrap_pagetables[];
extern unsigned int not_in_kdp;
extern vm_offset_t first_avail;
extern vm_offset_t virtual_space_start; /* Next available kernel VA */
extern vm_offset_t virtual_space_end; /* End of kernel address space */
extern vm_offset_t static_memory_end;
extern const vm_map_address_t physmap_base;
extern const vm_map_address_t physmap_end;
extern int maxproc, hard_maxproc;
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
/*
* The bounds on our TTBRs. These are for sanity checking that
* an address is accessible by a TTBR before we attempt to map it.
*/
/* The level of the root of a page table. */
const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
/* The number of entries in the root TT of a page table. */
const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
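/*
 * Worked example (editor's note; the constants are assumptions for
 * illustration): with 16K pages (ARM_PGSHIFT == 14), 8-byte TTEs
 * (TTE_SHIFT == 3) and T0SZ_BOOT == 25, PGTABLE_ADDR_BITS == 39. Then
 * (39 - 1 - 14) / (14 - 3) == 2, so arm64_root_pgtable_level == 3 - 2 == 1,
 * and arm64_root_pgtable_num_ttes == 2 << (24 % 11) == 8: a 39-bit VA space
 * whose root table is indexed by VA bits [38:36] at L1.
 */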
struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
const pmap_t kernel_pmap = &kernel_pmap_store;
static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone; /* zone of pmap structures */
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmaps_lock, 0);
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(tt1_lock, 0);
queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
typedef struct tt_free_entry {
struct tt_free_entry *next;
} tt_free_entry_t;
#define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_PAGE_SIZE_TT_MAX 4
tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_TWO_PAGE_SIZE_TT_MAX 4
tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
unsigned int free_tt_count MARK_AS_PMAP_DATA;
unsigned int free_tt_max MARK_AS_PMAP_DATA;
unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
/* Lock group used for all pmap object locks. */
lck_grp_t pmap_lck_grp MARK_AS_PMAP_DATA;
#if DEVELOPMENT || DEBUG
int nx_enabled = 1; /* enable no-execute protection */
int allow_data_exec = 0; /* No apps may execute data */
int allow_stack_exec = 0; /* No apps may execute from the stack */
unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
unsigned long pmap_asid_hits MARK_AS_PMAP_DATA = 0;
unsigned long pmap_asid_misses MARK_AS_PMAP_DATA = 0;
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1; /* enable no-execute protection */
const int allow_data_exec = 0; /* No apps may execute data */
const int allow_stack_exec = 0; /* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */
/**
* This variable is set to true during hibernation entry to protect pmap data structures
* during image copying, and reset to false on hibernation exit.
*/
bool hib_entry_pmap_lockdown MARK_AS_PMAP_DATA = false;
#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else
static inline void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
#endif /* MACH_ASSERT */
/**
* This helper function ensures that potentially-long-running batched PPL operations are
* called in preemptible context before entering the PPL, so that the PPL call may
* periodically exit to allow pending urgent ASTs to be taken.
*/
static inline void
pmap_verify_preemptible(void)
{
assert(preemption_enabled() || (startup_phase < STARTUP_SUB_EARLY_BOOT));
}
SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);
SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
#if defined(__arm64__)
/* end of shared region + 512MB for various purposes */
#define ARM64_MIN_MAX_ADDRESS (SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000)
_Static_assert((ARM64_MIN_MAX_ADDRESS > SHARED_REGION_BASE_ARM64) && (ARM64_MIN_MAX_ADDRESS <= MACH_VM_MAX_ADDRESS),
"Minimum address space size outside allowable range");
// Max offset is 13.375GB for devices with "large" memory config
#define ARM64_MAX_OFFSET_DEVICE_LARGE (ARM64_MIN_MAX_ADDRESS + 0x138000000)
// Max offset is 9.375GB for devices with "small" memory config
#define ARM64_MAX_OFFSET_DEVICE_SMALL (ARM64_MIN_MAX_ADDRESS + 0x38000000)
_Static_assert((ARM64_MAX_OFFSET_DEVICE_LARGE > ARM64_MIN_MAX_ADDRESS) && (ARM64_MAX_OFFSET_DEVICE_LARGE <= MACH_VM_MAX_ADDRESS),
"Large device address space size outside allowable range");
_Static_assert((ARM64_MAX_OFFSET_DEVICE_SMALL > ARM64_MIN_MAX_ADDRESS) && (ARM64_MAX_OFFSET_DEVICE_SMALL <= MACH_VM_MAX_ADDRESS),
"Small device address space size outside allowable range");
# ifdef XNU_TARGET_OS_OSX
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = MACH_VM_MAX_ADDRESS;
# else
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
# endif
#endif /* __arm64__ */
#if PMAP_PANIC_DEV_WIMG_ON_MANAGED && (DEVELOPMENT || DEBUG)
SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = TRUE;
#else
SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = FALSE;
#endif
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(asid_lock, 0);
SECURITY_READ_ONLY_LATE(uint32_t) pmap_max_asids = 0;
SECURITY_READ_ONLY_LATE(int) pmap_asid_plru = 1;
SECURITY_READ_ONLY_LATE(uint16_t) asid_chunk_size = 0;
SECURITY_READ_ONLY_LATE(static bitmap_t*) asid_bitmap;
static bitmap_t asid_plru_bitmap[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA;
static uint64_t asid_plru_generation[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA = {0};
static uint64_t asid_plru_gencount MARK_AS_PMAP_DATA = 0;
#if __ARM_MIXED_PAGE_SIZE__
SECURITY_READ_ONLY_LATE(pmap_t) commpage_pmap_4k;
#endif
SECURITY_READ_ONLY_LATE(pmap_t) commpage_pmap_default;
SECURITY_READ_ONLY_LATE(static vm_address_t) commpage_text_kva = 0;
SECURITY_READ_ONLY_LATE(static vm_address_t) commpage_ro_data_kva = 0;
/* PTE Define Macros */
#define ARM_PTE_IS_COMPRESSED(x, p) \
((((x) & 0x3) == 0) && /* PTE is not valid... */ \
((x) & ARM_PTE_COMPRESSED) && /* ...has "compressed" marker" */ \
((!((x) & ~ARM_PTE_COMPRESSED_MASK)) || /* ...no other bits */ \
(panic("compressed PTE %p 0x%llx has extra bits 0x%llx: corrupted?", \
(p), (x), (x) & ~ARM_PTE_COMPRESSED_MASK), FALSE)))
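/*
 * Illustrative sketch (editor's note, not part of the original source):
 * typical use is to distinguish a VM-compressor marker from an empty or
 * valid PTE when examining a mapping.
 */
#if 0 /* example only */
const pt_entry_t spte = *pte_p;
if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
	/* Page was handed to the compressor; no TLB entry can exist for it. */
} else if ((spte & ARM_PTE_TYPE_MASK) == ARM_PTE_TYPE_VALID) {
	/* Live mapping; clearing it requires TLB invalidation. */
}
#endif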
#define pte_is_wired(pte) \
(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
#define pte_was_writeable(pte) \
(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
#define pte_set_was_writeable(pte, was_writeable) \
do { \
if ((was_writeable)) { \
(pte) |= ARM_PTE_WRITEABLE; \
} else { \
(pte) &= ~ARM_PTE_WRITEABLE; \
} \
} while(0)
static inline void
pte_set_wired(pmap_t pmap, pt_entry_t *ptep, boolean_t wired)
{
if (wired) {
*ptep |= ARM_PTE_WIRED;
} else {
*ptep &= ~ARM_PTE_WIRED;
}
/*
* Do not track wired page count for kernel pagetable pages. Kernel mappings are
* not guaranteed to have PTDs in the first place, and kernel pagetable pages are
* never reclaimed.
*/
if (pmap == kernel_pmap) {
return;
}
unsigned short *ptd_wiredcnt_ptr;
ptd_wiredcnt_ptr = &(ptep_get_info(ptep)->wiredcnt);
if (wired) {
os_atomic_add(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
} else {
unsigned short prev_wired = os_atomic_sub_orig(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
if (__improbable(prev_wired == 0)) {
panic("pmap %p (pte %p): wired count underflow", pmap, ptep);
}
}
}
#define PMAP_UPDATE_TLBS(pmap, s, e, strong, last_level_only) { \
pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (size_t)((e) - (s)), pmap, last_level_only); \
arm64_sync_tlb(strong); \
}
/*
* Synchronize updates to PTEs that were previously invalid or had the AF bit cleared,
* therefore not requiring TLBI. Use a store-load barrier to ensure subsequent loads
* will observe the updated PTE.
*/
#define FLUSH_PTE() \
__builtin_arm_dmb(DMB_ISH);
/*
* Synchronize updates to PTEs that were previously valid and thus may be cached in
* TLBs. DSB is required to ensure the PTE stores have completed prior to the ensuing
* TLBI. This should only require a store-store barrier, as subsequent accesses in
* program order will not issue until the DSB completes. Prior loads may be reordered
* after the barrier, but their behavior should not be materially affected by the
* reordering. For fault-driven PTE updates such as COW, PTE contents should not
* matter for loads until the access is re-driven well after the TLB update is
* synchronized. For "involuntary" PTE access restriction due to paging lifecycle,
* we should be in a position to handle access faults. For "voluntary" PTE access
* restriction due to unmapping or protection, the decision to restrict access should
* have a data dependency on prior loads in order to avoid a data race.
*/
#define FLUSH_PTE_STRONG() \
__builtin_arm_dsb(DSB_ISHST);
/**
* Write enough page table entries to map a single VM page. On systems where the
* VM page size does not match the hardware page size, multiple page table
* entries will need to be written.
*
* @note This function does not emit a barrier to ensure these page table writes
* have completed before continuing, which is commonly required. If a DMB or
* DSB barrier is needed, use the write_pte() or write_pte_strong() function
* respectively instead of this one.
*
* @param ptep Pointer to the first page table entry to update.
* @param pte The value to write into each page table entry. In the case that
* multiple PTEs are updated to a non-empty value, then the address
* in this value will automatically be incremented for each PTE
* write.
*/
static void
write_pte_fast(pt_entry_t *ptep, pt_entry_t pte)
{
/**
* The PAGE_SHIFT (and in turn, the PAGE_RATIO) can vary at runtime on some
* systems, which is why it's checked here instead of at compile time. The
* "unreachable" warning needs to be suppressed because PAGE_SHIFT is still a
* compile-time constant on other systems.
*/
__unreachable_ok_push
if (TEST_PAGE_RATIO_4) {
if (((uintptr_t)ptep) & 0x1f) {
panic("%s: PTE write is unaligned, ptep=%p, pte=%p",
__func__, ptep, (void*)pte);
}
if ((pte & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) {
/**
* If we're writing an empty/compressed PTE value, then don't
* auto-increment the address for each PTE write.
*/
*ptep = pte;
*(ptep + 1) = pte;
*(ptep + 2) = pte;
*(ptep + 3) = pte;
} else {
*ptep = pte;
*(ptep + 1) = pte | 0x1000;
*(ptep + 2) = pte | 0x2000;
*(ptep + 3) = pte | 0x3000;
}
} else {
*ptep = pte;
}
__unreachable_ok_pop
}
/**
* Writes enough page table entries to map a single VM page and then ensures
* those writes complete by executing a Data Memory Barrier.
*
* @note The DMB issued by this function is not strong enough to protect against
* TLB invalidates from being reordered above the PTE writes. If a TLBI
* instruction is going to immediately be called after this write, it's
* recommended to call write_pte_strong() instead of this function.
*
* See the function header for write_pte_fast() for more details on the
* parameters.
*/
void
write_pte(pt_entry_t *ptep, pt_entry_t pte)
{
write_pte_fast(ptep, pte);
FLUSH_PTE();
}
/**
* Writes enough page table entries to map a single VM page and then ensures
* those writes complete by executing a Data Synchronization Barrier. This
* barrier provides stronger guarantees than the DMB executed by write_pte().
*
* @note This function is useful if you're going to immediately flush the TLB
* after making the PTE write. A DSB is required to protect against the
* TLB invalidate being reordered before the PTE write.
*
* See the function header for write_pte_fast() for more details on the
* parameters.
*/
static void
write_pte_strong(pt_entry_t *ptep, pt_entry_t pte)
{
write_pte_fast(ptep, pte);
FLUSH_PTE_STRONG();
}
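/*
 * Illustrative sketch (editor's note, not part of the original source): a
 * permission downgrade of a previously-valid PTE pairs the DSB variant with
 * a TLB flush, per the barrier notes above. The names used are real; this
 * exact sequence is an assumption.
 */
#if 0 /* example only */
static void
pmap_example_downgrade(pmap_t pmap, vm_map_address_t va, pt_entry_t *ptep, pt_entry_t new_pte)
{
	/* DSB ensures the PTE store completes before the ensuing TLBI. */
	write_pte_strong(ptep, new_pte);
	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE, false, true);
}
#endif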
/**
* Retrieve the pmap structure for the thread running on the current CPU.
*/
pmap_t
current_pmap(void)
{
const pmap_t current = vm_map_pmap(current_thread()->map);
assert(current != NULL);
#if XNU_MONITOR
/**
* On PPL-enabled systems, it's important that PPL policy decisions aren't
* decided by kernel-writable memory. This function is used in various parts
* of the PPL, and besides validating that the pointer returned by this
* function is indeed a pmap structure, it's also important to ensure that
* it's actually the current thread's pmap. This is because different pmaps
* will have access to different entitlements based on the code signature of
* their loaded process. So if a different user pmap is set in the current
* thread structure (in an effort to bypass code signing restrictions), even
* though the structure would validate correctly as it is a real pmap
* structure, it should fail here.
*
* This only needs to occur for user pmaps because the kernel pmap's root
* page table is always the same as TTBR1 (it's set during bootstrap and not
* changed so it'd be redundant to check), and its code signing fields are
* always set to NULL. The PMAP CS logic won't operate on the kernel pmap so
* it shouldn't be possible to set those fields. Due to that, an attacker
* setting the current thread's pmap to the kernel pmap as a way to bypass
* this check won't accomplish anything as it doesn't provide any extra code
* signing entitlements.
*/
if ((current != kernel_pmap) &&
((get_mmu_ttb() & TTBR_BADDR_MASK) != (current->ttep))) {
panic_plain("%s: Current thread's pmap doesn't match up with TTBR0 "
"%#llx %#llx", __func__, get_mmu_ttb(), current->ttep);
}
#endif /* XNU_MONITOR */
return current;
}
#if DEVELOPMENT || DEBUG
/*
* Trace levels are controlled by a bitmask in which each
* level can be enabled/disabled by the (1<<level) position
* in the boot-arg.
* Level 0: PPL extension functionality
* Level 1: pmap lifecycle (create/destroy/switch)
* Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
* Level 3: internal state management (attributes/fast-fault)
* Level 4-7: TTE traces for paging levels 0-3. TTBs are traced at level 4.
*/
SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
#define PMAP_TRACE(level, ...) \
if (__improbable((1 << (level)) & pmap_trace_mask)) { \
KDBG_RELEASE(__VA_ARGS__); \
}
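/*
 * Usage sketch (editor's note; the event code shown is an assumption): with
 * boot-arg pmap_trace=0x6, levels 1 and 2 are enabled, so a mapping-lifecycle
 * trace point such as
 *
 *     PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
 *         VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pa, prot);
 *
 * emits a kdebug event, while level-3 internal-state traces remain off.
 */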
#else /* DEVELOPMENT || DEBUG */
#define PMAP_TRACE(level, ...)
#endif /* DEVELOPMENT || DEBUG */
/*
* Internal function prototypes (forward declarations).
*/
static vm_map_size_t pmap_user_va_size(pmap_t pmap);
static void pmap_set_reference(ppnum_t pn);
pmap_paddr_t pmap_vtophys(pmap_t pmap, addr64_t va);
static void pmap_switch_user_ttb(pmap_t pmap, pmap_cpu_data_t *cpu_data_ptr);
static kern_return_t pmap_expand(
pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
static int pmap_remove_range(
pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *);
static tt_entry_t *pmap_tt1_allocate(
pmap_t, vm_size_t, unsigned int);
#define PMAP_TT_ALLOCATE_NOWAIT 0x1
static void pmap_tt1_deallocate(
pmap_t, tt_entry_t *, vm_size_t, unsigned int);
#define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
static kern_return_t pmap_tt_allocate(
pmap_t, tt_entry_t **, unsigned int, unsigned int);
const unsigned int arm_hardware_page_size = ARM_PGBYTES;
const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
static void pmap_unmap_commpage(
pmap_t pmap);
static boolean_t
pmap_is_64bit(pmap_t);
static void pmap_flush_tlb_for_paddr_locked_async(pmap_paddr_t);
static void pmap_update_pp_attr_wimg_bits_locked(unsigned int, unsigned int);
static bool pmap_update_cache_attributes_locked(
ppnum_t, unsigned, bool);
static boolean_t arm_clear_fast_fault(
ppnum_t ppnum,
vm_prot_t fault_type,
pt_entry_t *pte_p);
static void pmap_trim_self(pmap_t pmap);
static void pmap_trim_subord(pmap_t subord);
/*
* Temporary prototypes, while we wait for pmap_enter to move to taking an
* address instead of a page number.
*/
static kern_return_t
pmap_enter_addr(
pmap_t pmap,
vm_map_address_t v,
pmap_paddr_t pa,
vm_prot_t prot,
vm_prot_t fault_type,
unsigned int flags,
boolean_t wired);
kern_return_t
pmap_enter_options_addr(
pmap_t pmap,
vm_map_address_t v,
pmap_paddr_t pa,
vm_prot_t prot,
vm_prot_t fault_type,
unsigned int flags,
boolean_t wired,
unsigned int options,
__unused void *arg);
#ifdef CONFIG_XNUPOST
kern_return_t pmap_test(void);
#endif /* CONFIG_XNUPOST */
PMAP_SUPPORT_PROTOTYPES(
kern_return_t,
arm_fast_fault, (pmap_t pmap,
vm_map_address_t va,
vm_prot_t fault_type,
bool was_af_fault,
bool from_user), ARM_FAST_FAULT_INDEX);
PMAP_SUPPORT_PROTOTYPES(
boolean_t,
arm_force_fast_fault, (ppnum_t ppnum,
vm_prot_t allow_mode,
int options), ARM_FORCE_FAST_FAULT_INDEX);
MARK_AS_PMAP_TEXT static boolean_t
arm_force_fast_fault_with_flush_range(
ppnum_t ppnum,
vm_prot_t allow_mode,
int options,
pmap_tlb_flush_range_t *flush_range);
/**
* Definition of the states driving the batch cache attributes update
* state machine.
*/
typedef struct {
uint64_t page_index : 32, /* The page index to be operated on */
state : 8, /* The current state of the update machine */
tlb_flush_pass_needed : 1, /* Tracking whether the tlb flush pass is necessary */
rt_cache_flush_pass_needed : 1, /* Tracking whether the cache flush pass is necessary */
:0;
} batch_set_cache_attr_state_t;
/* Possible values of the "state" field. */
#define PMAP_BATCH_SET_CACHE_ATTRIBUTES_UPDATE_PASS 1
#define PMAP_BATCH_SET_CACHE_ATTRIBUTES_TLBFLUSH_PASS 2
#define PMAP_BATCH_SET_CACHE_ATTRIBUTES_CACHEFLUSH_PASS 3
#define PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE 4
static_assert(sizeof(batch_set_cache_attr_state_t) == sizeof(uint64_t));
PMAP_SUPPORT_PROTOTYPES(
batch_set_cache_attr_state_t,
pmap_batch_set_cache_attributes, (
#if XNU_MONITOR
volatile upl_page_info_t *user_page_list,
#else /* !XNU_MONITOR */
upl_page_info_array_t user_page_list,
#endif /* XNU_MONITOR */
batch_set_cache_attr_state_t state,
unsigned int page_cnt,
unsigned int cacheattr), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
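/*
 * Illustrative sketch (editor's note, not part of the original source): a
 * caller drives the batch cache-attribute state machine by re-invoking the
 * operation until it reports DONE, keeping each PPL entry short. The wrapper
 * name pmap_batch_set_cache_attributes_internal is an assumption.
 */
#if 0 /* example only */
batch_set_cache_attr_state_t bstate = {
	.page_index = 0,
	.state = PMAP_BATCH_SET_CACHE_ATTRIBUTES_UPDATE_PASS,
};
do {
	bstate = pmap_batch_set_cache_attributes_internal(user_page_list,
	    bstate, page_cnt, cacheattr);
} while (bstate.state != PMAP_BATCH_SET_CACHE_ATTRIBUTES_DONE);
#endif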
PMAP_SUPPORT_PROTOTYPES(
kern_return_t,
pmap_change_wiring, (pmap_t pmap,
vm_map_address_t v,
boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
PMAP_SUPPORT_PROTOTYPES(
pmap_t,
pmap_create_options, (ledger_t ledger,
vm_map_size_t size,
unsigned int flags,
kern_return_t * kr), PMAP_CREATE_INDEX);
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
PMAP_SUPPORT_PROTOTYPES(
kern_return_t,
pmap_enter_options, (pmap_t pmap,
vm_map_address_t v,
pmap_paddr_t pa,
vm_prot_t prot,
vm_prot_t fault_type,
unsigned int flags,
boolean_t wired,
unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
PMAP_SUPPORT_PROTOTYPES(
pmap_paddr_t,
pmap_find_pa, (pmap_t pmap,
addr64_t va), PMAP_FIND_PA_INDEX);
PMAP_SUPPORT_PROTOTYPES(
kern_return_t,
pmap_insert_commpage, (pmap_t pmap), PMAP_INSERT_COMMPAGE_INDEX);
PMAP_SUPPORT_PROTOTYPES(
boolean_t,
pmap_is_empty, (pmap_t pmap,
vm_map_offset_t va_start,
vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
PMAP_SUPPORT_PROTOTYPES(
unsigned int,
pmap_map_cpu_windows_copy, (ppnum_t pn,
vm_prot_t prot,
unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_ro_zone_memcpy, (zone_id_t zid,
vm_offset_t va,
vm_offset_t offset,
const vm_offset_t new_data,
vm_size_t new_data_size), PMAP_RO_ZONE_MEMCPY_INDEX);
PMAP_SUPPORT_PROTOTYPES(
uint64_t,
pmap_ro_zone_atomic_op, (zone_id_t zid,
vm_offset_t va,
vm_offset_t offset,
zro_atomic_op_t op,
uint64_t value), PMAP_RO_ZONE_ATOMIC_OP_INDEX);
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_ro_zone_bzero, (zone_id_t zid,
vm_offset_t va,
vm_offset_t offset,
vm_size_t size), PMAP_RO_ZONE_BZERO_INDEX);
PMAP_SUPPORT_PROTOTYPES(
vm_map_offset_t,
pmap_nest, (pmap_t grand,
pmap_t subord,
addr64_t vstart,
uint64_t size,
vm_map_offset_t vrestart,
kern_return_t * krp), PMAP_NEST_INDEX);
PMAP_SUPPORT_PROTOTYPES(
void,
pmap_page_protect_options, (ppnum_t ppnum,
vm_prot_t prot,
unsigned int options,
void *arg), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
PMAP_SUPPORT_PROTOTYPES(
vm_map_address_t,
pmap_protect_options, (pmap_t pmap,
vm_map_address_t start,
vm_map_address_t end,
vm_prot_t prot,
unsigned int options,
void *args), PMAP_PROTECT_OPTIONS_INDEX);
PMAP_SUPPORT_PROTOTYPES(
kern_return_t,
pmap_query_page_info, (pmap_t pmap,