/
compact.rs
3594 lines (3358 loc) · 140 KB
/
compact.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//! Support for the "compact unwinding format" used by Apple platforms,
//! which can be found in __unwind_info sections of binaries.
//!
//! The primary type of interest is [`CompactUnwindInfoIter`], which can be
//! constructed directly from a section of memory.
//!
//! The [`CompactUnwindInfoIter]` lets you iterate through all of the mappings
//! from instruction addresses to unwinding instructions, or lookup a specific
//! mapping by instruction address (unimplemented).
//!
//!
//!
//! # Examples
//!
//! If you want to process all the Compact Unwind Info at once, do something like this:
//!
//! ```
//! use symbolic_debuginfo::macho::{
//! CompactCfiOp, CompactCfiRegister, CompactUnwindOp,
//! CompactUnwindInfoIter, MachError, MachObject,
//! };
//!
//! fn read_compact_unwind_info<'d>(mut iter: CompactUnwindInfoIter<'d>)
//! -> Result<(), MachError>
//! {
//! // Iterate through the entries
//! while let Some(entry) = iter.next()? {
//! match entry.instructions(&iter) {
//! CompactUnwindOp::None => {
//! // No instructions for this region, will need to use
//! // stack scanning or frame-pointer techniques to unwind.
//! }
//! CompactUnwindOp::UseDwarfFde { offset_in_eh_frame } => {
//! // Need to grab the CFI info from the eh_frame section
//!
//! // process_eh_frame_fde_at(offset_in_eh_frame)
//! }
//! CompactUnwindOp::CfiOps(ops) => {
//! // Emit a new entry with the following operations
//! let start_addr = entry.instruction_address;
//! let length = entry.len;
//!
//! for instruction in ops {
//! match instruction {
//! CompactCfiOp::RegisterAt {
//! dest_reg,
//! src_reg,
//! offset_from_src,
//! } => {
//! let dest_reg_name = dest_reg.name(&iter);
//! let src_reg_name = src_reg.name(&iter);
//!
//! // Emit something to the effect of
//! // $dest_reg_name = *($src_reg_name + offset_from_src);
//! }
//! CompactCfiOp::RegisterIs {
//! dest_reg,
//! src_reg,
//! offset_from_src,
//! } => {
//! let dest_reg_name = dest_reg.name(&iter);
//! let src_reg_name = src_reg.name(&iter);
//!
//! // Emit something to the effect of
//! // $dest_reg_name = $src_reg_name + offset_from_src;
//! }
//! };
//! }
//! }
//! }
//! }
//! Ok(())
//! }
//! ```
//!
//! If you want to unwind from a specific location, do something like this
//! (API not yet implemented!):
//!
//! ```rust,ignore
//! use symbolic_debuginfo::macho::{
//! CompactCfiOp, CompactCfiRegister, CompactUnwindOp,
//! CompactUnwindInfoIter, MachError, MachObject,
//! };
//!
//! fn unwind_one_frame<'d>(mut iter: CompactUnwindInfoIter<'d>, current_address_in_module: u32)
//! -> Result<(), MachError>
//! {
//! if let Some(entry) = iter.entry_for_address(current_address_in_module)? {
//! match entry.instructions(&iter) {
//! CompactUnwindOp::None => {
//! // No instructions for this region, will need to use
//! // stack scanning or frame-pointer techniques to unwind.
//! }
//! CompactUnwindOp::UseDwarfFde { offset_in_eh_frame } => {
//! // Need to grab the CFI info from the eh_frame section
//!
//! // process_eh_frame_fde_at(offset_in_eh_frame)
//! }
//! CompactUnwindOp::CfiOps(ops) => {
//! // Emit a new entry with the following operations
//! let start_addr = entry.instruction_address;
//! let length = entry.len;
//!
//! for instruction in ops {
//! match instruction {
//! CompactCfiOp::RegisterAt {
//! dest_reg,
//! src_reg,
//! offset_from_src,
//! } => {
//! let dest_reg_name = dest_reg.name(&iter);
//! let src_reg_name = src_reg.name(&iter);
//!
//! // Emit something to the effect of
//! // $dest_reg_name = *($src_reg_name + offset_from_src);
//! }
//! CompactCfiOp::RegisterIs {
//! dest_reg,
//! src_reg,
//! offset_from_src,
//! } => {
//! let dest_reg_name = dest_reg.name(&iter);
//! let src_reg_name = src_reg.name(&iter);
//!
//! // Emit something to the effect of
//! // $dest_reg_name = $src_reg_name + offset_from_src;
//! }
//! };
//! }
//! }
//! }
//! }
//! Ok(())
//! }
//! ```
//!
//!
//! # Unimplemented Features (TODO)
//!
//! * Personality/LSDA lookup (for runtime unwinders)
//! * Entry lookup by address (for runtime unwinders)
//! * x86/x64 Stackless-Indirect mode decoding (for stack frames > 2KB)
//!
//!
//! # The Compact Unwinding Format
//!
//! This format is defined only by its implementation in llvm. Notably these two
//! files include lots of useful comments and definitions:
//!
//! * [Header describing layout of the format](https://github.com/llvm/llvm-project/blob/main/libunwind/include/mach-o/compact_unwind_encoding.h)
//! * [Implementation that outputs the format](https://github.com/llvm/llvm-project/blob/main/lld/MachO/UnwindInfoSection.cpp)
//! * [Implementation of lldb interpreting that format (CreateUnwindPlan_x86_64 especially useful)](https://github.com/llvm/llvm-project/blob/main/lldb/source/Symbol/CompactUnwindInfo.cpp)
//!
//! This implementation is based on those files at commit `d480f968ad8b56d3ee4a6b6df5532d485b0ad01e`.
//!
//! Unfortunately the description of the format in those files elides some important
//! details, and it uses some naming conventions that are confusing, so this document
//! will attempt to define this format more completely, and with more clear terms.
//!
//! Some notable terminology changes from llvm:
//!
//! * "encoding" or "encoding type" => opcode
//! * "function offset" => instruction address
//!
//! Like all unwinding info formats, the goal of the compact unwinding format
//! is to create a mapping from addresses in the binary to opcodes describing
//! how to unwind from that location.
//!
//! These opcodes describe:
//!
//! * How to recover the return pointer for the current frame
//! * How to recover some of the registers for the current frame
//! * How to run destructors / catch the unwind at runtime (personality/LSDA)
//!
//! A user of the compact unwinding format would:
//!
//! 1. Get the current instruction pointer (e.g. `%rip`).
//! 2. Lookup the corresponding opcode in the compact unwinding structure.
//! 3. Follow the instructions of that opcode to recover the current frame.
//! 4. Optionally perform runtime unwinding tasks for the current frame (destructors).
//! 5. Use that information to recover the instruction pointer of the previous frame.
//! 6. Repeat until unwinding is complete.
//!
//! The compact unwinding format can be understood as two separate pieces:
//!
//! * An architecture-agnostic "page-table" structure for finding opcode entries
//! * Architecture-specific opcode formats (x86, x64, and ARM64)
//!
//! Unlike DWARF CFI, compact unwinding doesn't have facilities for incrementally
//! updating how to recover certain registers as the function progresses.
//!
//! Empirical analysis suggests that there tends to only be one opcode for
//! an entire function (which explains why llvm refers to instruction addresses
//! as "function offsets"), although nothing in the format seems to *require*
//! this to be the case.
//!
//! One consequence of only having one opcode for a whole function is that
//! functions will generally have incorrect instructions for the function's
//! prologue (where callee-saved registers are individually PUSHed onto the
//! stack before the rest of the stack space is allocated), and epilogue
//! (where callee-saved registers are individually POPed back into registers).
//!
//! Presumably this isn't a very big deal, since there's very few situations
//! where unwinding would involve a function still executing its prologue/epilogue.
//! This might matter when handling a stack overflow that occurred while
//! saving the registers, or when processing a non-crashing thread in a minidump
//! that happened to be in its prologue/epilogue.
//!
//! Similarly, the way ranges of instructions are mapped means that Compact
//! Unwinding will generally incorrectly map the padding bytes between functions
//! (attributing them to the previous function), while DWARF CFI tends to
//! more carefully exclude those addresses. Presumably also not a big deal.
//!
//! Both of these things mean that if DWARF CFI and Compact Unwinding are
//! available for a function, the DWARF CFI is expected to be more precise.
//!
//! It's possible that LSDA entries have addresses decoupled from the primary
//! opcode so that instructions on how to run destructors can vary more
//! granularly, but LSDA support is still TODO as it's not needed for
//! backtraces.
//!
//!
//! # Page Tables
//!
//! This section describes the architecture-agnostic layout of the compact
//! unwinding format. The layout of the format is a two-level page-table
//! with one root first-level node pointing to arbitrarily many second-level
//! nodes, which in turn can hold several hundred opcode entries.
//!
//! There are two high-level concepts in this format that enable significant
//! compression of the tables:
//!
//! 1. Eliding duplicate instruction addresses
//! 2. Palettizing the opcodes
//!
//!
//!
//! Trick 1 is standard for unwinders: the table of mappings is sorted by
//! address, and any entries that would have the same opcode as the
//! previous one are elided. So for instance the following:
//!
//! ```text
//! address: 1, opcode: 1
//! address: 2, opcode: 1
//! address: 3, opcode: 2
//! ```
//!
//! Is just encoded like this:
//!
//! ```text
//! address: 1, opcode: 1
//! address: 3, opcode: 2
//! ```
//!
//! We have found a few places with "zero-length" entries, where the same
//! address gets repeated, such as the following in `libsystem_kernel.dylib`:
//!
//! ```text
//! address: 0x000121c3, opcode: 0x00000000
//! address: 0x000121c3, opcode: 0x04000680
//! ```
//!
//! In this case you can just discard the zero-length one (the first one).
//!
//!
//!
//! Trick 2 is more novel: At the first level a global palette of up to 127 opcodes
//! is defined. Each second-level "compressed" (leaf) page can also define up to 128 local
//! opcodes. Then the entries mapping instruction addresses to opcodes can use 8-bit
//! indices into those palettes instead of entire 32-bit opcodes. If an index is
//! smaller than the number of global opcodes, it's global, otherwise it's local
//! (subtract the global count to get the local index).
//!
//! > Unclear detail: If the global palette is smaller than 127, can the local
//! palette be larger than 128?
//!
//! To compress these entries into a single 32-bit value, the address is truncated
//! to 24 bits and packed with the index. The addresses stored in these entries
//! are also relative to a base address that each second-level page defines.
//! (This will be made more clear below).
//!
//! There are also non-palletized "regular" second-level pages with absolute
//! 32-bit addresses, but those are fairly rare. llvm seems to only want to emit
//! them in the final page.
//!
//! The root page also stores the first address mapped by each second-level
//! page, allowing for more efficient binary search for a particular function
//! offset entry. (This is the base address the compressed pages use.)
//!
//! The root page always has a final sentinel entry which has a null pointer
//! to its second-level page while still specifying a first address. This
//! makes it easy to lookup the maximum mapped address (the sentinel will store
//! that value +1), and just generally makes everything Work Nicer.
//!
//!
//!
//! ## Layout of the Page Table
//!
//! The page table starts at the very beginning of the `__unwind_info` section
//! with the root page:
//!
//! ```rust,ignore
//! struct RootPage {
//! /// Only version 1 is currently defined
//! version: u32 = 1,
//!
//! /// The array of u32 global opcodes (offset relative to start of root page).
//! ///
//! /// These may be indexed by "compressed" second-level pages.
//! global_opcodes_offset: u32,
//! global_opcodes_len: u32,
//!
//! /// The array of u32 global personality codes
//! /// (offset relative to start of root page).
//! ///
//! /// Personalities define the style of unwinding that an unwinder should
//! /// use, and how to interpret the LSDA entries for a function (see below).
//! personalities_offset: u32,
//! personalities_len: u32,
//!
//! /// The array of FirstLevelPageEntry's describing the second-level pages
//! /// (offset relative to start of root page).
//! pages_offset: u32,
//! pages_len: u32,
//!
//! // After this point there are several dynamically-sized arrays whose
//! // precise order and positioning don't matter, because they are all
//! // accessed using offsets like the ones above. The arrays are:
//!
//! global_opcodes: [u32; global_opcodes_len],
//! personalities: [u32; personalities_len],
//! pages: [FirstLevelPageEntry; pages_len],
//!
//! /// An array of LSDA pointers (Language Specific Data Areas).
//! ///
//! /// LSDAs are tables that an unwinder's personality function will use to
//! /// find what destructors should be run and whether unwinding should
//! /// be caught and normal execution resumed. We can treat them opaquely.
//! ///
//! /// Second-level pages have addresses into this array so that it can
//! /// can be indexed, the root page doesn't need to know about them.
//! lsdas: [LsdaEntry; unknown_len],
//! }
//!
//!
//! struct FirstLevelPageEntry {
//! /// The first address mapped by this page.
//! ///
//! /// This is useful for binary-searching for the page that can map
//! /// a specific address in the binary (the primary kind of lookup
//! /// performed by an unwinder).
//! first_address: u32,
//!
//! /// Offset to the second-level page (offset relative to start of root page).
//! ///
//! /// This may point to a RegularSecondLevelPage or a CompressedSecondLevelPage.
//! /// Which it is can be determined by the 32-bit "kind" value that is at
//! /// the start of both layouts.
//! second_level_page_offset: u32,
//!
//! /// Base offset into the lsdas array that entries in this page will be
//! /// relative to (offset relative to start of root page).
//! lsda_index_offset: u32,
//! }
//!
//!
//! struct RegularSecondLevelPage {
//! /// Always 2 (use to distinguish from CompressedSecondLevelPage).
//! kind: u32 = 2,
//!
//! /// The Array of RegularEntry's (offset relative to **start of this page**).
//! entries_offset: u16,
//! entries_len: u16,
//! }
//!
//!
//! struct RegularEntry {
//! /// The address in the binary for this entry (absolute).
//! instruction_address: u32,
//! /// The opcode for this address.
//! opcode: u32,
//! }
//!
//!
//! struct CompressedSecondLevelPage {
//! /// Always 3 (use to distinguish from RegularSecondLevelPage).
//! kind: u32 = 3,
//!
//! /// The array of compressed u32 entries
//! /// (offset relative to **start of this page**).
//! ///
//! /// Entries are a u32 that contains two packed values (from high to low):
//! /// * 8 bits: opcode index
//! /// * 0..global_opcodes_len => index into global palette
//! /// * global_opcodes_len..255 => index into local palette
//! /// (subtract global_opcodes_len to get the real local index)
//! /// * 24 bits: instruction address
//! /// * address is relative to this page's first_address!
//! entries_offset: u16,
//! entries_len: u16,
//!
//! /// The array of u32 local opcodes for this page
//! /// (offset relative to **start of this page**).
//! local_opcodes_offset: u16,
//! local_opcodes_len: u16,
//! }
//!
//!
//! // TODO: why do these have instruction_addresses? Are they not in sync
//! // with the second-level entries?
//! struct LsdaEntry {
//! instruction_address: u32,
//! lsda_address: u32,
//! }
//! ```
//!
//!
//!
//! # Opcode Format
//!
//! There are 3 architecture-specific opcode formats: x86, x64, and ARM64.
//!
//! All 3 formats have a "null opcode" (`0x0000_0000`) which indicates that
//! there is no unwinding information for this range of addresses. This happens
//! with things like hand-written assembly subroutines. This implementation
//! will yield it as a valid opcode that converts into [`CompactUnwindOp::None`].
//!
//! All 3 formats share a common header in the top 8 bits (from high to low):
//!
//! ```rust,ignore
//! /// Whether this instruction is the start of a function.
//! is_start: u1,
//!
//! /// Whether there is an lsda entry for this instruction.
//! has_lsda: u1,
//!
//! /// An index into the global personalities array
//! /// (TODO: ignore if has_lsda == false?)
//! personality_index: u2,
//!
//! /// The architecture-specific kind of opcode this is, specifying how to
//! /// interpret the remaining 24 bits of the opcode.
//! opcode_kind: u4,
//! ```
//!
//!
//!
//! ## x86 and x64 Opcodes
//!
//! x86 and x64 use the same opcode layout, differing only in the registers
//! being restored. Registers are numbered 0-6, with the following mappings:
//!
//! x86:
//! * 0 => no register (like `Option::None`)
//! * 1 => `ebx`
//! * 2 => `ecx`
//! * 3 => `edx`
//! * 4 => `edi`
//! * 5 => `esi`
//! * 6 => `ebp`
//!
//! x64:
//! * 0 => no register (like `Option::None`)
//! * 1 => `rbx`
//! * 2 => `r12`
//! * 3 => `r13`
//! * 4 => `r14`
//! * 5 => `r15`
//! * 6 => `rbp`
//!
//! Note also that encoded sizes/offsets are generally divided by the pointer size
//! (since all values we are interested in are pointer-aligned), which of course differs
//! between x86 and x64.
//!
//! There are 4 kinds of x86/x64 opcodes (specified by opcode_kind):
//!
//! (One of the llvm headers refers to a 5th "0=old" opcode. Apparently this
//! was used for initial development of the format, and is basically just
//! reserved to prevent the testing data from ever getting mixed with real
//! data. Nothing should produce or handle it. It does incidentally match
//! the "null opcode", but it's fine to regard that as an unknown opcode
//! and do nothing.)
//!
//!
//! ### x86/x64 Opcode 1: Frame-Based
//!
//! The function has the standard frame pointer (`bp`) prelude which:
//!
//! * Pushes the caller's `bp` to the stack
//! * Sets `bp := sp` (new frame pointer is the current top of the stack)
//!
//! `bp` has been preserved, and any callee-saved registers that need to be restored
//! are saved on the stack at a known offset from `bp`. The return address is
//! stored just before the caller's `bp`. The caller's stack pointer should
//! point before where the return address is saved.
//!
//! So to unwind you just need to do:
//!
//! ```text
//! %sp := %bp + 2*POINTER_SIZE
//! %ip := *(%bp + POINTER_SIZE)
//! %bp := *(%bp)
//!
//! (and restore all the other callee-saved registers as described below)
//! ```
//!
//! Registers are stored in increasing order (so `reg1` comes before `reg2`).
//! If a register has the "no register" value, continue iterating the offset
//! forward. This lets the registers be stored slightly-non-contiguously on the
//! stack.
//!
//! The remaining 24 bits of the opcode are interpreted as follows (from high to low):
//!
//! ```rust,ignore
//! /// Registers to restore (see register mapping in previous section)
//! reg1: u3,
//! reg2: u3,
//! reg3: u3,
//! reg4: u3,
//! reg5: u3,
//!
//! _unused: u1,
//!
//! /// The offset from bp that the registers to restore are saved at,
//! /// divided by pointer size.
//! stack_offset: u8,
//! ```
//!
//!
//!
//! ### x86/x64 Opcode 2: Frameless (Stack-Immediate)
//!
//!
//! The callee's stack frame has a known size, so we can find the start
//! of the frame by offsetting from sp (the stack pointer). The return
//! address is saved immediately after that location. Any callee-saved
//! registers that need to be restored are saved immediately after that.
//!
//! So to unwind you just need to do:
//!
//! ```text
//! %sp := %sp + stack_size * POINTER_SIZE
//! %ip := *(%sp - 8)
//!
//! (and restore all the other callee-saved registers as described below)
//! ```
//!
//! Registers are stored in *reverse* order on the stack from the order the
//! decoding algorithm outputs (so `reg[1]` comes before `reg[0]`).
//!
//! If a register has the "no register" value, *do not* continue iterating the
//! offset forward -- registers are strictly contiguous (it's possible
//! "no register" can only be trailing due to the encoding, but I haven't
//! verified this).
//!
//! The remaining 24 bits of the opcode are interpreted as follows (from high to low):
//!
//! ```rust,ignore
//! /// How big the stack frame is, divided by pointer size.
//! stack_size: u8,
//!
//! _unused: u3,
//!
//! /// The number of registers that are saved on the stack.
//! register_count: u3,
//!
//! /// The permutation encoding of the registers that are saved
//! /// on the stack (see below).
//! register_permutations: u10,
//! ```
//!
//! The register permutation encoding is a Lehmer code sequence encoded into a
//! single variable-base number so we can encode the ordering of up to
//! six registers in a 10-bit space.
//!
//! This can't really be described well with anything but code, so
//! just read this implementation or llvm's implementation for how to
//! encode/decode this.
//!
//!
//!
//! ### x86/x64 Opcode 3: Frameless (Stack-Indirect)
//!
//! (Currently Unimplemented)
//!
//! Stack-Indirect is exactly the same situation as Stack-Immediate, but
//! the stack-frame size is too large for Stack-Immediate to encode. However,
//! the function prereserved the size of the frame in its prologue, so we can
//! extract the the size of the frame from a `sub` instruction at a known
//! offset from the start of the function (`subl $nnnnnnnn,ESP` in x86,
//! `subq $nnnnnnnn,RSP` in x64).
//!
//! This requires being able to find the first instruction of the function
//! (TODO: presumably the first is_start entry <= this one?).
//!
//! TODO: describe how to extract the value from the `sub` instruction.
//!
//!
//! ```rust,ignore
//! /// Offset from the start of the function where the `sub` instruction
//! /// we need is stored. (NOTE: not divided by anything!)
//! instruction_offset: u8,
//!
//! /// An offset to add to the loaded stack size, divided by pointer size.
//! /// This allows the stack size to differ slightly from the `sub`, to
//! /// compensate for any function prologue that pushes a bunch of
//! /// pointer-sized registers.
//! stack_adjust: u3,
//!
//! /// The number of registers that are saved on the stack.
//! register_count: u3,
//!
//! /// The permutation encoding of the registers that are saved on the stack
//! /// (see Stack-Immediate for a description of this format).
//! register_permutations: u10,
//! ```
//!
//! **Note**: apparently binaries generated by the clang in Xcode 6 generated
//! corrupted versions of this opcode, but this was fixed in Xcode 7
//! (released in September 2015), so *presumably* this isn't something we're
//! likely to encounter. But if you encounter messed up opcodes this might be why.
//!
//!
//!
//! ### x86/x64 Opcode 4: Dwarf
//!
//! There is no compact unwind info here, and you should instead use the
//! DWARF CFI in `.eh_frame` for this line. The remaining 24 bits of the opcode
//! are an offset into the `.eh_frame` section that should hold the DWARF FDE
//! for this instruction address.
//!
//!
//!
//! ## ARM64 Opcodes
//!
//! ARM64 (AKA AArch64) is a lot more strict about the ABI of functions, and
//! as such it has fairly simple opcodes. There are 3 kinds of ARM64 opcode:
//!
//! (Yes there's no Opcode 1, I don't know why.)
//!
//!
//! ### ARM64 Opcode 2: Frameless
//!
//! This is a "frameless" leaf function. The caller is responsible for
//! saving/restoring all of its general purpose registers. The frame pointer
//! is still the caller's frame pointer and doesn't need to be touched. The
//! return address is stored in the link register (`x30`).
//!
//! So to unwind you just need to do:
//!
//! ```text
//! %sp := %sp + stack_size * 16
//! %pc := %x30
//!
//! (no other registers to restore)
//! ```
//!
//! The remaining 24 bits of the opcode are interpreted as follows (from high to low):
//!
//! ```rust,ignore
//! /// How big the stack frame is, divided by 16.
//! stack_size: u12,
//!
//! _unused: u12,
//! ```
//!
//!
//!
//! ### ARM64 Opcode 3: Dwarf
//!
//! There is no compact unwind info here, and you should instead use the
//! DWARF CFI in `.eh_frame` for this line. The remaining 24 bits of the opcode
//! are an offset into the `.eh_frame` section that should hold the DWARF FDE
//! for this instruction address.
//!
//!
//!
//! ### ARM64 Opcode 4: Frame-Based
//!
//! This is a function with the standard prologue. The return address (`pc`) and the
//! frame pointer (`x29`) were pushed onto the stack in a pair and in that order
//! (ARM64 registers are saved/restored in pairs), and then the frame pointer was updated
//! to the current stack pointer.
//!
//! So to unwind you just need to do:
//!
//! ```text
//! %sp := %x29 + 16
//! %pc := *(%x29 + 8)
//! %x29 := *(%x29)
//!
//! (and restore all the other callee-saved registers as described below)
//! ```
//!
//! Any callee-saved registers that need to be restored were then pushed
//! onto the stack in pairs in the following order (if they were pushed at
//! all, see below):
//!
//! 1. `x19`, `x20`
//! 2. `x21`, `x22`
//! 3. `x23`, `x24`
//! 4. `x25`, `x26`
//! 5. `x27`, `x28`
//! 6. `d8`, `d9`
//! 7. `d10`, `d11`
//! 8. `d12`, `d13`
//! 9. `d14`, `d15`
//!
//! The remaining 24 bits of the opcode are interpreted as follows (from high to low):
//!
//! ```rust,ignore
//! _unused: u15,
//!
//! // Whether each register pair was pushed
//! d14_and_d15_saved: u1,
//! d12_and_d13_saved: u1,
//! d10_and_d11_saved: u1,
//! d8_and_d9_saved: u1,
//!
//! x27_and_x28_saved: u1,
//! x25_and_x26_saved: u1,
//! x23_and_x24_saved: u1,
//! x21_and_x22_saved: u1,
//! x19_and_x20_saved: u1,
//! ```
//!
//!
//!
//! # Notable Corners
//!
//! Here's some notable corner cases and esoterica of the format. Behaviour in
//! these situations is not strictly guaranteed (as in we may decide to
//! make the implemenation more strict or liberal if it is deemed necessary
//! or desirable). But current behaviour *is* documented here for the sake of
//! maintenance/debugging. Hopefully it also helps highlight all the ways things
//! can go wrong for anyone using this documentation to write their own tooling.
//!
//! For all these cases, if an Error is reported during iteration/search, the
//! [`CompactUnwindInfoIter`] will be in an unspecified state for future queries.
//! It will never violate memory safety but it may start yielding chaotic
//! values.
//!
//! If this implementation ever panics, that should be regarded as an
//! implementation bug.
//!
//!
//! Things we allow:
//!
//! * The personalities array has a 32-bit length, but all indices into
//! it are only 2 bits. As such, it is theoretically possible for there
//! to be unindexable personalities. In practice that Shouldn't Happen,
//! and this implementation won't report an error if it does, because it
//! can be benign (although we have no way to tell if indices were truncated).
//!
//! * The llvm headers say that at most there should be 127 global opcodes
//! and 128 local opcodes, but since local index translation is based on
//! the actual number of global opcodes and *not* 127/128, there's no
//! reason why each palette should be individually limited like this.
//! This implementation doesn't report an error if this happens, and should
//! work fine if it does.
//!
//! * The llvm headers say that second-level pages are *actual* pages at
//! a fixed size of 4096 bytes. It's unclear what advantage this provides,
//! perhaps there's a situation where you're mapping in the pages on demand?
//! This puts a practical limit on the number of entries each second-level
//! page can hold -- regular pages can fit 511 entries, while compressed
//! pages can hold 1021 entries+local_opcodes (they have to share). This
//! implementation does not report an error if a second-level page has more
//! values than that, and should work fine if it does.
//!
//! * If a [`CompactUnwindInfoIter`] is created for an architecture it wasn't
//! designed for, it is assumed that the layout of the page tables will
//! remain the same, and entry iteration/lookup should still work and
//! produce results. However [`CompactUnwindInfoEntry::instructions`]
//! will always return [`CompactUnwindOp::None`].
//!
//! * If an opcode kind is encountered that this implementation wasn't
//! designed for, `Opcode::instructions` will return [`CompactUnwindOp::None`].
//!
//! * If two entries have the same address (making the first have zero-length),
//! we silently discard the first one in favour of the second.
//!
//! * Only 7 register mappings are provided for x86/x64 opcodes, but the
//! 3-bit encoding allows for 8. This implementation will just map the
//! 8th encoding to "no register" as well.
//!
//! * Only 6 registers can be restored by the x86/x64 stackless modes, but
//! the 3-bit encoding of the register count allows for 7. This implementation
//! clamps the value to 6.
//!
//!
//! Things we produce errors for:
//!
//! * If the root page has a version this implementation wasn't designed for,
//! [`CompactUnwindInfoIter::new`] will return an Error.
//!
//! * A corrupt unwind_info section may have its entries out of order. Since
//! the next entry's instruction_address is always needed to compute the
//! number of bytes the current entry covers, the implementation will report
//! an error if it encounters this. However it does not attempt to fully
//! validate the ordering during an `entry_for_address` query, as this would
//! significantly slow down the binary search. In this situation
//! you may get chaotic results (same guarantees as `BTreeMap` with an
//! inconsistent `Ord` implementation).
//!
//! * A corrupt unwind_info section may attempt to index out of bounds either
//! with out-of-bounds offset values (e.g. personalities_offset) or with out
//! of bounds indices (e.g. a local opcode index). When an array length is
//! provided, this implementation will return an error if an index is out
//! out of bounds. Offsets are only restricted to the unwind_info
//! section itself, as this implementation does not assume arrays are
//! placed in any particular place, and does not try to prevent aliasing.
//! Trying to access outside the `.unwind_info` section will return an error.
//!
//! * If an unknown second-level page type is encountered, iteration/lookup will
//! return an error.
//!
//!
//! Things that cause chaos:
//!
//! * If the null page was missing, there would be no way to identify the
//! number of instruction bytes the last entry in the table covers. As such,
//! this implementation assumes that it exists, and currently does not validate
//! it ahead of time. If the null page *is* missing, the last entry or page
//! may be treated as the null page, and won't be emitted. (Perhaps we should
//! provide more reliable behaviour here?)
//!
//! * If there are multiple null pages, or if there is a page with a defined
//! second-level page but no entries of its own, behaviour is unspecified.
//!
use crate::macho::MachError;
use goblin::error::Error;
use goblin::mach::segment::SectionData;
use scroll::{Endian, Pread};
use std::mem;
// Hacky glue types to keep exposure of the containing library minimal.
// This will help with transplanting this code into goblin.
type Result<T> = std::result::Result<T, MachError>;
#[derive(Debug, Clone)]
enum Arch {
X86,
X64,
Arm64,
Other,
}
// Types marked with repr(C) indicate their layout precisely matches the
// layout of the format. In theory we could point directly into the binary
// of the unwind_info section with these types, but we avoid doing so for
// endianness/safety.
#[repr(C)]
#[derive(Debug, Clone, Pread)]
struct FirstLevelPage {
// Only version 1 is currently defined
// version: u32 = 1,
/// The array of u32 global opcodes (offset relative to start of root page).
///
/// These may be indexed by "compressed" second-level pages.
global_opcodes_offset: u32,
global_opcodes_len: u32,
/// The array of u32 global personality codes (offset relative to start of root page).
///
/// Personalities define the style of unwinding that an unwinder should use,
/// and how to interpret the LSDA entries for a function (see below).
personalities_offset: u32,
personalities_len: u32,
/// The array of [`FirstLevelPageEntry`]'s describing the second-level pages
/// (offset relative to start of root page).
pages_offset: u32,
pages_len: u32,
// After this point there are several dynamically-sized arrays whose precise
// order and positioning don't matter, because they are all accessed using
// offsets like the ones above. The arrays are:
// global_opcodes: [u32; global_opcodes_len],
// personalities: [u32; personalities_len],
// pages: [FirstLevelPageEntry; pages_len],
// lsdas: [LsdaEntry; unknown_len],
}
#[repr(C)]
#[derive(Debug, Clone, Pread)]
struct FirstLevelPageEntry {
/// The first address mapped by this page.
///
/// This is useful for binary-searching for the page that can map
/// a specific address in the binary (the primary kind of lookup
/// performed by an unwinder).
first_address: u32,
/// Offset to the second-level page (offset relative to start of root page).
///
/// This may point to either a [`RegularSecondLevelPage`] or a [`CompressedSecondLevelPage`].
/// Which it is can be determined by the 32-bit "kind" value that is at
/// the start of both layouts.
second_level_page_offset: u32,
/// Base offset into the lsdas array that entries in this page will be relative
/// to (offset relative to start of root page).
lsda_index_offset: u32,
}
#[repr(C)]
#[derive(Debug, Clone, Pread)]
struct RegularSecondLevelPage {
// Always 2 (use to distinguish from CompressedSecondLevelPage).
// kind: u32 = 2,
/// The Array of [`RegularEntry`]'s (offset relative to **start of this page**).
entries_offset: u16,
entries_len: u16,
}
#[repr(C)]
#[derive(Debug, Clone, Pread)]
struct CompressedSecondLevelPage {
// Always 3 (use to distinguish from RegularSecondLevelPage).
// kind: u32 = 3,
/// The array of compressed u32 entries (offset relative to **start of this page**).
///
/// Entries are a u32 that contains two packed values (from highest to lowest bits):
/// * 8 bits: opcode index
/// * 0..global_opcodes_len => index into global palette
/// * global_opcodes_len..255 => index into local palette (subtract global_opcodes_len)
/// * 24 bits: instruction address
/// * address is relative to this page's first_address!
entries_offset: u16,
entries_len: u16,
/// The array of u32 local opcodes for this page (offset relative to **start of this page**).
local_opcodes_offset: u16,
local_opcodes_len: u16,
}
#[repr(C)]
#[derive(Debug, Clone, Pread)]
struct RegularEntry {
/// The address in the binary for this entry (absolute).
instruction_address: u32,
/// The opcode for this address.
opcode: u32,
}
#[derive(Debug, Clone)]
#[repr(C)]
struct LsdaEntry {
instruction_address: u32,
lsda_address: u32,
}
#[derive(Debug, Clone)]
enum OpcodeOrIndex {
Opcode(u32),
Index(u32),
}
#[derive(Debug, Clone)]
struct RawCompactUnwindInfoEntry {
/// The address of the first instruction this entry applies to
/// (may apply to later instructions as well).
instruction_address: u32,
/// Either an opcode or the index into an opcode palette
opcode_or_index: OpcodeOrIndex,
}
/// An iterator over the [`CompactUnwindInfoEntry`]'s of a `.unwind_info` section.
#[derive(Debug, Clone)]
pub struct CompactUnwindInfoIter<'a> {
/// Parent .unwind_info metadata.
arch: Arch,
endian: Endian,
section: SectionData<'a>,
/// Parsed root page.
root: FirstLevelPage,
// Iterator state
/// Current index in the root node.
first_idx: u32,
/// Current index in the second-level node.
second_idx: u32,
/// Parsed version of the current pages.
page_of_next_entry: Option<(FirstLevelPageEntry, SecondLevelPage)>,
/// Minimally parsed version of the next entry, which we need to have
/// already loaded to know how many instructions the previous entry covered.
next_entry: Option<RawCompactUnwindInfoEntry>,
done_page: bool,
}
impl<'a> CompactUnwindInfoIter<'a> {
/// Creates a new [`CompactUnwindInfoIter`] for the given section.
pub fn new(
section: SectionData<'a>,
little_endian: bool,
arch: symbolic_common::Arch,
) -> Result<Self> {