-
Notifications
You must be signed in to change notification settings - Fork 352
/
Copy pathdwarf.cc
2126 lines (1828 loc) · 64.9 KB
/
dwarf.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <assert.h>
#include <stdio.h>
#include <algorithm>
#include <initializer_list>
#include <iostream>
#include <memory>
#include <stack>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "absl/base/attributes.h"
#include "absl/base/macros.h"
#include "absl/strings/string_view.h"
#include "absl/strings/substitute.h"
#include "absl/types/optional.h"
#include "bloaty.h"
#include "bloaty.pb.h"
#include "dwarf_constants.h"
#include "re2/re2.h"
using namespace dwarf2reader;
using absl::string_view;
// Rounds |offset| up to the next multiple of |granularity|.
// |granularity| must be a power of two; the result is computed by masking.
static size_t AlignUpTo(size_t offset, size_t granularity) {
  const size_t low_bits = granularity - 1;
  return (offset + low_bits) & ~low_bits;
}
// Throws a bloaty::Error built from |str|, this file's name (__FILE__) and
// |line|. Marked noreturn, so callers do not need a return statement after it.
ABSL_ATTRIBUTE_NORETURN
static void Throw(const char *str, int line) {
throw bloaty::Error(str, __FILE__, line);
}
// THROW(msg): throws with a fixed message, recording the line of the call site.
#define THROW(msg) Throw(msg, __LINE__)
// THROWF(fmt, ...): like THROW, but the message is formatted with
// absl::Substitute from the given arguments.
#define THROWF(...) Throw(absl::Substitute(__VA_ARGS__).c_str(), __LINE__)
namespace bloaty {
extern int verbose_level;
namespace dwarf {
// Divides |n| by |d|, biasing the numerator by d - 1 first so that the
// truncating integer division rounds a non-negative quotient upwards.
int DivRoundUp(int n, int d) {
  const int bias = d - 1;
  return (n + bias) / d;
}
// Low-level Parsing Routines //////////////////////////////////////////////////
// For parsing the low-level values found in DWARF files. These are the only
// routines that touch the bytes of the input buffer directly. Everything else
// is layered on top of these.
// Copies sizeof(T) bytes from the front of |data| into a T (via memcpy, so no
// alignment requirement on the input) and advances |data| past them.
// Throws if fewer than sizeof(T) bytes remain.
template <class T>
T ReadMemcpy(string_view* data) {
  constexpr size_t kWidth = sizeof(T);
  if (kWidth > data->size()) {
    THROW("premature EOF reading fixed-length DWARF data");
  }
  T value;
  memcpy(&value, data->data(), kWidth);
  data->remove_prefix(kWidth);
  return value;
}
// Splits the first |bytes| bytes off the front of |data| and returns them as
// a view; |data| is advanced past the returned piece.
// Throws if |data| holds fewer than |bytes| bytes.
string_view ReadPiece(size_t bytes, string_view* data) {
  if (bytes > data->size()) {
    THROW("premature EOF reading variable-length DWARF data");
  }
  const string_view piece(data->data(), bytes);
  data->remove_prefix(bytes);
  return piece;
}
// Advances |data| by |bytes| bytes without returning them.
// Throws if |data| holds fewer than |bytes| bytes.
void SkipBytes(size_t bytes, string_view* data) {
  const bool enough = bytes <= data->size();
  if (!enough) {
    THROW("premature EOF skipping DWARF data");
  }
  data->remove_prefix(bytes);
}
// Reads a NUL-terminated string from the front of |data|.
//
// Returns a view of the string without its terminator and advances |data|
// past the string and the terminator. Throws if |data| contains no NUL byte.
//
// Uses string_view::find rather than memchr so that an empty view (whose
// data() may be null) is handled without passing a null pointer to memchr.
string_view ReadNullTerminated(string_view* data) {
  const size_t len = data->find('\0');
  if (len == string_view::npos) {
    THROW("DWARF string was not NULL-terminated");
  }
  const string_view val = data->substr(0, len);
  data->remove_prefix(len + 1);  // Remove NULL also.
  return val;
}
// Skips a NUL-terminated string at the front of |data|, including the
// terminator. Throws if |data| contains no NUL byte.
//
// Uses string_view::find rather than memchr so that an empty view (whose
// data() may be null) is handled without passing a null pointer to memchr.
void SkipNullTerminated(string_view* data) {
  const size_t len = data->find('\0');
  if (len == string_view::npos) {
    THROW("DWARF string was not NULL-terminated");
  }
  data->remove_prefix(len + 1);  // Remove NULL also.
}
// Decodes one LEB128 value (the variable-length integer format used by DWARF)
// from the front of |data| and advances |data| past it.
//
// Each input byte contributes its low 7 bits, least-significant group first;
// a clear high bit marks the final byte. When |is_signed| is set and bit 6 of
// the final byte is set, the result is sign-extended (returned as the
// two's-complement bit pattern in a uint64_t).
//
// Throws if the input ends, or ten groups have been read, before a final byte
// is seen.
uint64_t ReadLEB128Internal(bool is_signed, string_view* data) {
  // Groups are accepted while the bit offset is below this bound, i.e. for
  // offsets 0, 7, ..., 63.
  constexpr int kShiftLimit = 70;
  const size_t available = data->size();
  uint64_t value = 0;
  size_t used = 0;
  for (int shift = 0; used < available && shift < kShiftLimit;) {
    const char group = data->data()[used++];
    value |= static_cast<uint64_t>(group & 0x7f) << shift;
    shift += 7;
    if ((group & 0x80) != 0) {
      continue;  // Continuation bit set: another group follows.
    }
    data->remove_prefix(used);
    const bool negative = is_signed && shift < 64 && (group & 0x40);
    if (negative) {
      // Fill every bit above the decoded groups with ones.
      value |= -(1ULL << shift);
    }
    return value;
  }
  THROW("corrupt DWARF data, unterminated LEB128");
}
// Reads a LEB128 value from |data| as type T. The signed or unsigned decoding
// is selected from the signedness of T. Throws if the decoded value does not
// fit in T.
template <typename T>
T ReadLEB128(string_view* data) {
  constexpr bool kSigned = std::is_signed<T>::value;
  // Decode at full 64-bit width first so the range check below is meaningful.
  using Wide = typename std::conditional<kSigned, int64_t, uint64_t>::type;
  const Wide wide = ReadLEB128Internal(kSigned, data);
  const bool too_big = wide > std::numeric_limits<T>::max();
  const bool too_small = wide < std::numeric_limits<T>::min();
  if (too_big || too_small) {
    THROW("DWARF data contained larger LEB128 than we were expecting");
  }
  return static_cast<T>(wide);
}
// Advances |data| past one LEB128 value without decoding it. At most ten
// bytes are examined; throws if none of them has a clear high bit.
void SkipLEB128(string_view* data) {
  constexpr size_t kMaxBytes = 10;
  const size_t scan = std::min<size_t>(data->size(), kMaxBytes);
  size_t pos = 0;
  while (pos < scan) {
    const bool is_last = ((*data)[pos] & 0x80) == 0;
    ++pos;
    if (is_last) {
      data->remove_prefix(pos);
      return;
    }
  }
  THROW("corrupt DWARF data, unterminated LEB128");
}
// Some size information attached to each compilation unit. The size of an
// address or offset in the DWARF data depends on this state, which is parsed
// from the unit header.
//
// All fields start out zero/false and are filled in by ReadInitialLength(),
// ReadDWARFVersion() and SetAddressSize().
class CompilationUnitSizes {
 public:
  // When true, DWARF offsets are 64 bits, otherwise they are 32 bits.
  bool dwarf64() const { return dwarf64_; }

  // The size of addresses in bytes: 4 or 8 once SetAddressSize() has
  // succeeded, 0 before that.
  uint8_t address_size() const { return address_size_; }

  // DWARF version of this unit. Returned at the full 16-bit width read from
  // the header so that out-of-range versions are not truncated into
  // plausible-looking small values.
  uint16_t dwarf_version() const { return dwarf_version_; }

  // Records the address size for this unit. Throws unless it is 4 or 8.
  void SetAddressSize(uint8_t address_size) {
    if (address_size != 4 && address_size != 8) {
      THROWF("Unexpected address size: $0", address_size);
    }
    address_size_ = address_size;
  }

  // To allow this as the key in a map. Orders by (dwarf64, address_size);
  // the DWARF version does not take part in the comparison.
  bool operator<(const CompilationUnitSizes& rhs) const {
    return std::tie(dwarf64_, address_size_) <
           std::tie(rhs.dwarf64_, rhs.address_size_);
  }

  // Reads a DWARF offset based on whether we are reading dwarf32 or dwarf64
  // format.
  uint64_t ReadDWARFOffset(string_view* data) const {
    if (dwarf64_) {
      return ReadMemcpy<uint64_t>(data);
    } else {
      return ReadMemcpy<uint32_t>(data);
    }
  }

  // Reads an address according to the expected address_size.
  uint64_t ReadAddress(string_view* data) const {
    if (address_size_ == 8) {
      return ReadMemcpy<uint64_t>(data);
    } else if (address_size_ == 4) {
      return ReadMemcpy<uint32_t>(data);
    } else {
      BLOATY_UNREACHABLE();
    }
  }

  // Reads an "initial length" as specified in many DWARF headers. This
  // contains either a 32-bit or a 64-bit length, and signals whether we are
  // using the 32-bit or 64-bit DWARF format (so it sets dwarf64
  // appropriately).
  //
  // Returns the range for this section and stores the remaining data
  // in |remaining|. Throws if the stated length exceeds the available data.
  string_view ReadInitialLength(string_view* remaining) {
    uint64_t len = ReadMemcpy<uint32_t>(remaining);
    if (len == 0xffffffff) {
      // Escape value: the real length follows as a 64-bit field.
      dwarf64_ = true;
      len = ReadMemcpy<uint64_t>(remaining);
    } else {
      dwarf64_ = false;
    }
    if (remaining->size() < len) {
      THROW("short DWARF compilation unit");
    }
    string_view unit = *remaining;
    unit.remove_suffix(remaining->size() - len);
    *remaining = remaining->substr(len);
    return unit;
  }

  // Reads the 16-bit version field from |data| and stores it.
  void ReadDWARFVersion(string_view* data) {
    dwarf_version_ = ReadMemcpy<uint16_t>(data);
  }

 private:
  uint16_t dwarf_version_ = 0;
  bool dwarf64_ = false;
  uint8_t address_size_ = 0;
};
// AbbrevTable /////////////////////////////////////////////////////////////////
// Holds the abbreviations parsed from (a portion of) the .debug_abbrev
// section. Each abbreviation has a "code" that is unique within one table and
// describes a DIE's tag plus the names and forms of its attributes. A DIE's
// encoding then carries only the abbreviation code and the attribute values.
//
// Abbreviations are an internal detail of the DWARF format; users should not
// need to care about them.
class AbbrevTable {
 public:
  // One attribute of an abbreviation: its name and the form of its value.
  struct Attribute {
    uint16_t name;
    uint8_t form;
  };

  // A single abbreviation.
  struct Abbrev {
    uint32_t code;
    uint16_t tag;
    bool has_child;
    std::vector<Attribute> attr;
  };

  // Parses abbreviations from |data| until the terminating entry is seen and
  // returns the data that follows it.
  string_view ReadAbbrevs(string_view data);

  // True when no abbreviation has been stored.
  bool IsEmpty() const { return abbrev_.empty(); }

  // Looks up the abbreviation with the given code. On success stores a
  // pointer to it in |*abbrev| and returns true; otherwise returns false and
  // leaves |*abbrev| untouched.
  bool GetAbbrev(uint32_t code, const Abbrev** abbrev) const {
    const auto found = abbrev_.find(code);
    if (found == abbrev_.end()) {
      return false;
    }
    *abbrev = &found->second;
    return true;
  }

 private:
  // Keyed by abbreviation code. Codes are usually small, so a vector would
  // almost do, but a map copes with arbitrary input.
  std::unordered_map<uint32_t, Abbrev> abbrev_;
};
// Parses abbreviation declarations from |data| into this table until the
// terminating entry (code 0) is reached, and returns the data after it.
// Throws on a duplicate code or an invalid has-children byte.
string_view AbbrevTable::ReadAbbrevs(string_view data) {
  for (;;) {
    const uint32_t code = ReadLEB128<uint32_t>(&data);
    if (code == 0) {
      break;  // Terminator entry.
    }

    // operator[] creates a fresh entry (code == 0) when the code is new.
    Abbrev& entry = abbrev_[code];
    if (entry.code != 0) {
      THROW("DWARF data contained duplicate abbrev code");
    }
    entry.code = code;
    entry.tag = ReadLEB128<uint16_t>(&data);

    const uint8_t children = ReadMemcpy<uint8_t>(&data);
    if (children == DW_children_yes) {
      entry.has_child = true;
    } else if (children == DW_children_no) {
      entry.has_child = false;
    } else {
      THROW("DWARF has_child is neither true nor false.");
    }

    // Attribute specifications follow, ended by a (0, 0) pair.
    for (;;) {
      Attribute spec;
      spec.name = ReadLEB128<uint16_t>(&data);
      spec.form = ReadLEB128<uint8_t>(&data);
      if (spec.name == 0 && spec.form == 0) {
        break;  // End of this abbrev.
      }
      entry.attr.push_back(spec);
    }
  }
  return data;
}
// StringTable /////////////////////////////////////////////////////////////////
// Represents the .debug_str portion of a DWARF file and contains code for
// reading strings out of it. This is an internal detail of the DWARF format
// and users should not need to care about it.
//
// The table only holds a string_view of the section, not a copy of its bytes.
class StringTable {
public:
// Construct with the debug_str data from a DWARF file.
StringTable(string_view debug_str) : debug_str_(debug_str) {}
// Reads the NUL-terminated string that starts |ofs| bytes into the section
// and returns it without its terminator. Throws (from SkipBytes or
// ReadNullTerminated) if |ofs| is past the end of the section or no
// terminator follows.
string_view ReadEntry(size_t ofs) const;
private:
// The section contents passed to the constructor.
string_view debug_str_;
};
// Returns the NUL-terminated string found |ofs| bytes into the table, without
// its terminator. Works on a local copy of the view, so the table itself is
// never advanced.
string_view StringTable::ReadEntry(size_t ofs) const {
  string_view cursor(debug_str_);
  SkipBytes(ofs, &cursor);
  const string_view entry = ReadNullTerminated(&cursor);
  return entry;
}
// AddressRanges ///////////////////////////////////////////////////////////////
// Code for reading address ranges out of .debug_aranges.
//
// Usage: call NextUnit() to move to a unit, then NextRange() repeatedly to
// step through that unit's ranges. The accessors report the values read by
// the most recent successful call, and 0 before any call.
class AddressRanges {
 public:
  AddressRanges(string_view data) : section_(data), next_unit_(data) {}

  // Offset into .debug_info for the current compilation unit.
  uint64_t debug_info_offset() const { return debug_info_offset_; }

  // Address and length for this range.
  uint64_t address() const { return address_; }
  uint64_t length() const { return length_; }

  // Advance to the next range. The values will be available in address() and
  // length(). Returns false when the end of this compilation unit is hit.
  // Must call this once before reading the first range.
  bool NextRange();

  // Advance to the next compilation unit. The unit offset will be available
  // in debug_info_offset(). Must call this once before reading the first
  // unit. Returns false when no units remain.
  bool NextUnit();

 private:
  CompilationUnitSizes sizes_;
  // The whole section; used to compute offsets for alignment.
  string_view section_;
  // Unread data of the current unit.
  string_view unit_remaining_;
  // Data starting at the next unit's header.
  string_view next_unit_;
  uint64_t debug_info_offset_ = 0;
  uint64_t address_ = 0;
  uint64_t length_ = 0;
};
// Reads the next (address, length) pair of the current unit into address_ and
// length_. Both fields are read at the unit's address size. Returns false,
// reading nothing, once the unit's data is exhausted.
bool AddressRanges::NextRange() {
  const bool have_data = !unit_remaining_.empty();
  if (have_data) {
    address_ = sizes_.ReadAddress(&unit_remaining_);
    length_ = sizes_.ReadAddress(&unit_remaining_);
  }
  return have_data;
}
// Parses the header of the next unit in the section and positions
// unit_remaining_ at its first range. Returns false when no units remain.
// Throws if the version is above 2 or the unit uses segmented addresses.
bool AddressRanges::NextUnit() {
  if (next_unit_.empty()) {
    return false;
  }

  unit_remaining_ = sizes_.ReadInitialLength(&next_unit_);
  sizes_.ReadDWARFVersion(&unit_remaining_);
  if (sizes_.dwarf_version() > 2) {
    THROW("DWARF data is too new for us");
  }

  debug_info_offset_ = sizes_.ReadDWARFOffset(&unit_remaining_);
  sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&unit_remaining_));
  const uint8_t segment_size = ReadMemcpy<uint8_t>(&unit_remaining_);
  if (segment_size != 0) {
    THROW("we don't know how to handle segmented addresses.");
  }

  // Skip padding so the first range starts at a section offset that is a
  // multiple of twice the address size.
  const size_t header_end = unit_remaining_.data() - section_.data();
  const size_t tuple_size = sizes_.address_size() * 2;
  const size_t padding = AlignUpTo(header_end, tuple_size) - header_end;
  SkipBytes(padding, &unit_remaining_);
  return true;
}
// LocationList ////////////////////////////////////////////////////////////////
// Code for reading entries out of a location list.
// For the moment we only care about finding the bounds of a list given its
// offset, so we don't actually vend any of the data.
class LocationList {
public:
// |sizes| supplies the address size used to decode entries; |data| is the
// available data beginning at the first entry of the list.
LocationList(CompilationUnitSizes sizes, string_view data)
: sizes_(sizes), remaining_(data) {}
// Pointer to the first byte that has not been consumed yet.
const char* read_offset() const { return remaining_.data(); }
// Consumes one entry. Returns false once the terminating entry has been
// consumed.
bool NextEntry();
private:
CompilationUnitSizes sizes_;
// Unread data; advanced by NextEntry().
string_view remaining_;
};
// Consumes one location list entry. Returns false after consuming the
// end-of-list entry (start and end both zero), true otherwise.
bool LocationList::NextEntry() {
  const uint64_t start = sizes_.ReadAddress(&remaining_);
  const uint64_t end = sizes_.ReadAddress(&remaining_);
  if (start == 0 && end == 0) {
    return false;
  }

  // A start of all ones (at the unit's address width) selects a new base
  // address and carries no location description.
  const bool base_selection =
      start == UINT64_MAX ||
      (start == UINT32_MAX && sizes_.address_size() == 4);
  if (!base_selection) {
    // Skip the location description: a 16-bit length followed by that many
    // bytes.
    const uint16_t length = ReadMemcpy<uint16_t>(&remaining_);
    SkipBytes(length, &remaining_);
  }
  return true;
}
// Returns the prefix of |available| occupied by the location list that starts
// at its beginning, up to and including the terminating entry.
string_view GetLocationListRange(CompilationUnitSizes sizes,
                                 string_view available) {
  LocationList list(sizes, available);
  bool more = true;
  do {
    more = list.NextEntry();
  } while (more);
  const size_t consumed = list.read_offset() - available.data();
  return available.substr(0, consumed);
}
// RangeList ///////////////////////////////////////////////////////////////////
// Code for reading entries out of a range list.
// For the moment we only care about finding the bounds of a list given its
// offset, so we don't actually vend any of the data.
class RangeList {
public:
// |sizes| supplies the address size used to decode entries; |data| is the
// available data beginning at the first entry of the list.
RangeList(CompilationUnitSizes sizes, string_view data)
: sizes_(sizes), remaining_(data) {}
// Pointer to the first byte that has not been consumed yet.
const char* read_offset() const { return remaining_.data(); }
// Consumes one entry. Returns false once the terminating entry has been
// consumed.
bool NextEntry();
private:
CompilationUnitSizes sizes_;
// Unread data; advanced by NextEntry().
string_view remaining_;
};
// Consumes one (start, end) pair at the unit's address size. Returns false
// after consuming the end-of-list pair (both values zero), true otherwise.
bool RangeList::NextEntry() {
  const uint64_t start = sizes_.ReadAddress(&remaining_);
  const uint64_t end = sizes_.ReadAddress(&remaining_);
  const bool terminator = (start == 0 && end == 0);
  return !terminator;
}
// Returns the prefix of |available| occupied by the range list that starts at
// its beginning, up to and including the terminating entry.
string_view GetRangeListRange(CompilationUnitSizes sizes,
                              string_view available) {
  RangeList list(sizes, available);
  bool more = true;
  do {
    more = list.NextEntry();
  } while (more);
  const size_t consumed = list.read_offset() - available.data();
  return available.substr(0, consumed);
}
// DIEReader ///////////////////////////////////////////////////////////////////
// Reads a sequence of DWARF DIE's (Debugging Information Entries) from the
// .debug_info or .debug_types section of a binary.
//
// Each DIE contains a tag and a set of attribute/value pairs. We rely on the
// abbreviations in an AbbrevTable to decode the DIEs.
//
// The reader moves between the states in State (below): after a DIE's
// abbreviation code has been read it is kReadyToReadAttributes; once an
// AttrReader has consumed the attributes it is kReadyToNext; when the current
// data is exhausted it is kEof.
class DIEReader {
public:
// Constructs a new DIEReader. Cannot be used until you call one of the
// Seek() methods below.
DIEReader(const File& file) : dwarf_(file) {}
// Returns true if we are at the end of DIEs for this compilation unit.
bool IsEof() const { return state_ == State::kEof; }
// DIEs exist in both .debug_info and .debug_types.
enum class Section {
kDebugInfo,
kDebugTypes
};
// Seeks to the overall start or the start of a specific compilation unit.
// Note that |header_offset| is the offset of the compilation unit *header*,
// not the offset of the first DIE.
bool SeekToCompilationUnit(Section section, uint64_t header_offset);
bool SeekToStart(Section section) {
return SeekToCompilationUnit(section, 0);
}
// Parses the header of the unit that follows the current one and reads its
// first DIE code. Returns false if no further unit exists.
bool NextCompilationUnit();
// Advances to the next overall DIE, ignoring whether it happens to be a
// child, a sibling, or an uncle/aunt. Returns false at EOF; malformed data
// is reported by throwing rather than by the return value.
bool NextDIE();
// Skips children of the current DIE, so that the next call to NextDIE()
// will read the next sibling (or parent, if no sibling exists).
bool SkipChildren();
// Abbreviation of the current DIE. Must not be called at EOF.
const AbbrevTable::Abbrev& GetAbbrev() const {
assert(!IsEof());
return *current_abbrev_;
}
// Returns the tag of the current DIE.
// Requires that ReadCode() has been called at least once.
uint16_t GetTag() const { return GetAbbrev().tag; }
// Returns whether the current DIE has a child.
// Requires that ReadCode() has been called at least once.
bool HasChild() const { return GetAbbrev().has_child; }
const File& dwarf() const { return dwarf_; }
// The bytes of the current unit, including its initial-length field.
string_view unit_range() const { return unit_range_; }
CompilationUnitSizes unit_sizes() const { return unit_sizes_; }
uint32_t abbrev_version() const { return abbrev_version_; }
uint64_t debug_abbrev_offset() const { return debug_abbrev_offset_; }
// If strp_sink is set, AddIndirectString() calls
// strp_sink->AddFileRange("dwarf_strp", <unit name>, <string range>), where
// <unit name> is the value last passed to set_compileunit_name(). The string
// readers in this file call AddIndirectString() for each DW_FORM_strp
// attribute they parse. These strings occur in the .debug_str section.
void set_compileunit_name(absl::string_view name) {
unit_name_ = std::string(name);
}
void set_strp_sink(RangeSink* sink) { strp_sink_ = sink; }
void AddIndirectString(string_view range) const {
if (strp_sink_) {
strp_sink_->AddFileRange("dwarf_strp", unit_name_, range);
}
}
private:
BLOATY_DISALLOW_COPY_AND_ASSIGN(DIEReader);
template<typename> friend class AttrReader;
// APIs for our friends to use to update our state.
// Call to get the current read head where attributes should be parsed.
string_view ReadAttributesBegin() {
assert(state_ == State::kReadyToReadAttributes);
return remaining_;
}
// When some data has been parsed, this updates our read head and moves the
// reader to kReadyToNext. Throws if |remaining| has a null data pointer;
// otherwise always returns true.
bool ReadAttributesEnd(string_view remaining, uint64_t sibling) {
assert(state_ == State::kReadyToReadAttributes);
if (remaining.data() == nullptr) {
THROW("premature EOF reading DWARF attributes");
} else {
remaining_ = remaining;
sibling_offset_ = sibling;
state_ = State::kReadyToNext;
return true;
}
}
// Internal APIs.
// Parses the unit header at next_unit_ and then reads the first DIE code.
bool ReadCompilationUnitHeader();
// Reads the next non-null abbreviation code and looks up its abbreviation.
bool ReadCode();
enum class State {
kReadyToReadAttributes,
kReadyToNext,
kEof,
} state_;
// NOTE(review): not referenced anywhere in the visible part of this file.
std::string error_;
const File& dwarf_;
RangeSink* strp_sink_ = nullptr;
// Abbreviation for the current entry.
const AbbrevTable::Abbrev* current_abbrev_;
// Our current read position.
string_view remaining_;
// Set from the |sibling| argument of ReadAttributesEnd(); reset to 0 by
// ReadCode().
uint64_t sibling_offset_;
// Nesting depth: incremented when a DIE with children is read, decremented
// when a null entry is read.
int depth_ = 0;
// Data for the next compilation unit.
string_view next_unit_;
// All of the AbbrevTables we've read from .debug_abbrev, indexed by their
// offset within .debug_abbrev.
std::unordered_map<uint64_t, AbbrevTable> abbrev_tables_;
// Whether we are in .debug_types or .debug_info.
Section section_;
// Information about the current compilation unit.
uint64_t debug_abbrev_offset_;
std::string unit_name_;
string_view unit_range_;
CompilationUnitSizes unit_sizes_;
AbbrevTable* unit_abbrev_;
// A small integer that uniquely identifies the combination of unit_abbrev_
// and unit_sizes_. Attribute readers use this to know when they can reuse an
// existing (abbrev code) -> (Actions) mapping, since this table depends on
// both the current abbrev. table and the sizes.
uint32_t abbrev_version_;
std::map<std::pair<AbbrevTable*, CompilationUnitSizes>, uint32_t>
abbrev_versions_;
// Only for .debug_types
uint64_t unit_type_signature_;
uint64_t unit_type_offset_;
};
bool DIEReader::ReadCode() {
uint32_t code;
again:
if (remaining_.empty()) {
state_ = State::kEof;
return false;
}
code = ReadLEB128<uint32_t>(&remaining_);
if (code == 0) {
// null entry terminates a chain of sibling entries.
depth_--;
goto again;
}
if (!unit_abbrev_->GetAbbrev(code, ¤t_abbrev_)) {
THROW("couldn't find abbreviation for code");
}
state_ = State::kReadyToReadAttributes;
sibling_offset_ = 0;
if (HasChild()) {
depth_++;
}
return true;
}
// Moves to the compilation unit that starts at next_unit_ by parsing its
// header. Returns whatever ReadCompilationUnitHeader() returns.
bool DIEReader::NextCompilationUnit() {
return ReadCompilationUnitHeader();
}
// Advances to the next DIE in the current data. Returns false if the reader
// is already at EOF or reaches it; otherwise the attributes of the previous
// DIE must already have been consumed (state kReadyToNext).
bool DIEReader::NextDIE() {
  const bool at_eof = (state_ == State::kEof);
  if (!at_eof) {
    assert(state_ == State::kReadyToNext);
    return ReadCode();
  }
  return false;
}
// Positions the reader at the unit whose header starts |offset| bytes into
// the chosen section, then parses that header. Throws (from SkipBytes) if
// |offset| is beyond the end of the section.
bool DIEReader::SeekToCompilationUnit(Section section, uint64_t offset) {
  section_ = section;
  switch (section) {
    case Section::kDebugInfo:
      next_unit_ = dwarf_.debug_info;
      break;
    default:
      next_unit_ = dwarf_.debug_types;
      break;
  }
  SkipBytes(offset, &next_unit_);
  return ReadCompilationUnitHeader();
}
// Parses the unit header at the front of next_unit_, loads the abbreviation
// table it refers to if needed, and reads the first DIE code.
//
// Returns false (state kEof) if no unit data remains; otherwise returns the
// result of ReadCode(). Throws on a version above 4 or on truncated data.
bool DIEReader::ReadCompilationUnitHeader() {
if (next_unit_.empty()) {
state_ = State::kEof;
return false;
}
// Remember where the unit starts so unit_range_ can include the
// initial-length field as well as the unit contents.
unit_range_ = next_unit_;
remaining_ = unit_sizes_.ReadInitialLength(&next_unit_);
unit_range_ = unit_range_.substr(
0, remaining_.size() + (remaining_.data() - unit_range_.data()));
unit_sizes_.ReadDWARFVersion(&remaining_);
if (unit_sizes_.dwarf_version() > 4) {
THROW("Data is in new DWARF format we don't understand");
}
debug_abbrev_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
// operator[] creates an empty table the first time an offset is seen.
unit_abbrev_ = &abbrev_tables_[debug_abbrev_offset_];
// If we haven't already read abbreviations for this debug_abbrev_offset_, we
// need to do so now.
if (unit_abbrev_->IsEmpty()) {
string_view abbrev_data = dwarf_.debug_abbrev;
SkipBytes(debug_abbrev_offset_, &abbrev_data);
unit_abbrev_->ReadAbbrevs(abbrev_data);
}
unit_sizes_.SetAddressSize(ReadMemcpy<uint8_t>(&remaining_));
// Units in .debug_types carry two extra header fields.
if (section_ == Section::kDebugTypes) {
unit_type_signature_ = ReadMemcpy<uint64_t>(&remaining_);
unit_type_offset_ = unit_sizes_.ReadDWARFOffset(&remaining_);
}
// Assign (or look up) the small integer identifying this combination of
// abbreviation table and sizes; new combinations get the next unused index.
auto abbrev_id = std::make_pair(unit_abbrev_, unit_sizes_);
auto insert_pair = abbrev_versions_.insert(
std::make_pair(abbrev_id, abbrev_versions_.size()));
// This will be either the newly inserted value or the existing one, if there
// was one.
abbrev_version_ = insert_pair.first->second;
return ReadCode();
}
// DWARF form parsing //////////////////////////////////////////////////////////
// The parsed value of one DIE attribute: either an unsigned integer or a
// string_view of bytes. The view is not owned; it refers to the data the
// value was parsed from.
class AttrValue {
 public:
  AttrValue(uint64_t val) : uint_(val), type_(Type::kUint) {}
  AttrValue(string_view val) : string_(val), type_(Type::kString) {}

  enum class Type {
    kUint,
    kString
  };

  Type type() const { return type_; }
  bool IsUint() const { return type_ == Type::kUint; }
  bool IsString() const { return type_ == Type::kString; }

  // Returns the value as an unsigned integer where possible.
  //
  // An integer value is returned as is. A string value of exactly 1, 2, 4 or
  // 8 bytes is decoded as a fixed-width unsigned integer of that width (this
  // is how DW_FORM_data1/2/4/8 values are stored). Any other string size
  // yields absl::nullopt.
  absl::optional<uint64_t> ToUint() const {
    if (IsUint()) return uint_;
    string_view str = string_;
    switch (str.size()) {
      case 1:
        return ReadMemcpy<uint8_t>(&str);
      case 2:
        // Decode both bytes; reading a uint8_t here would drop the second.
        return ReadMemcpy<uint16_t>(&str);
      case 4:
        return ReadMemcpy<uint32_t>(&str);
      case 8:
        return ReadMemcpy<uint64_t>(&str);
    }
    return absl::nullopt;
  }

  // Returns the integer value. Requires IsUint().
  uint64_t GetUint() const {
    assert(type_ == Type::kUint);
    return uint_;
  }

  // Returns the string value. Requires IsString().
  string_view GetString() const {
    assert(type_ == Type::kString);
    return string_;
  }

 private:
  // Which member is active is recorded in type_.
  union {
    uint64_t uint_;
    string_view string_;
  };

  Type type_;
};
// Reads a block whose length is stored in a fixed-width prefix of type D,
// and returns the block's bytes. |data| is advanced past prefix and block.
template <class D>
string_view ReadBlock(string_view* data) {
  return ReadPiece(ReadMemcpy<D>(data), data);
}
// Reads a block whose length is stored as an unsigned LEB128 prefix, and
// returns the block's bytes. |data| is advanced past prefix and block.
string_view ReadVariableBlock(string_view* data) {
  return ReadPiece(ReadLEB128<uint64_t>(data), data);
}
// Reads an offset of type D from |data|, returns the string found at that
// offset in the file's debug_str data, and reports the string's range via
// reader.AddIndirectString().
template <class D>
string_view ReadIndirectString(const DIEReader& reader, string_view* data) {
  const D offset = ReadMemcpy<D>(data);
  const string_view entry =
      StringTable(reader.dwarf().debug_str).ReadEntry(offset);
  reader.AddIndirectString(entry);
  return entry;
}
// Parses one attribute value encoded with |form| from the front of |data|,
// advancing |data| past it.
//
// Integer-like forms are returned as a uint AttrValue. Blocks, strings and
// the fixed-width DW_FORM_data* forms are returned as a string AttrValue
// holding the raw bytes (AttrValue::ToUint() can decode the latter).
// |reader| supplies the unit's address size, offset size and DWARF version,
// and access to .debug_str for DW_FORM_strp.
//
// Throws on an unknown form, on nested DW_FORM_indirect, or on truncated data.
AttrValue ParseAttr(const DIEReader& reader, uint8_t form, string_view* data) {
switch (form) {
case DW_FORM_indirect: {
// The actual form is stored inline, ahead of the value.
// NOTE(review): indirect_form is 16 bits wide but is passed on through the
// 8-bit |form| parameter, so values above 255 are narrowed.
uint16_t indirect_form = ReadLEB128<uint16_t>(data);
if (indirect_form == DW_FORM_indirect) {
THROW("indirect attribute has indirect form type");
}
return ParseAttr(reader, indirect_form, data);
}
case DW_FORM_ref1:
return AttrValue(ReadMemcpy<uint8_t>(data));
case DW_FORM_ref2:
return AttrValue(ReadMemcpy<uint16_t>(data));
case DW_FORM_ref4:
return AttrValue(ReadMemcpy<uint32_t>(data));
case DW_FORM_ref_sig8:
case DW_FORM_ref8:
return AttrValue(ReadMemcpy<uint64_t>(data));
case DW_FORM_addr:
// Also the jump target for DW_FORM_ref_addr in DWARF version <= 2 (below).
address_size:
switch (reader.unit_sizes().address_size()) {
case 4:
return AttrValue(ReadMemcpy<uint32_t>(data));
case 8:
return AttrValue(ReadMemcpy<uint64_t>(data));
default:
BLOATY_UNREACHABLE();
}
case DW_FORM_ref_addr:
// Address-sized for version <= 2; otherwise offset-sized, handled by the
// DW_FORM_sec_offset case.
if (reader.unit_sizes().dwarf_version() <= 2) {
goto address_size;
}
ABSL_FALLTHROUGH_INTENDED;
case DW_FORM_sec_offset:
if (reader.unit_sizes().dwarf64()) {
return AttrValue(ReadMemcpy<uint64_t>(data));
} else {
return AttrValue(ReadMemcpy<uint32_t>(data));
}
case DW_FORM_udata:
return AttrValue(ReadLEB128<uint64_t>(data));
case DW_FORM_block1:
return AttrValue(ReadBlock<uint8_t>(data));
case DW_FORM_block2:
return AttrValue(ReadBlock<uint16_t>(data));
case DW_FORM_block4:
return AttrValue(ReadBlock<uint32_t>(data));
case DW_FORM_block:
case DW_FORM_exprloc:
return AttrValue(ReadVariableBlock(data));
case DW_FORM_string:
return AttrValue(ReadNullTerminated(data));
case DW_FORM_strp:
// The offset into .debug_str is 8 bytes in 64-bit DWARF, 4 bytes otherwise.
if (reader.unit_sizes().dwarf64()) {
return AttrValue(ReadIndirectString<uint64_t>(reader, data));
} else {
return AttrValue(ReadIndirectString<uint32_t>(reader, data));
}
// The fixed-width data forms are kept as raw bytes rather than decoded here.
case DW_FORM_data1:
return AttrValue(ReadPiece(1, data));
case DW_FORM_data2:
return AttrValue(ReadPiece(2, data));
case DW_FORM_data4:
return AttrValue(ReadPiece(4, data));
case DW_FORM_data8:
return AttrValue(ReadPiece(8, data));
// Bloaty doesn't currently care about any bool or signed data.
// So we fudge it a bit and just stuff these in a uint64.
case DW_FORM_flag_present:
// Consumes no bytes.
return AttrValue(1);
case DW_FORM_flag:
return AttrValue(ReadMemcpy<uint8_t>(data));
case DW_FORM_sdata:
// Decoded with the unsigned reader, so no sign extension is applied.
return AttrValue(ReadLEB128<uint64_t>(data));
default:
THROWF("Don't know how to parse DWARF form: $0", form);
}
}
// AttrReader //////////////////////////////////////////////////////////////////
// Parses a DIE's attributes, calling user callbacks with the parsed values.
template <class T>
class AttrReader {
public:
typedef void CallbackFunc(T* container, AttrValue val);
void OnAttribute(DwarfAttribute attr, CallbackFunc* func) {
attributes_[attr] = func;
}
// Reads all attributes for this DIE, storing the ones we were expecting.
void ReadAttributes(DIEReader* reader, T* container) {
string_view data = reader->ReadAttributesBegin();
const AbbrevTable::Abbrev& abbrev = reader->GetAbbrev();
for (auto attr : abbrev.attr) {
AttrValue value = ParseAttr(*reader, attr.form, &data);
auto it = attributes_.find(attr.name);
if (it != attributes_.end()) {
it->second(container, value);
}
}
reader->ReadAttributesEnd(data, 0);
}
private:
std::unordered_map<int, CallbackFunc*> attributes_;
};
// From DIEReader, defined here because it depends on FixedAttrReader.
bool DIEReader::SkipChildren() {
assert(state_ == State::kReadyToNext);
if (!HasChild()) {
return true;
}
int target_depth = depth_ - 1;
dwarf::AttrReader<void> attr_reader;
while (depth_ > target_depth) {
// TODO(haberman): use DW_AT_sibling to optimize skipping when it is
// available.
if (!NextDIE()) {
return false;
}
attr_reader.ReadAttributes(this, nullptr);
}
return true;
}