From fb0434bd726b230ec8365573bf57c98eaf6c1201 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Fri, 19 Jul 2019 14:13:51 +0200 Subject: [PATCH 01/31] Added preliminary support for S_INLINESITE --- src/common.rs | 3 + src/symbol/binary_annotations.rs | 184 +++++++++++++++++++++++++++++++ src/symbol/constants.rs | 57 ++++++++++ src/symbol/mod.rs | 49 ++++++++ 4 files changed, 293 insertions(+) create mode 100644 src/symbol/binary_annotations.rs diff --git a/src/common.rs b/src/common.rs index 275d00e..ed4321b 100644 --- a/src/common.rs +++ b/src/common.rs @@ -19,6 +19,9 @@ use crate::tpi::constants; /// `TypeIndex` refers to a type somewhere in `PDB.type_information()`. pub type TypeIndex = u32; +/// `ItemId` refers to an item ID. +pub type ItemId = u32; + /// An error that occurred while reading or parsing the PDB. #[derive(Debug)] pub enum Error { diff --git a/src/symbol/binary_annotations.rs b/src/symbol/binary_annotations.rs new file mode 100644 index 0000000..a424ee0 --- /dev/null +++ b/src/symbol/binary_annotations.rs @@ -0,0 +1,184 @@ +use std::result; + +use crate::common::*; +use crate::symbol::constants::*; +use crate::FallibleIterator; + +/// An iterator over binary annotations used by `S_INLINESITE` +pub struct BinaryAnnotationsIter<'t> { + buffer: &'t [u8], +} + +impl<'t> BinaryAnnotationsIter<'t> { + /// Initializes the iterator by parsing the buffer contents. 
+ pub fn new(buffer: &'t [u8]) -> BinaryAnnotationsIter<'t> { + BinaryAnnotationsIter { buffer } + } + + fn pop_front(&mut self) -> Result { + let (first, rest) = self.buffer.split_first().ok_or(Error::UnexpectedEof)?; + self.buffer = rest; + Ok(*first) + } + + fn get_compressed_annotation(&mut self) -> Result { + let b1 = u32::from(self.pop_front()?); + if (b1 & 0x80) == 0 { + return Ok(b1); + } + let b2 = u32::from(self.pop_front()?); + if (b1 & 0xc0) == 0x80 { + return Ok((b1 & 0x3f) << 8 | b2); + } + let b3 = u32::from(self.pop_front()?); + let b4 = u32::from(self.pop_front()?); + if (b1 & 0xe0) == 0xc0 { + return Ok(((b1 & 0x1f) << 24) | (b2 << 16) | (b3 << 8) | b4); + } + Err(Error::SymbolTooShort) + } +} + +fn decode_signed_operand(value: u32) -> i32 { + if value & 1 != 0 { + -((value >> 1) as i32) + } else { + (value >> 1) as i32 + } +} + +impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { + type Item = BinaryAnnotation; + type Error = Error; + + fn next(&mut self) -> result::Result, Self::Error> { + if self.buffer.is_empty() { + return Ok(None); + } + Ok(Some( + match BinaryAnnotationOpcode::from(self.get_compressed_annotation()?) { + // invalid opcodes mark the end of the stream. + BinaryAnnotationOpcode::Invalid => { + self.buffer = &[][..]; + return Ok(None); + } + BinaryAnnotationOpcode::CodeOffset => { + BinaryAnnotation::CodeOffset(self.get_compressed_annotation()?) + } + BinaryAnnotationOpcode::ChangeCodeOffsetBase => { + BinaryAnnotation::ChangeCodeOffsetBase(self.get_compressed_annotation()?) + } + BinaryAnnotationOpcode::ChangeCodeOffset => { + BinaryAnnotation::ChangeCodeOffset(self.get_compressed_annotation()?) + } + BinaryAnnotationOpcode::ChangeCodeLength => { + BinaryAnnotation::ChangeCodeLength(self.get_compressed_annotation()?) + } + BinaryAnnotationOpcode::ChangeFile => { + BinaryAnnotation::ChangeFile(self.get_compressed_annotation()?) 
+ } + BinaryAnnotationOpcode::ChangeLineOffset => BinaryAnnotation::ChangeLineOffset( + decode_signed_operand(self.get_compressed_annotation()?), + ), + BinaryAnnotationOpcode::ChangeLineEndDelta => { + BinaryAnnotation::ChangeLineEndDelta(self.get_compressed_annotation()?) + } + BinaryAnnotationOpcode::ChangeRangeKind => { + BinaryAnnotation::ChangeRangeKind(self.get_compressed_annotation()?) + } + BinaryAnnotationOpcode::ChangeColumnStart => { + BinaryAnnotation::ChangeColumnStart(self.get_compressed_annotation()?) + } + BinaryAnnotationOpcode::ChangeColumnEndDelta => { + BinaryAnnotation::ChangeColumnEndDelta(decode_signed_operand( + self.get_compressed_annotation()?, + )) + } + BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset => { + let annotation = self.get_compressed_annotation()?; + BinaryAnnotation::ChangeCodeOffsetAndLineOffset( + decode_signed_operand(annotation >> 4), + decode_signed_operand(annotation & 0xf), + ) + } + BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset => { + BinaryAnnotation::ChangeCodeLengthAndCodeOffset( + self.get_compressed_annotation()?, + self.get_compressed_annotation()?, + ) + } + BinaryAnnotationOpcode::ChangeColumnEnd => { + BinaryAnnotation::ChangeColumnEnd(self.get_compressed_annotation()?) + } + }, + )) + } +} + +/// Represents a parsed `BinaryAnnotation`. +/// +/// Binary annotations are used by `S_INLINESITE` to encode opcodes for how to +/// evaluate the state changes for inline information. 
+#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum BinaryAnnotation { + CodeOffset(u32), + ChangeCodeOffsetBase(u32), + ChangeCodeOffset(u32), + ChangeCodeLength(u32), + ChangeFile(u32), + ChangeLineOffset(i32), + ChangeLineEndDelta(u32), + ChangeRangeKind(u32), + ChangeColumnStart(u32), + ChangeColumnEndDelta(i32), + ChangeCodeOffsetAndLineOffset(i32, i32), + ChangeCodeLengthAndCodeOffset(u32, u32), + ChangeColumnEnd(u32), +} + +#[test] +fn test_binary_annotation_iter() { + let inp = b"\x0b\x03\x06\n\x03\x08\x06\x06\x03-\x06\x08\x03\x07\x0br\x06\x06\x0c\x03\x07\x06\x0f\x0c\x06\x05\x00\x00"; + let items = BinaryAnnotationsIter::new(inp).collect::>().unwrap(); + assert_eq!( + items, + vec![ + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(0, -1), + BinaryAnnotation::ChangeLineOffset(5), + BinaryAnnotation::ChangeCodeOffset(8), + BinaryAnnotation::ChangeLineOffset(3), + BinaryAnnotation::ChangeCodeOffset(45), + BinaryAnnotation::ChangeLineOffset(4), + BinaryAnnotation::ChangeCodeOffset(7), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(-3, 1), + BinaryAnnotation::ChangeLineOffset(3), + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(3, 7), + BinaryAnnotation::ChangeLineOffset(-7), + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(6, 5) + ] + ); + + let inp = b"\x03P\x06\x0e\x03\x0c\x06\x04\x032\x06\x06\x03T\x0b#\x0b\\\x0bC\x0b/\x06\x04\x0c-\t\x03;\x06\x1d\x0c\x05\x06\x00\x00"; + let items = BinaryAnnotationsIter::new(inp).collect::>().unwrap(); + assert_eq!( + items, + vec![ + BinaryAnnotation::ChangeCodeOffset(80), + BinaryAnnotation::ChangeLineOffset(7), + BinaryAnnotation::ChangeCodeOffset(12), + BinaryAnnotation::ChangeLineOffset(2), + BinaryAnnotation::ChangeCodeOffset(50), + BinaryAnnotation::ChangeLineOffset(3), + BinaryAnnotation::ChangeCodeOffset(84), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(1, -1), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(-2, 6), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(2, -1), + 
BinaryAnnotation::ChangeCodeOffsetAndLineOffset(1, -7), + BinaryAnnotation::ChangeLineOffset(2), + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(45, 9), + BinaryAnnotation::ChangeCodeOffset(59), + BinaryAnnotation::ChangeLineOffset(-14), + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(5, 6), + ] + ); +} diff --git a/src/symbol/constants.rs b/src/symbol/constants.rs index c397058..874915f 100644 --- a/src/symbol/constants.rs +++ b/src/symbol/constants.rs @@ -544,3 +544,60 @@ impl From for SourceLanguage { } } } + +/// These values correspond to the BinaryAnnotationOpcode enum from the +/// cvinfo.h +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum BinaryAnnotationOpcode { + /// link time pdb contains PADDINGs + Invalid = 0, + /// param : start offset + CodeOffset = 1, + /// param : nth separated code chunk (main code chunk == 0) + ChangeCodeOffsetBase = 2, + /// param : delta of offset + ChangeCodeOffset = 3, + /// param : length of code, default next start + ChangeCodeLength = 4, + /// param : fileId + ChangeFile = 5, + /// param : line offset (signed) + ChangeLineOffset = 6, + /// param : how many lines, default 1 + ChangeLineEndDelta = 7, + /// param : either 1 (default, for statement) + /// or 0 (for expression) + ChangeRangeKind = 8, + /// param : start column number, 0 means no column info + ChangeColumnStart = 9, + /// param : end column number delta (signed) + ChangeColumnEndDelta = 10, + /// param : ((sourceDelta << 4) | CodeDelta) + ChangeCodeOffsetAndLineOffset = 11, + /// param : codeLength, codeOffset + ChangeCodeLengthAndCodeOffset = 12, + /// param : end column number + ChangeColumnEnd = 13, +} + +impl From for BinaryAnnotationOpcode { + fn from(value: u32) -> Self { + match value { + 0 => BinaryAnnotationOpcode::Invalid, + 1 => BinaryAnnotationOpcode::CodeOffset, + 2 => BinaryAnnotationOpcode::ChangeCodeOffsetBase, + 3 => BinaryAnnotationOpcode::ChangeCodeOffset, + 4 => BinaryAnnotationOpcode::ChangeCodeLength, + 5 => 
BinaryAnnotationOpcode::ChangeFile, + 6 => BinaryAnnotationOpcode::ChangeLineOffset, + 7 => BinaryAnnotationOpcode::ChangeLineEndDelta, + 8 => BinaryAnnotationOpcode::ChangeRangeKind, + 9 => BinaryAnnotationOpcode::ChangeColumnStart, + 10 => BinaryAnnotationOpcode::ChangeColumnEndDelta, + 11 => BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset, + 12 => BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset, + 13 => BinaryAnnotationOpcode::ChangeColumnEnd, + _ => BinaryAnnotationOpcode::Invalid, + } + } +} diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 98d5fda..0e55913 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -12,7 +12,10 @@ use crate::common::*; use crate::msf::*; use crate::FallibleIterator; +mod binary_annotations; mod constants; + +pub use self::binary_annotations::*; use self::constants::*; /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, @@ -124,6 +127,8 @@ impl<'t> Symbol<'t> { S_LPROC32 | S_LPROC32_ST | S_GPROC32 | S_GPROC32_ST | S_LPROC32_ID | S_GPROC32_ID | S_LPROC32_DPC | S_LPROC32_DPC_ID => 35, + S_INLINESITE => 12, + S_OBJNAME | S_OBJNAME_ST => 4, S_COMPILE3 => 22, @@ -159,6 +164,26 @@ impl<'t> Symbol<'t> { Ok(&self.0[2..(data_length + 2)]) } + /// Returns additional data stored in the symbol. + pub fn extra_data(&self) -> Result> { + let data_length = self.data_length()?; + let buf = &self.0[2 + data_length..]; + + if self.raw_kind() == S_INLINESITE { + Ok(Some(buf)) + } else { + Ok(None) + } + } + + /// Interprets the extra data as binary annotations and + /// returns an iterator over it. + pub fn iter_binary_annotations(&self) -> Result> { + Ok(BinaryAnnotationsIter::new( + self.extra_data()?.unwrap_or(&[][..]), + )) + } + /// Returns the name of the symbol. Note that the underlying buffer is owned by the /// `SymbolTable`. 
pub fn name(&self) -> Result> { @@ -168,6 +193,12 @@ impl<'t> Symbol<'t> { // figure out where the name is let mut buf = ParseBuffer::from(&self.0[2 + data_length..]); + // some things do not have a real name but store something else + // there instead. + if self.raw_kind() == S_INLINESITE { + return Ok(RawString::from("")); + } + // names come in two varieties: if self.raw_kind() < S_ST_MAX { // Pascal-style name @@ -307,6 +338,12 @@ fn parse_symbol_data(kind: u16, data: &[u8]) -> Result { flags: ProcedureFlags::new(buf.parse_u8()?), })), + S_INLINESITE => Ok(SymbolData::InlineSite(InlineSite { + parent: buf.parse_u32()?, + end: buf.parse_u32()?, + inlinee: buf.parse_u32()?, + })), + S_OBJNAME | S_OBJNAME_ST => Ok(SymbolData::ObjName(ObjNameSymbol { signature: buf.parse_u32()?, })), @@ -385,6 +422,9 @@ pub enum SymbolData { // S_LPROC32_DPC_ID (0x1156) Procedure(ProcedureSymbol), + // S_INLINESITE (0x114d) + InlineSite(InlineSite), + // S_OBJNAME (0x1101) | S_OBJNAME_ST (0x0009) ObjName(ObjNameSymbol), @@ -525,6 +565,15 @@ pub struct ProcedureSymbol { pub flags: ProcedureFlags, } +/// The information parsed from a symbol record with kind +/// `S_INLINESITE`. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct InlineSite { + pub parent: u32, + pub end: u32, + pub inlinee: ItemId, +} + /// The information parsed from a symbol record with kind /// `S_OBJNAME`, or `S_OBJNAME_ST`. 
#[derive(Debug, Copy, Clone, Eq, PartialEq)] From 0ace7a660419be43943b1967029532f005371007 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Fri, 19 Jul 2019 20:21:17 +0200 Subject: [PATCH 02/31] Added S_INLINESITE2 support --- src/symbol/mod.rs | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 0e55913..7549b77 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -128,6 +128,7 @@ impl<'t> Symbol<'t> { | S_LPROC32_DPC | S_LPROC32_DPC_ID => 35, S_INLINESITE => 12, + S_INLINESITE2 => 16, S_OBJNAME | S_OBJNAME_ST => 4, @@ -169,10 +170,9 @@ impl<'t> Symbol<'t> { let data_length = self.data_length()?; let buf = &self.0[2 + data_length..]; - if self.raw_kind() == S_INLINESITE { - Ok(Some(buf)) - } else { - Ok(None) + match self.raw_kind() { + S_INLINESITE | S_INLINESITE2 => Ok(Some(buf)), + _ => Ok(None), } } @@ -195,19 +195,18 @@ impl<'t> Symbol<'t> { // some things do not have a real name but store something else // there instead. 
- if self.raw_kind() == S_INLINESITE { - return Ok(RawString::from("")); - } - - // names come in two varieties: - if self.raw_kind() < S_ST_MAX { - // Pascal-style name - let name = buf.parse_u8_pascal_string()?; - Ok(name) - } else { - // NUL-terminated name - let name = buf.parse_cstring()?; - Ok(name) + match self.raw_kind() { + S_INLINESITE | S_INLINESITE2 => Ok(RawString::from("")), + kind if kind < S_ST_MAX => { + // Pascal-style name + let name = buf.parse_u8_pascal_string()?; + Ok(name) + } + _ => { + // NUL-terminated name + let name = buf.parse_cstring()?; + Ok(name) + } } } } @@ -338,10 +337,15 @@ fn parse_symbol_data(kind: u16, data: &[u8]) -> Result { flags: ProcedureFlags::new(buf.parse_u8()?), })), - S_INLINESITE => Ok(SymbolData::InlineSite(InlineSite { + S_INLINESITE | S_INLINESITE2 => Ok(SymbolData::InlineSite(InlineSite { parent: buf.parse_u32()?, end: buf.parse_u32()?, inlinee: buf.parse_u32()?, + invocations: if kind == S_INLINESITE2 { + Some(buf.parse_u32()?) + } else { + None + }, })), S_OBJNAME | S_OBJNAME_ST => Ok(SymbolData::ObjName(ObjNameSymbol { @@ -566,12 +570,13 @@ pub struct ProcedureSymbol { } /// The information parsed from a symbol record with kind -/// `S_INLINESITE`. +/// `S_INLINESITE` or `S_INLINESITE2`. 
#[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct InlineSite { pub parent: u32, pub end: u32, pub inlinee: ItemId, + pub invocations: Option, } /// The information parsed from a symbol record with kind From 9b641c88ca7e2865497ae10e9cd073f270cf1135 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sat, 20 Jul 2019 01:33:18 +0200 Subject: [PATCH 03/31] Changed error --- src/symbol/binary_annotations.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/symbol/binary_annotations.rs b/src/symbol/binary_annotations.rs index a424ee0..1209231 100644 --- a/src/symbol/binary_annotations.rs +++ b/src/symbol/binary_annotations.rs @@ -35,7 +35,7 @@ impl<'t> BinaryAnnotationsIter<'t> { if (b1 & 0xe0) == 0xc0 { return Ok(((b1 & 0x1f) << 24) | (b2 << 16) | (b3 << 8) | b4); } - Err(Error::SymbolTooShort) + Err(Error::UnexpectedEof) } } From 6700db9a75ea528d48615fe9e28d095db9058ddd Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sat, 20 Jul 2019 17:10:10 +0200 Subject: [PATCH 04/31] Added basic support for inlinee lines --- src/modi/c13.rs | 94 +++++++++++++++++++++++++++++++++++++++++++ src/modi/constants.rs | 5 +++ src/modi/mod.rs | 41 +++++++++++++++++++ 3 files changed, 140 insertions(+) diff --git a/src/modi/c13.rs b/src/modi/c13.rs index 9936239..b9679bc 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -112,6 +112,63 @@ impl DebugLinesHeader { } } +#[derive(Clone, Copy, Debug, Default, Pread)] +struct DebugInlineesHeader { + /// The signature of the inlinees + signature: u32, +} + +#[derive(Clone, Copy, Debug, Default, Pread)] +pub struct InlineeSourceLine { + pub inlinee: ItemId, + pub file_id: u32, + pub source_line_num: u32, +} + +#[derive(Debug, Clone)] +struct DebugInlineesSubsection<'a> { + header: DebugInlineesHeader, + data: &'a [u8], +} + +impl<'a> DebugInlineesSubsection<'a> { + fn parse(data: &'a [u8]) -> Result { + let mut buf = ParseBuffer::from(data); + let header = buf.parse()?; + let data = &data[buf.pos()..]; + 
Ok(DebugInlineesSubsection { header, data }) + } + + fn lines(&self) -> DebugInlineesSourceLineIterator<'a> { + DebugInlineesSourceLineIterator { + header: self.header, + buf: ParseBuffer::from(self.data), + } + } +} + +#[derive(Debug, Clone, Default)] +struct DebugInlineesSourceLineIterator<'a> { + header: DebugInlineesHeader, + buf: ParseBuffer<'a>, +} + +impl<'a> FallibleIterator for DebugInlineesSourceLineIterator<'a> { + type Item = InlineeSourceLine; + type Error = Error; + + fn next(&mut self) -> Result> { + if self.header.signature != constants::CV_INLINEE_SOURCE_LINE_SIGNATURE { + return Ok(None); + } + if self.buf.is_empty() { + Ok(None) + } else { + Ok(Some(self.buf.parse()?)) + } + } +} + struct DebugLinesSubsection<'a> { header: DebugLinesHeader, data: &'a [u8], @@ -551,6 +608,36 @@ impl<'a> FallibleIterator for C13LineIterator<'a> { } } +#[derive(Clone, Debug, Default)] +pub struct C13InlineeLineIterator<'a> { + /// iterator over the inline source lines + lines: DebugInlineesSourceLineIterator<'a>, + /// Iterator over all subsections in the current module. + sections: DebugSubsectionIterator<'a>, +} + +impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { + type Item = InlineeSourceLine; + type Error = Error; + + fn next(&mut self) -> Result> { + loop { + if let Some(line) = self.lines.next()? { + return Ok(Some(line)); + } + if let Some(section) = self.sections.next()? 
{ + if section.kind == DebugSubsectionKind::InlineeLines { + let inlinees_section = DebugInlineesSubsection::parse(section.data)?; + self.lines = inlinees_section.lines(); + } + continue; + } else { + return Ok(None); + } + } + } +} + #[derive(Clone, Debug, Default)] pub struct C13FileIterator<'a> { checksums: DebugFileChecksumsIterator<'a>, @@ -629,6 +716,13 @@ impl<'a> C13LineProgram<'a> { } } + pub(crate) fn inlinee_lines(&self) -> C13InlineeLineIterator<'a> { + C13InlineeLineIterator { + sections: DebugSubsectionIterator::new(self.data), + lines: Default::default(), + } + } + pub(crate) fn get_file_info(&self, index: FileIndex) -> Result> { // The file index actually contains the byte offset value into the file_checksums // subsection. Therefore, treat it as the offset. diff --git a/src/modi/constants.rs b/src/modi/constants.rs index 14b406d..834e3ae 100644 --- a/src/modi/constants.rs +++ b/src/modi/constants.rs @@ -12,3 +12,8 @@ pub const CV_SIGNATURE_C13: u32 = 4; pub const DEBUG_S_IGNORE: u32 = 0x8000_0000; /// Flag indicating that column information is present. pub const CV_LINES_HAVE_COLUMNS: u16 = 0x1; + +/// Flag indicating the default format of `DEBUG_S_INLINEELINEINFO` +pub const CV_INLINEE_SOURCE_LINE_SIGNATURE: u32 = 0x0; +/// Flag indicating the extended format of `DEBUG_S_INLINEELINEINFO` +pub const CV_INLINEE_SOURCE_LINE_SIGNATURE_EX: u32 = 0x1; diff --git a/src/modi/mod.rs b/src/modi/mod.rs index 8c319b7..609d445 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -9,6 +9,8 @@ use crate::FallibleIterator; mod c13; mod constants; +pub use crate::modi::c13::InlineeSourceLine; + #[derive(Clone, Copy, Debug)] enum LinesSize { C11(usize), @@ -169,6 +171,15 @@ impl<'a> LineProgram<'a> { } } + /// Returns an iterator over all inlinees of this module. 
+ pub fn inlinee_lines(&self) -> InlineeLineIterator<'a> { + match self.inner { + LineProgramInner::C13(ref inner) => InlineeLineIterator { + inner: InlineeLineIteratorInner::C13(inner.inlinee_lines()), + }, + } + } + /// Returns an iterator over all file records of this module. pub fn files(&self) -> FileIterator<'a> { match self.inner { @@ -233,6 +244,36 @@ impl<'a> FallibleIterator for LineIterator<'a> { } } +#[derive(Clone, Debug)] +enum InlineeLineIteratorInner<'a> { + C13(c13::C13InlineeLineIterator<'a>), +} + +/// An iterator over line information records in a module. +#[derive(Clone, Debug)] +pub struct InlineeLineIterator<'a> { + inner: InlineeLineIteratorInner<'a>, +} + +impl Default for InlineeLineIterator<'_> { + fn default() -> Self { + InlineeLineIterator { + inner: InlineeLineIteratorInner::C13(Default::default()), + } + } +} + +impl<'a> FallibleIterator for InlineeLineIterator<'a> { + type Item = InlineeSourceLine; + type Error = Error; + + fn next(&mut self) -> Result> { + match self.inner { + InlineeLineIteratorInner::C13(ref mut inner) => inner.next(), + } + } +} + #[derive(Clone, Debug)] enum FileIteratorInner<'a> { C13(c13::C13FileIterator<'a>), From 865a7236fcf46e3bdece9c6dbcf487944aa52e45 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 31 Jul 2019 13:14:08 +0200 Subject: [PATCH 05/31] Refactor symbols --- examples/pdb2hpp.rs | 8 +- examples/pdb_lines.rs | 2 +- examples/pdb_symbols.rs | 15 +- src/common.rs | 113 ++- src/lib.rs | 4 +- src/omap.rs | 2 +- src/pdb.rs | 2 +- src/symbol/annotations.rs | 263 +++++++ src/symbol/binary_annotations.rs | 184 ----- src/symbol/constants.rs | 69 +- src/symbol/mod.rs | 1210 +++++++++++++++++++---------- src/tpi/data.rs | 4 +- tests/omap_address_translation.rs | 16 +- tests/symbol_table.rs | 7 +- 14 files changed, 1146 insertions(+), 753 deletions(-) create mode 100644 src/symbol/annotations.rs delete mode 100644 src/symbol/binary_annotations.rs diff --git a/examples/pdb2hpp.rs 
b/examples/pdb2hpp.rs index a008969..f8aa6c1 100644 --- a/examples/pdb2hpp.rs +++ b/examples/pdb2hpp.rs @@ -162,14 +162,14 @@ impl<'p> Class<'p> { // TODO: attributes (static, virtual, etc.) self.fields.push(Field { type_name: type_name(type_finder, data.field_type, needed_types)?, - name: data.name.clone(), + name: data.name, offset: data.offset, }); } pdb::TypeData::Method(ref data) => { let method = Method::find( - data.name.clone(), + data.name, data.attributes, type_finder, data.method_type, @@ -195,7 +195,7 @@ impl<'p> Class<'p> { { // hooray let method = Method::find( - data.name.clone(), + data.name, attributes, type_finder, method_type, @@ -425,7 +425,7 @@ impl<'p> Enum<'p> { // ignore everything else even though that's sad if let pdb::TypeData::Enumerate(ref data) = field { self.values.push(EnumValue { - name: data.name.clone(), + name: data.name, value: data.value, }); } diff --git a/examples/pdb_lines.rs b/examples/pdb_lines.rs index 3a8717d..8a2406e 100644 --- a/examples/pdb_lines.rs +++ b/examples/pdb_lines.rs @@ -33,7 +33,7 @@ fn dump_pdb(filename: &str) -> pdb::Result<()> { while let Some(symbol) = symbols.next()? { if let Ok(SymbolData::Procedure(proc)) = symbol.parse() { let sign = if proc.global { "+" } else { "-" }; - println!("{} {}", sign, symbol.name()?.to_string()); + println!("{} {}", sign, proc.name); let mut lines = program.lines_at_offset(proc.offset); while let Some(line_info) = lines.next()? { diff --git a/examples/pdb_symbols.rs b/examples/pdb_symbols.rs index 0bfb4db..895fb86 100644 --- a/examples/pdb_symbols.rs +++ b/examples/pdb_symbols.rs @@ -11,23 +11,20 @@ fn print_usage(program: &str, opts: Options) { fn print_row(offset: PdbInternalSectionOffset, kind: &str, name: pdb::RawString<'_>) { println!( "{:x}\t{:x}\t{}\t{}", - offset.section, - offset.offset, - kind, - name.to_string() + offset.section, offset.offset, kind, name ); } fn print_symbol(symbol: &pdb::Symbol<'_>) -> pdb::Result<()> { match symbol.parse()? 
{ - pdb::SymbolData::PublicSymbol(data) => { - print_row(data.offset, "function", symbol.name()?); + pdb::SymbolData::Public(data) => { + print_row(data.offset, "function", data.name); } - pdb::SymbolData::DataSymbol(data) => { - print_row(data.offset, "data", symbol.name()?); + pdb::SymbolData::Data(data) => { + print_row(data.offset, "data", data.name); } pdb::SymbolData::Procedure(data) => { - print_row(data.offset, "function", symbol.name()?); + print_row(data.offset, "function", data.name); } _ => { // ignore everything else diff --git a/src/common.rs b/src/common.rs index ed4321b..340cc76 100644 --- a/src/common.rs +++ b/src/common.rs @@ -99,6 +99,9 @@ pub enum Error { /// The lines table is missing. LinesNotFound, + + /// A binary annotation was compressed incorrectly. + InvalidCompressedAnnotation, } impl std::error::Error for Error { @@ -138,6 +141,7 @@ impl std::error::Error for Error { Error::UnimplementedFileChecksumKind(_) => "Unknown source file checksum kind", Error::InvalidFileChecksumOffset(_) => "Invalid source file checksum offset", Error::LinesNotFound => "Line information not found for a module", + Error::InvalidCompressedAnnotation => "Invalid compressed annoation", } } } @@ -625,6 +629,7 @@ macro_rules! def_parse { ( $( ($n:ident, $t:ty) ),* $(,)* ) => { $(#[doc(hidden)] #[inline] + #[allow(unused)] pub fn $n(&mut self) -> Result<$t> { Ok(self.parse()?) })* @@ -676,11 +681,24 @@ impl<'b> ParseBuffer<'b> { /// Parse an object that implements `Pread`. pub fn parse(&mut self) -> Result where - T: TryFromCtx<'b, Endian, [u8], Error = scroll::Error, Size = usize>, + T: TryFromCtx<'b, Endian, [u8], Size = usize>, + T::Error: From, + Error: From, { Ok(self.0.gread_with(&mut self.1, LE)?) } + /// Parse an object that implements `Pread` with the given context. + pub fn parse_with(&mut self, ctx: C) -> Result + where + T: TryFromCtx<'b, C, [u8], Size = usize>, + T::Error: From, + Error: From, + C: Copy, + { + Ok(self.0.gread_with(&mut self.1, ctx)?) 
+ } + def_parse!( (parse_u8, u8), (parse_u16, u16), @@ -728,69 +746,6 @@ impl<'b> ParseBuffer<'b> { Err(Error::UnexpectedEof) } } - - pub fn parse_variant(&mut self) -> Result { - let leaf = self.parse_u16()?; - if leaf < constants::LF_NUMERIC { - // the u16 directly encodes a value - return Ok(Variant::U16(leaf)); - } - - match leaf { - constants::LF_CHAR => Ok(Variant::U8(self.parse_u8()?)), - constants::LF_SHORT => Ok(Variant::I16(self.parse_i16()?)), - constants::LF_LONG => Ok(Variant::I32(self.parse_i32()?)), - constants::LF_QUADWORD => Ok(Variant::I64(self.parse_i64()?)), - constants::LF_USHORT => Ok(Variant::U16(self.parse_u16()?)), - constants::LF_ULONG => Ok(Variant::U32(self.parse_u32()?)), - constants::LF_UQUADWORD => Ok(Variant::U64(self.parse_u64()?)), - _ => { - if cfg!(debug_assertions) { - unreachable!(); - } else { - Err(Error::UnexpectedNumericPrefix(leaf)) - } - } - } - } - - #[doc(hidden)] - #[inline] - pub(crate) fn get_variant_size(&mut self) -> usize { - let leaf = self.parse_u16(); - match leaf { - Ok(leaf) => { - if leaf < constants::LF_NUMERIC { - // the u16 directly encodes a value - return 2; - } - - match leaf { - constants::LF_CHAR => 2 + 1, - constants::LF_SHORT => 2 + 2, - constants::LF_LONG => 2 + 4, - constants::LF_QUADWORD => 2 + 8, - constants::LF_USHORT => 2 + 2, - constants::LF_ULONG => 2 + 4, - constants::LF_UQUADWORD => 2 + 8, - _ => { - if cfg!(debug_assertions) { - unreachable!(); - } else { - 2 - } - } - } - } - Err(_) => { - if cfg!(debug_assertions) { - unreachable!(); - } else { - 2 - } - } - } - } } impl Default for ParseBuffer<'_> { @@ -828,8 +783,8 @@ pub enum Variant { I64(i64), } -impl ::std::fmt::Display for Variant { - fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { +impl fmt::Display for Variant { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { Variant::U8(value) => write!(f, "{}", value), Variant::U16(value) => write!(f, "{}", value), @@ -843,10 +798,34 @@ impl 
::std::fmt::Display for Variant { } } +impl<'a> TryFromCtx<'a, Endian> for Variant { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'a [u8], le: Endian) -> Result<(Self, Self::Size)> { + let mut offset = 0; + + let variant = match this.gread_with(&mut offset, le)? { + value if value < constants::LF_NUMERIC => Variant::U16(value), + constants::LF_CHAR => Variant::U8(this.gread_with(&mut offset, le)?), + constants::LF_SHORT => Variant::I16(this.gread_with(&mut offset, le)?), + constants::LF_LONG => Variant::I32(this.gread_with(&mut offset, le)?), + constants::LF_QUADWORD => Variant::I64(this.gread_with(&mut offset, le)?), + constants::LF_USHORT => Variant::U16(this.gread_with(&mut offset, le)?), + constants::LF_ULONG => Variant::U32(this.gread_with(&mut offset, le)?), + constants::LF_UQUADWORD => Variant::U64(this.gread_with(&mut offset, le)?), + _ if cfg!(debug_assertions) => unreachable!(), + other => return Err(Error::UnexpectedNumericPrefix(other)), + }; + + Ok((variant, offset)) + } +} + /// `RawString` refers to a `&[u8]` that physically resides somewhere inside a PDB data structure. /// /// A `RawString` may not be valid UTF-8. -#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Clone, Copy, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct RawString<'b>(&'b [u8]); impl fmt::Debug for RawString<'_> { diff --git a/src/lib.rs b/src/lib.rs index f661497..f2b4f1b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,10 +29,10 @@ //! let mut symbols = symbol_table.iter(); //! while let Some(symbol) = symbols.next()? { //! match symbol.parse() { -//! Ok(pdb::SymbolData::PublicSymbol(data)) if data.function => { +//! Ok(pdb::SymbolData::Public(data)) if data.function => { //! // we found the location of a function! //! let rva = data.offset.to_rva(&address_map).unwrap_or_default(); -//! println!("{} is {}", rva, symbol.name()?); +//! println!("{} is {}", rva, data.name); //! # count += 1; //! } //! 
_ => {} diff --git a/src/omap.rs b/src/omap.rs index cb7cc7d..f0f5339 100644 --- a/src/omap.rs +++ b/src/omap.rs @@ -374,7 +374,7 @@ impl FusedIterator for PdbInternalRvaRangeIter<'_> {} /// /// # let symbol_table = pdb.global_symbols()?; /// # let symbol = symbol_table.iter().next()?.unwrap(); -/// # match symbol.parse() { Ok(pdb::SymbolData::PublicSymbol(pubsym)) => { +/// # match symbol.parse() { Ok(pdb::SymbolData::Public(pubsym)) => { /// // Obtain some section offset, eg from a symbol, and convert it /// match pubsym.offset.to_rva(&address_map) { /// Some(rva) => { diff --git a/src/pdb.rs b/src/pdb.rs index f08b634..de1d13b 100644 --- a/src/pdb.rs +++ b/src/pdb.rs @@ -356,7 +356,7 @@ impl<'s, S: Source<'s> + 's> PDB<'s, S> { /// /// # let symbol_table = pdb.global_symbols()?; /// # let symbol = symbol_table.iter().next()?.unwrap(); - /// # match symbol.parse() { Ok(pdb::SymbolData::PublicSymbol(pubsym)) => { + /// # match symbol.parse() { Ok(pdb::SymbolData::Public(pubsym)) => { /// // Obtain some section offset, eg from a symbol, and convert it /// match pubsym.offset.to_rva(&address_map) { /// Some(rva) => { diff --git a/src/symbol/annotations.rs b/src/symbol/annotations.rs new file mode 100644 index 0000000..86aa170 --- /dev/null +++ b/src/symbol/annotations.rs @@ -0,0 +1,263 @@ +use std::result; + +use crate::common::*; +use crate::FallibleIterator; + +/// These values correspond to the BinaryAnnotationOpcode enum from the +/// cvinfo.h +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum BinaryAnnotationOpcode { + /// link time pdb contains PADDINGs + Invalid = 0, + /// param : start offset + CodeOffset = 1, + /// param : nth separated code chunk (main code chunk == 0) + ChangeCodeOffsetBase = 2, + /// param : delta of offset + ChangeCodeOffset = 3, + /// param : length of code, default next start + ChangeCodeLength = 4, + /// param : fileId + ChangeFile = 5, + /// param : line offset (signed) + ChangeLineOffset = 6, + /// param : how many lines, default 
1 + ChangeLineEndDelta = 7, + /// param : either 1 (default, for statement) + /// or 0 (for expression) + ChangeRangeKind = 8, + /// param : start column number, 0 means no column info + ChangeColumnStart = 9, + /// param : end column number delta (signed) + ChangeColumnEndDelta = 10, + /// param : ((sourceDelta << 4) | CodeDelta) + ChangeCodeOffsetAndLineOffset = 11, + /// param : codeLength, codeOffset + ChangeCodeLengthAndCodeOffset = 12, + /// param : end column number + ChangeColumnEnd = 13, +} + +impl From for BinaryAnnotationOpcode { + fn from(value: u32) -> Self { + match value { + 0 => BinaryAnnotationOpcode::Invalid, + 1 => BinaryAnnotationOpcode::CodeOffset, + 2 => BinaryAnnotationOpcode::ChangeCodeOffsetBase, + 3 => BinaryAnnotationOpcode::ChangeCodeOffset, + 4 => BinaryAnnotationOpcode::ChangeCodeLength, + 5 => BinaryAnnotationOpcode::ChangeFile, + 6 => BinaryAnnotationOpcode::ChangeLineOffset, + 7 => BinaryAnnotationOpcode::ChangeLineEndDelta, + 8 => BinaryAnnotationOpcode::ChangeRangeKind, + 9 => BinaryAnnotationOpcode::ChangeColumnStart, + 10 => BinaryAnnotationOpcode::ChangeColumnEndDelta, + 11 => BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset, + 12 => BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset, + 13 => BinaryAnnotationOpcode::ChangeColumnEnd, + _ => BinaryAnnotationOpcode::Invalid, + } + } +} + +/// Represents a parsed `BinaryAnnotation`. +/// +/// Binary annotations are used by `S_INLINESITE` to encode opcodes for how to +/// evaluate the state changes for inline information. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum BinaryAnnotation { + CodeOffset(u32), + ChangeCodeOffsetBase(u32), + ChangeCodeOffset(u32), + ChangeCodeLength(u32), + ChangeFile(u32), + ChangeLineOffset(i32), + ChangeLineEndDelta(u32), + ChangeRangeKind(u32), + ChangeColumnStart(u32), + ChangeColumnEndDelta(i32), + ChangeCodeOffsetAndLineOffset(i32, i32), + ChangeCodeLengthAndCodeOffset(u32, u32), + ChangeColumnEnd(u32), +} + +/// An iterator over binary annotations used by `S_INLINESITE`. +pub struct BinaryAnnotationsIter<'t> { + buffer: ParseBuffer<'t>, +} + +impl<'t> BinaryAnnotationsIter<'t> { + fn uncompress_next(&mut self) -> Result { + let b1 = u32::from(self.buffer.parse::()?); + if (b1 & 0x80) == 0x00 { + let value = b1; + return Ok(value); + } + + let b2 = u32::from(self.buffer.parse::()?); + if (b1 & 0xc0) == 0x80 { + let value = (b1 & 0x3f) << 8 | b2; + return Ok(value); + } + + let b3 = u32::from(self.buffer.parse::()?); + let b4 = u32::from(self.buffer.parse::()?); + if (b1 & 0xe0) == 0xc0 { + let value = ((b1 & 0x1f) << 24) | (b2 << 16) | (b3 << 8) | b4; + return Ok(value); + } + + Err(Error::InvalidCompressedAnnotation) + } +} + +/// Resembles `DecodeSignedInt32`. +fn decode_signed_operand(value: u32) -> i32 { + if value & 1 != 0 { + -((value >> 1) as i32) + } else { + (value >> 1) as i32 + } +} + +impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { + type Item = BinaryAnnotation; + type Error = Error; + + fn next(&mut self) -> result::Result, Self::Error> { + if self.buffer.is_empty() { + return Ok(None); + } + + let annotation = match BinaryAnnotationOpcode::from(self.uncompress_next()?) { + BinaryAnnotationOpcode::Invalid => { + // invalid opcodes mark the end of the stream. + self.buffer = ParseBuffer::default(); + return Ok(None); + } + BinaryAnnotationOpcode::CodeOffset => { + BinaryAnnotation::CodeOffset(self.uncompress_next()?) 
+ } + BinaryAnnotationOpcode::ChangeCodeOffsetBase => { + BinaryAnnotation::ChangeCodeOffsetBase(self.uncompress_next()?) + } + BinaryAnnotationOpcode::ChangeCodeOffset => { + BinaryAnnotation::ChangeCodeOffset(self.uncompress_next()?) + } + BinaryAnnotationOpcode::ChangeCodeLength => { + BinaryAnnotation::ChangeCodeLength(self.uncompress_next()?) + } + BinaryAnnotationOpcode::ChangeFile => { + BinaryAnnotation::ChangeFile(self.uncompress_next()?) + } + BinaryAnnotationOpcode::ChangeLineOffset => { + BinaryAnnotation::ChangeLineOffset(decode_signed_operand(self.uncompress_next()?)) + } + BinaryAnnotationOpcode::ChangeLineEndDelta => { + BinaryAnnotation::ChangeLineEndDelta(self.uncompress_next()?) + } + BinaryAnnotationOpcode::ChangeRangeKind => { + BinaryAnnotation::ChangeRangeKind(self.uncompress_next()?) + } + BinaryAnnotationOpcode::ChangeColumnStart => { + BinaryAnnotation::ChangeColumnStart(self.uncompress_next()?) + } + BinaryAnnotationOpcode::ChangeColumnEndDelta => BinaryAnnotation::ChangeColumnEndDelta( + decode_signed_operand(self.uncompress_next()?), + ), + BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset => { + let operand = self.uncompress_next()?; + BinaryAnnotation::ChangeCodeOffsetAndLineOffset( + decode_signed_operand(operand >> 4), + decode_signed_operand(operand & 0xf), + ) + } + BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset => { + BinaryAnnotation::ChangeCodeLengthAndCodeOffset( + self.uncompress_next()?, + self.uncompress_next()?, + ) + } + BinaryAnnotationOpcode::ChangeColumnEnd => { + BinaryAnnotation::ChangeColumnEnd(self.uncompress_next()?) + } + }; + + Ok(Some(annotation)) + } +} + +/// Binary annotations of a symbol. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct BinaryAnnotations<'t> { + data: &'t [u8], +} + +impl<'t> BinaryAnnotations<'t> { + /// Creates a new instance of binary annotations. 
+ pub(crate) fn new(data: &'t [u8]) -> Self { + BinaryAnnotations { data } + } + + /// Iterates through binary annotations. + pub fn iter(&self) -> BinaryAnnotationsIter<'t> { + BinaryAnnotationsIter { + buffer: ParseBuffer::from(self.data), + } + } +} + +#[test] +fn test_binary_annotation_iter() { + let inp = b"\x0b\x03\x06\n\x03\x08\x06\x06\x03-\x06\x08\x03\x07\x0br\x06\x06\x0c\x03\x07\x06\x0f\x0c\x06\x05\x00\x00"; + let annotations = BinaryAnnotations::new(inp) + .iter() + .collect::>() + .unwrap(); + + assert_eq!( + annotations, + vec![ + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(0, -1), + BinaryAnnotation::ChangeLineOffset(5), + BinaryAnnotation::ChangeCodeOffset(8), + BinaryAnnotation::ChangeLineOffset(3), + BinaryAnnotation::ChangeCodeOffset(45), + BinaryAnnotation::ChangeLineOffset(4), + BinaryAnnotation::ChangeCodeOffset(7), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(-3, 1), + BinaryAnnotation::ChangeLineOffset(3), + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(3, 7), + BinaryAnnotation::ChangeLineOffset(-7), + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(6, 5) + ] + ); + + let inp = b"\x03P\x06\x0e\x03\x0c\x06\x04\x032\x06\x06\x03T\x0b#\x0b\\\x0bC\x0b/\x06\x04\x0c-\t\x03;\x06\x1d\x0c\x05\x06\x00\x00"; + let annotations = BinaryAnnotations::new(inp) + .iter() + .collect::>() + .unwrap(); + + assert_eq!( + annotations, + vec![ + BinaryAnnotation::ChangeCodeOffset(80), + BinaryAnnotation::ChangeLineOffset(7), + BinaryAnnotation::ChangeCodeOffset(12), + BinaryAnnotation::ChangeLineOffset(2), + BinaryAnnotation::ChangeCodeOffset(50), + BinaryAnnotation::ChangeLineOffset(3), + BinaryAnnotation::ChangeCodeOffset(84), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(1, -1), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(-2, 6), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(2, -1), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(1, -7), + BinaryAnnotation::ChangeLineOffset(2), + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(45, 9), + 
BinaryAnnotation::ChangeCodeOffset(59), + BinaryAnnotation::ChangeLineOffset(-14), + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(5, 6), + ] + ); +} diff --git a/src/symbol/binary_annotations.rs b/src/symbol/binary_annotations.rs deleted file mode 100644 index 1209231..0000000 --- a/src/symbol/binary_annotations.rs +++ /dev/null @@ -1,184 +0,0 @@ -use std::result; - -use crate::common::*; -use crate::symbol::constants::*; -use crate::FallibleIterator; - -/// An iterator over binary annotations used by `S_INLINESITE` -pub struct BinaryAnnotationsIter<'t> { - buffer: &'t [u8], -} - -impl<'t> BinaryAnnotationsIter<'t> { - /// Initializes the iterator by parsing the buffer contents. - pub fn new(buffer: &'t [u8]) -> BinaryAnnotationsIter<'t> { - BinaryAnnotationsIter { buffer } - } - - fn pop_front(&mut self) -> Result { - let (first, rest) = self.buffer.split_first().ok_or(Error::UnexpectedEof)?; - self.buffer = rest; - Ok(*first) - } - - fn get_compressed_annotation(&mut self) -> Result { - let b1 = u32::from(self.pop_front()?); - if (b1 & 0x80) == 0 { - return Ok(b1); - } - let b2 = u32::from(self.pop_front()?); - if (b1 & 0xc0) == 0x80 { - return Ok((b1 & 0x3f) << 8 | b2); - } - let b3 = u32::from(self.pop_front()?); - let b4 = u32::from(self.pop_front()?); - if (b1 & 0xe0) == 0xc0 { - return Ok(((b1 & 0x1f) << 24) | (b2 << 16) | (b3 << 8) | b4); - } - Err(Error::UnexpectedEof) - } -} - -fn decode_signed_operand(value: u32) -> i32 { - if value & 1 != 0 { - -((value >> 1) as i32) - } else { - (value >> 1) as i32 - } -} - -impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { - type Item = BinaryAnnotation; - type Error = Error; - - fn next(&mut self) -> result::Result, Self::Error> { - if self.buffer.is_empty() { - return Ok(None); - } - Ok(Some( - match BinaryAnnotationOpcode::from(self.get_compressed_annotation()?) { - // invalid opcodes mark the end of the stream. 
- BinaryAnnotationOpcode::Invalid => { - self.buffer = &[][..]; - return Ok(None); - } - BinaryAnnotationOpcode::CodeOffset => { - BinaryAnnotation::CodeOffset(self.get_compressed_annotation()?) - } - BinaryAnnotationOpcode::ChangeCodeOffsetBase => { - BinaryAnnotation::ChangeCodeOffsetBase(self.get_compressed_annotation()?) - } - BinaryAnnotationOpcode::ChangeCodeOffset => { - BinaryAnnotation::ChangeCodeOffset(self.get_compressed_annotation()?) - } - BinaryAnnotationOpcode::ChangeCodeLength => { - BinaryAnnotation::ChangeCodeLength(self.get_compressed_annotation()?) - } - BinaryAnnotationOpcode::ChangeFile => { - BinaryAnnotation::ChangeFile(self.get_compressed_annotation()?) - } - BinaryAnnotationOpcode::ChangeLineOffset => BinaryAnnotation::ChangeLineOffset( - decode_signed_operand(self.get_compressed_annotation()?), - ), - BinaryAnnotationOpcode::ChangeLineEndDelta => { - BinaryAnnotation::ChangeLineEndDelta(self.get_compressed_annotation()?) - } - BinaryAnnotationOpcode::ChangeRangeKind => { - BinaryAnnotation::ChangeRangeKind(self.get_compressed_annotation()?) - } - BinaryAnnotationOpcode::ChangeColumnStart => { - BinaryAnnotation::ChangeColumnStart(self.get_compressed_annotation()?) - } - BinaryAnnotationOpcode::ChangeColumnEndDelta => { - BinaryAnnotation::ChangeColumnEndDelta(decode_signed_operand( - self.get_compressed_annotation()?, - )) - } - BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset => { - let annotation = self.get_compressed_annotation()?; - BinaryAnnotation::ChangeCodeOffsetAndLineOffset( - decode_signed_operand(annotation >> 4), - decode_signed_operand(annotation & 0xf), - ) - } - BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset => { - BinaryAnnotation::ChangeCodeLengthAndCodeOffset( - self.get_compressed_annotation()?, - self.get_compressed_annotation()?, - ) - } - BinaryAnnotationOpcode::ChangeColumnEnd => { - BinaryAnnotation::ChangeColumnEnd(self.get_compressed_annotation()?) 
- } - }, - )) - } -} - -/// Represents a parsed `BinaryAnnotation`. -/// -/// Binary annotations are used by `S_INLINESITE` to encode opcodes for how to -/// evaluate the state changes for inline information. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum BinaryAnnotation { - CodeOffset(u32), - ChangeCodeOffsetBase(u32), - ChangeCodeOffset(u32), - ChangeCodeLength(u32), - ChangeFile(u32), - ChangeLineOffset(i32), - ChangeLineEndDelta(u32), - ChangeRangeKind(u32), - ChangeColumnStart(u32), - ChangeColumnEndDelta(i32), - ChangeCodeOffsetAndLineOffset(i32, i32), - ChangeCodeLengthAndCodeOffset(u32, u32), - ChangeColumnEnd(u32), -} - -#[test] -fn test_binary_annotation_iter() { - let inp = b"\x0b\x03\x06\n\x03\x08\x06\x06\x03-\x06\x08\x03\x07\x0br\x06\x06\x0c\x03\x07\x06\x0f\x0c\x06\x05\x00\x00"; - let items = BinaryAnnotationsIter::new(inp).collect::>().unwrap(); - assert_eq!( - items, - vec![ - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(0, -1), - BinaryAnnotation::ChangeLineOffset(5), - BinaryAnnotation::ChangeCodeOffset(8), - BinaryAnnotation::ChangeLineOffset(3), - BinaryAnnotation::ChangeCodeOffset(45), - BinaryAnnotation::ChangeLineOffset(4), - BinaryAnnotation::ChangeCodeOffset(7), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(-3, 1), - BinaryAnnotation::ChangeLineOffset(3), - BinaryAnnotation::ChangeCodeLengthAndCodeOffset(3, 7), - BinaryAnnotation::ChangeLineOffset(-7), - BinaryAnnotation::ChangeCodeLengthAndCodeOffset(6, 5) - ] - ); - - let inp = b"\x03P\x06\x0e\x03\x0c\x06\x04\x032\x06\x06\x03T\x0b#\x0b\\\x0bC\x0b/\x06\x04\x0c-\t\x03;\x06\x1d\x0c\x05\x06\x00\x00"; - let items = BinaryAnnotationsIter::new(inp).collect::>().unwrap(); - assert_eq!( - items, - vec![ - BinaryAnnotation::ChangeCodeOffset(80), - BinaryAnnotation::ChangeLineOffset(7), - BinaryAnnotation::ChangeCodeOffset(12), - BinaryAnnotation::ChangeLineOffset(2), - BinaryAnnotation::ChangeCodeOffset(50), - BinaryAnnotation::ChangeLineOffset(3), - 
BinaryAnnotation::ChangeCodeOffset(84), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(1, -1), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(-2, 6), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(2, -1), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(1, -7), - BinaryAnnotation::ChangeLineOffset(2), - BinaryAnnotation::ChangeCodeLengthAndCodeOffset(45, 9), - BinaryAnnotation::ChangeCodeOffset(59), - BinaryAnnotation::ChangeLineOffset(-14), - BinaryAnnotation::ChangeCodeLengthAndCodeOffset(5, 6), - ] - ); -} diff --git a/src/symbol/constants.rs b/src/symbol/constants.rs index 874915f..3f257f2 100644 --- a/src/symbol/constants.rs +++ b/src/symbol/constants.rs @@ -13,6 +13,8 @@ use std::fmt; +use scroll::{ctx::TryFromCtx, Endian}; + pub const S_COMPILE: u16 = 0x0001; // Compile flags symbol pub const S_REGISTER_16t: u16 = 0x0002; // Register variable pub const S_CONSTANT_16t: u16 = 0x0003; // constant symbol @@ -467,6 +469,15 @@ impl From for CPUType { } } +impl<'a> TryFromCtx<'a, Endian> for CPUType { + type Error = scroll::Error; + type Size = usize; + + fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + u16::try_from_ctx(this, le).map(|(v, l)| (v.into(), l)) + } +} + /// These values correspond to the CV_CFL_LANG enumeration, and are documented /// here: https://msdn.microsoft.com/en-us/library/bw3aekw6.aspx #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -545,59 +556,11 @@ impl From for SourceLanguage { } } -/// These values correspond to the BinaryAnnotationOpcode enum from the -/// cvinfo.h -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum BinaryAnnotationOpcode { - /// link time pdb contains PADDINGs - Invalid = 0, - /// param : start offset - CodeOffset = 1, - /// param : nth separated code chunk (main code chunk == 0) - ChangeCodeOffsetBase = 2, - /// param : delta of offset - ChangeCodeOffset = 3, - /// param : length of code, default next start - ChangeCodeLength = 4, - /// param : fileId - ChangeFile = 
5, - /// param : line offset (signed) - ChangeLineOffset = 6, - /// param : how many lines, default 1 - ChangeLineEndDelta = 7, - /// param : either 1 (default, for statement) - /// or 0 (for expression) - ChangeRangeKind = 8, - /// param : start column number, 0 means no column info - ChangeColumnStart = 9, - /// param : end column number delta (signed) - ChangeColumnEndDelta = 10, - /// param : ((sourceDelta << 4) | CodeDelta) - ChangeCodeOffsetAndLineOffset = 11, - /// param : codeLength, codeOffset - ChangeCodeLengthAndCodeOffset = 12, - /// param : end column number - ChangeColumnEnd = 13, -} +impl<'a> TryFromCtx<'a, Endian> for SourceLanguage { + type Error = scroll::Error; + type Size = usize; -impl From for BinaryAnnotationOpcode { - fn from(value: u32) -> Self { - match value { - 0 => BinaryAnnotationOpcode::Invalid, - 1 => BinaryAnnotationOpcode::CodeOffset, - 2 => BinaryAnnotationOpcode::ChangeCodeOffsetBase, - 3 => BinaryAnnotationOpcode::ChangeCodeOffset, - 4 => BinaryAnnotationOpcode::ChangeCodeLength, - 5 => BinaryAnnotationOpcode::ChangeFile, - 6 => BinaryAnnotationOpcode::ChangeLineOffset, - 7 => BinaryAnnotationOpcode::ChangeLineEndDelta, - 8 => BinaryAnnotationOpcode::ChangeRangeKind, - 9 => BinaryAnnotationOpcode::ChangeColumnStart, - 10 => BinaryAnnotationOpcode::ChangeColumnEndDelta, - 11 => BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset, - 12 => BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset, - 13 => BinaryAnnotationOpcode::ChangeColumnEnd, - _ => BinaryAnnotationOpcode::Invalid, - } + fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + u8::try_from_ctx(this, le).map(|(v, l)| (v.into(), l)) } } diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 7549b77..a8e908d 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -8,16 +8,22 @@ use std::fmt; use std::result; +use scroll::{ctx::TryFromCtx, Endian, Pread, LE}; + use crate::common::*; use crate::msf::*; use crate::FallibleIterator; -mod 
binary_annotations; +mod annotations; mod constants; -pub use self::binary_annotations::*; use self::constants::*; +pub use self::annotations::*; + +/// The raw type discriminator for `Symbols`. +pub type SymbolKind = u16; + /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, /// constants, data types, and more. /// @@ -40,10 +46,10 @@ use self::constants::*; /// let mut symbols = symbol_table.iter(); /// while let Some(symbol) = symbols.next()? { /// match symbol.parse() { -/// Ok(pdb::SymbolData::PublicSymbol(data)) if data.function => { +/// Ok(pdb::SymbolData::Public(data)) if data.function => { /// // we found the location of a function! /// let rva = data.offset.to_rva(&address_map).unwrap_or_default(); -/// println!("{} is {}", rva, symbol.name()?); +/// println!("{} is {}", rva, data.name); /// # count += 1; /// } /// _ => {} @@ -84,129 +90,41 @@ pub struct Symbol<'t>(&'t [u8]); impl<'t> Symbol<'t> { /// Returns the kind of symbol identified by this Symbol. #[inline] - pub fn raw_kind(&self) -> u16 { + pub fn raw_kind(&self) -> SymbolKind { debug_assert!(self.0.len() >= 2); - - // assemble a little-endian u16 - u16::from(self.0[0]) | (u16::from(self.0[1]) << 8) + self.0.pread_with(0, LE).unwrap_or_default() } - /// Returns the raw bytes of this symbol record, including the symbol type but not including - /// the preceding symbol length indicator. + /// Returns the raw bytes of this symbol record, including the symbol type and extra data, but + /// not including the preceding symbol length indicator. + #[inline] pub fn raw_bytes(&self) -> &'t [u8] { self.0 } - /// Returns the size of the fixed-size fields for this kind of symbol. This permits other - /// accessors to extract the fields independent from the names. 
- fn data_length(&self) -> Result { - let kind = self.raw_kind(); - - let data_length = match kind { - S_PUB32 | S_PUB32_ST => 10, - - S_LDATA32 | S_LDATA32_ST | S_GDATA32 | S_GDATA32_ST | S_LMANDATA | S_LMANDATA_ST - | S_GMANDATA | S_GMANDATA_ST => 10, - - S_PROCREF | S_PROCREF_ST | S_LPROCREF | S_LPROCREF_ST | S_DATAREF | S_DATAREF_ST - | S_ANNOTATIONREF => 10, - - S_CONSTANT | S_CONSTANT_ST => { - let mut constant_size = 4; - - let mut buf = ParseBuffer::from(&self.0[2 + constant_size..]); - constant_size += buf.get_variant_size(); - - constant_size - } - - S_UDT | S_UDT_ST => 4, - - S_LTHREAD32 | S_LTHREAD32_ST | S_GTHREAD32 | S_GTHREAD32_ST => 10, - - S_LPROC32 | S_LPROC32_ST | S_GPROC32 | S_GPROC32_ST | S_LPROC32_ID | S_GPROC32_ID - | S_LPROC32_DPC | S_LPROC32_DPC_ID => 35, - - S_INLINESITE => 12, - S_INLINESITE2 => 16, - - S_OBJNAME | S_OBJNAME_ST => 4, - - S_COMPILE3 => 22, - - S_UNAMESPACE | S_UNAMESPACE_ST => 0, - - S_LOCAL => 6, - - S_EXPORT => 4, - - _ => return Err(Error::UnimplementedSymbolKind(kind)), - }; - - if self.0.len() < data_length + 2 { - return Err(Error::SymbolTooShort); - } - - Ok(data_length) - } - /// Parse the symbol into the `SymbolData` it contains. #[inline] - pub fn parse(&self) -> Result { - parse_symbol_data(self.raw_kind(), self.field_data()?) + pub fn parse(&self) -> Result> { + Ok(self.raw_bytes().pread_with(0, ())?) } - /// Returns a slice containing the field information describing this symbol but not including - /// its name. - fn field_data(&self) -> Result<&'t [u8]> { - let data_length = self.data_length()?; - - // we've already checked the length - Ok(&self.0[2..(data_length + 2)]) - } - - /// Returns additional data stored in the symbol. - pub fn extra_data(&self) -> Result> { - let data_length = self.data_length()?; - let buf = &self.0[2 + data_length..]; - + /// Returns whether this symbol starts a scope. 
+ /// + /// If `true`, this symbol has a `parent` and an `end` field, which contains the offset of the + /// corresponding end symbol. + pub fn starts_scope(&self) -> bool { match self.raw_kind() { - S_INLINESITE | S_INLINESITE2 => Ok(Some(buf)), - _ => Ok(None), + S_GPROC32 | S_LPROC32 | S_LPROC32_ID | S_GPROC32_ID | S_BLOCK32 | S_SEPCODE + | S_THUNK32 | S_INLINESITE | S_INLINESITE2 => true, + _ => false, } } - /// Interprets the extra data as binary annotations and - /// returns an iterator over it. - pub fn iter_binary_annotations(&self) -> Result> { - Ok(BinaryAnnotationsIter::new( - self.extra_data()?.unwrap_or(&[][..]), - )) - } - - /// Returns the name of the symbol. Note that the underlying buffer is owned by the - /// `SymbolTable`. - pub fn name(&self) -> Result> { - // figure out how long the data is - let data_length = self.data_length()?; - - // figure out where the name is - let mut buf = ParseBuffer::from(&self.0[2 + data_length..]); - - // some things do not have a real name but store something else - // there instead. + /// Returns whether this symbol declares the end of a scope. 
+ pub fn ends_scope(&self) -> bool { match self.raw_kind() { - S_INLINESITE | S_INLINESITE2 => Ok(RawString::from("")), - kind if kind < S_ST_MAX => { - // Pascal-style name - let name = buf.parse_u8_pascal_string()?; - Ok(name) - } - _ => { - // NUL-terminated name - let name = buf.parse_cstring()?; - Ok(name) - } + S_END | S_PROC_ID_END | S_INLINESITE_END => true, + _ => false, } } } @@ -229,194 +147,70 @@ impl<'t> fmt::Debug for Symbol<'t> { // decoding reference: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/cvdump/dumpsym7.cpp#L264 -// CV_PUBSYMFLAGS_e: -const CVPSF_CODE: u32 = 0x1; -const CVPSF_FUNCTION: u32 = 0x2; -const CVPSF_MANAGED: u32 = 0x4; -const CVPSF_MSIL: u32 = 0x8; - -fn parse_symbol_data(kind: u16, data: &[u8]) -> Result { - let mut buf = ParseBuffer::from(data); - - match kind { - S_PUB32 | S_PUB32_ST => { - let flags = buf.parse_u32()?; - Ok(SymbolData::PublicSymbol(PublicSymbol { - code: flags & CVPSF_CODE != 0, - function: flags & CVPSF_FUNCTION != 0, - managed: flags & CVPSF_MANAGED != 0, - msil: flags & CVPSF_MSIL != 0, - offset: PdbInternalSectionOffset { - offset: buf.parse_u32()?, - section: buf.parse_u16()?, - }, - })) - } - - S_LDATA32 | S_LDATA32_ST | S_GDATA32 | S_GDATA32_ST | S_LMANDATA | S_LMANDATA_ST - | S_GMANDATA | S_GMANDATA_ST => Ok(SymbolData::DataSymbol(DataSymbol { - global: match kind { - S_GDATA32 | S_GDATA32_ST | S_GMANDATA | S_GMANDATA_ST => true, - _ => false, - }, - managed: match kind { - S_LMANDATA | S_LMANDATA_ST | S_GMANDATA | S_GMANDATA_ST => true, - _ => false, - }, - type_index: buf.parse_u32()?, - offset: PdbInternalSectionOffset { - offset: buf.parse_u32()?, - section: buf.parse_u16()?, - }, - })), - - S_PROCREF | S_PROCREF_ST | S_LPROCREF | S_LPROCREF_ST => { - Ok(SymbolData::ProcedureReference(ProcedureReferenceSymbol { - global: match kind { - S_PROCREF | S_PROCREF_ST => true, - _ => false, - }, - sum_name: buf.parse_u32()?, - symbol_index: buf.parse_u32()?, - 
module: buf.parse_u16()?, - })) - } - - S_DATAREF | S_DATAREF_ST => Ok(SymbolData::DataReference(DataReferenceSymbol { - sum_name: buf.parse_u32()?, - symbol_index: buf.parse_u32()?, - module: buf.parse_u16()?, - })), - - S_ANNOTATIONREF => Ok(SymbolData::AnnotationReference(AnnotationReferenceSymbol { - sum_name: buf.parse_u32()?, - symbol_index: buf.parse_u32()?, - module: buf.parse_u16()?, - })), - - S_CONSTANT | S_CONSTANT_ST => Ok(SymbolData::Constant(ConstantSymbol { - type_index: buf.parse_u32()?, - value: buf.parse_variant()?, - })), - - S_UDT | S_UDT_ST => Ok(SymbolData::UserDefinedType(UserDefinedTypeSymbol { - type_index: buf.parse_u32()?, - })), - - S_LTHREAD32 | S_LTHREAD32_ST | S_GTHREAD32 | S_GTHREAD32_ST => { - Ok(SymbolData::ThreadStorage(ThreadStorageSymbol { - global: match kind { - S_GTHREAD32 | S_GTHREAD32_ST => true, - _ => false, - }, - type_index: buf.parse_u32()?, - offset: PdbInternalSectionOffset { - offset: buf.parse_u32()?, - section: buf.parse_u16()?, - }, - })) - } +fn parse_symbol_name<'t>(buf: &mut ParseBuffer<'t>, kind: SymbolKind) -> Result> { + if kind < S_ST_MAX { + // Pascal-style name + buf.parse_u8_pascal_string() + } else { + // NUL-terminated name + buf.parse_cstring() + } +} - S_LPROC32 | S_LPROC32_ST | S_GPROC32 | S_GPROC32_ST | S_LPROC32_ID | S_GPROC32_ID - | S_LPROC32_DPC | S_LPROC32_DPC_ID => Ok(SymbolData::Procedure(ProcedureSymbol { - global: match kind { - S_GPROC32 | S_GPROC32_ST | S_GPROC32_ID => true, - _ => false, - }, - parent: buf.parse_u32()?, - end: buf.parse_u32()?, - next: buf.parse_u32()?, - len: buf.parse_u32()?, - dbg_start_offset: buf.parse_u32()?, - dbg_end_offset: buf.parse_u32()?, - type_index: buf.parse_u32()?, - offset: PdbInternalSectionOffset { - offset: buf.parse_u32()?, - section: buf.parse_u16()?, - }, - flags: ProcedureFlags::new(buf.parse_u8()?), - })), - - S_INLINESITE | S_INLINESITE2 => Ok(SymbolData::InlineSite(InlineSite { - parent: buf.parse_u32()?, - end: buf.parse_u32()?, - inlinee: 
buf.parse_u32()?, - invocations: if kind == S_INLINESITE2 { - Some(buf.parse_u32()?) - } else { - None - }, - })), - - S_OBJNAME | S_OBJNAME_ST => Ok(SymbolData::ObjName(ObjNameSymbol { - signature: buf.parse_u32()?, - })), - - S_COMPILE3 => Ok(SymbolData::Compile3(Compile3Symbol { - language: buf.parse_u8()?.into(), - flags: [buf.parse_u8()?, buf.parse_u8()?, buf.parse_u8()?], - cpu_type: buf.parse_u16()?.into(), - frontend_version: [ - buf.parse_u16()?, - buf.parse_u16()?, - buf.parse_u16()?, - buf.parse_u16()?, - ], - backend_version: [ - buf.parse_u16()?, - buf.parse_u16()?, - buf.parse_u16()?, - buf.parse_u16()?, - ], - })), - - S_UNAMESPACE | S_UNAMESPACE_ST => Ok(SymbolData::Namespace(NamespaceSymbol {})), - - S_LOCAL => Ok(SymbolData::Local(LocalSymbol { - type_index: buf.parse_u32()?, - flags: LocalVariableFlags::new(buf.parse_u16()?), - })), - - S_EXPORT => Ok(SymbolData::Export(ExportSymbol { - ordinal: buf.parse_u16()?, - flags: ExportSymbolFlags::new(buf.parse_u16()?), - })), - - _ => Err(Error::UnimplementedSymbolKind(kind)), +fn parse_optional_name<'t>( + buf: &mut ParseBuffer<'t>, + kind: SymbolKind, +) -> Result>> { + if kind < S_ST_MAX { + // ST variants do not specify a name + Ok(None) + } else { + // NUL-terminated name + buf.parse_cstring().map(Some) } } /// `SymbolData` contains the information parsed from a symbol record. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum SymbolData { +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum SymbolData<'t> { + // S_END (0x0006) + ScopeEnd, + + // S_REGISTER (0x1106) | S_REGISTER_ST (0x1001) + RegisterVariable(RegisterVariableSymbol<'t>), + + // S_MANYREG (0x110a) | S_MANYREG_ST (0x1005) + // S_MANYREG2 (0x1117) | S_MANYREG2_ST (0x1014) + MultiRegisterVariable(MultiRegisterVariableSymbol<'t>), + // S_PUB32 (0x110e) | S_PUB32_ST (0x1009) - PublicSymbol(PublicSymbol), + Public(PublicSymbol<'t>), // S_LDATA32 (0x110c) | S_LDATA32_ST (0x1007) // S_GDATA32 (0x110d) | S_GDATA32_ST (0x1008) // S_LMANDATA (0x111c) | S_LMANDATA_ST (0x1020) // S_GMANDATA (0x111d) | S_GMANDATA_ST (0x1021) - DataSymbol(DataSymbol), + Data(DataSymbol<'t>), // S_PROCREF (0x1125) | S_PROCREF_ST (0x0400) // S_LPROCREF (0x1127) | S_LPROCREF_ST (0x0403) - ProcedureReference(ProcedureReferenceSymbol), + ProcedureReference(ProcedureReferenceSymbol<'t>), // S_DATAREF (0x1126) | S_DATAREF_ST (0x0401) - DataReference(DataReferenceSymbol), + DataReference(DataReferenceSymbol<'t>), // S_ANNOTATIONREF (0x1128) - AnnotationReference(AnnotationReferenceSymbol), + AnnotationReference(AnnotationReferenceSymbol<'t>), // S_CONSTANT (0x1107) | S_CONSTANT_ST (0x1002) - Constant(ConstantSymbol), + Constant(ConstantSymbol<'t>), // S_UDT (0x1108) | S_UDT_ST (0x1003) - UserDefinedType(UserDefinedTypeSymbol), + UserDefinedType(UserDefinedTypeSymbol<'t>), // S_LTHREAD32 (0x1112) | S_LTHREAD32_ST (0x100e) // S_GTHREAD32 (0x1113) | S_GTHREAD32_ST (0x100f) - ThreadStorage(ThreadStorageSymbol), + ThreadStorage(ThreadStorageSymbol<'t>), // S_LPROC32 (0x110f) | S_LPROC32_ST (0x100a) // S_GPROC32 (0x1110) | S_GPROC32_ST (0x100b) @@ -424,94 +218,426 @@ pub enum SymbolData { // S_GPROC32_ID (0x1147) | // S_LPROC32_DPC (0x1155) | // S_LPROC32_DPC_ID (0x1156) - Procedure(ProcedureSymbol), + Procedure(ProcedureSymbol<'t>), + + // S_PROC_ID_END (0x114f) + ProcedureEnd, // S_INLINESITE (0x114d) - 
InlineSite(InlineSite), + InlineSite(InlineSiteSymbol<'t>), + + // S_INLINESITE_END (0x114e) + InlineSiteEnd, // S_OBJNAME (0x1101) | S_OBJNAME_ST (0x0009) - ObjName(ObjNameSymbol), + ObjName(ObjNameSymbol<'t>), - // S_COMPILE3 (0x113c) - Compile3(Compile3Symbol), + // S_COMPILE2 (0x1116) | S_COMPILE2_ST (0x1013) | S_COMPILE3 (0x113c) + ExtendedCompileFlags(ExtendedCompileFlagsSymbol), // S_UNAMESPACE (0x1124) | S_UNAMESPACE_ST (0x1029) - Namespace(NamespaceSymbol), + UsingNamespace(UsingNamespaceSymbol<'t>), // S_LOCAL (0x113e) - Local(LocalSymbol), + Local(LocalSymbol<'t>), // S_EXPORT (0x1138) - Export(ExportSymbol), + Export(ExportSymbol<'t>), +} + +impl<'t> SymbolData<'t> { + /// Returns the name of this symbol if it has one. + pub fn name(&self) -> Option> { + match self { + SymbolData::ScopeEnd => None, + SymbolData::RegisterVariable(_) => None, + SymbolData::MultiRegisterVariable(_) => None, + SymbolData::Public(data) => Some(data.name), + SymbolData::Data(data) => Some(data.name), + SymbolData::ProcedureReference(data) => data.name, + SymbolData::DataReference(data) => data.name, + SymbolData::AnnotationReference(data) => Some(data.name), + SymbolData::Constant(data) => Some(data.name), + SymbolData::UserDefinedType(data) => Some(data.name), + SymbolData::ThreadStorage(data) => Some(data.name), + SymbolData::Procedure(data) => Some(data.name), + SymbolData::ProcedureEnd => None, + SymbolData::InlineSite(_) => None, + SymbolData::InlineSiteEnd => None, + SymbolData::ObjName(data) => Some(data.name), + SymbolData::ExtendedCompileFlags(_) => None, + SymbolData::UsingNamespace(data) => Some(data.name), + SymbolData::Local(data) => Some(data.name), + SymbolData::Export(data) => Some(data.name), + } + } +} + +impl<'t> TryFromCtx<'t> for SymbolData<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], _ctx: ()) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + let kind = buf.parse()?; + + let symbol = match 
kind { + S_END => SymbolData::ScopeEnd, + S_REGISTER | S_REGISTER_ST => SymbolData::RegisterVariable(buf.parse_with(kind)?), + S_MANYREG | S_MANYREG_ST | S_MANYREG2 | S_MANYREG2_ST => { + SymbolData::MultiRegisterVariable(buf.parse_with(kind)?) + } + S_PUB32 | S_PUB32_ST => SymbolData::Public(buf.parse_with(kind)?), + S_LDATA32 | S_LDATA32_ST | S_GDATA32 | S_GDATA32_ST | S_LMANDATA | S_LMANDATA_ST + | S_GMANDATA | S_GMANDATA_ST => SymbolData::Data(buf.parse_with(kind)?), + S_PROCREF | S_PROCREF_ST | S_LPROCREF | S_LPROCREF_ST => { + SymbolData::ProcedureReference(buf.parse_with(kind)?) + } + S_DATAREF | S_DATAREF_ST => SymbolData::DataReference(buf.parse_with(kind)?), + S_ANNOTATIONREF => SymbolData::AnnotationReference(buf.parse_with(kind)?), + S_CONSTANT | S_CONSTANT_ST | S_MANCONSTANT => { + SymbolData::Constant(buf.parse_with(kind)?) + } + S_UDT | S_UDT_ST | S_COBOLUDT | S_COBOLUDT_ST => { + SymbolData::UserDefinedType(buf.parse_with(kind)?) + } + S_LTHREAD32 | S_LTHREAD32_ST | S_GTHREAD32 | S_GTHREAD32_ST => { + SymbolData::ThreadStorage(buf.parse_with(kind)?) + } + S_LPROC32 | S_LPROC32_ST | S_GPROC32 | S_GPROC32_ST | S_LPROC32_ID | S_GPROC32_ID + | S_LPROC32_DPC | S_LPROC32_DPC_ID => SymbolData::Procedure(buf.parse_with(kind)?), + S_PROC_ID_END => SymbolData::ProcedureEnd, + S_INLINESITE | S_INLINESITE2 => SymbolData::InlineSite(buf.parse_with(kind)?), + S_INLINESITE_END => SymbolData::InlineSiteEnd, + S_OBJNAME | S_OBJNAME_ST => SymbolData::ObjName(buf.parse_with(kind)?), + S_COMPILE2 | S_COMPILE2_ST | S_COMPILE3 => { + SymbolData::ExtendedCompileFlags(buf.parse_with(kind)?) + } + S_UNAMESPACE | S_UNAMESPACE_ST => SymbolData::UsingNamespace(buf.parse_with(kind)?), + S_LOCAL => SymbolData::Local(buf.parse_with(kind)?), + S_EXPORT => SymbolData::Export(buf.parse_with(kind)?), + other => return Err(Error::UnimplementedSymbolKind(other)), + }; + + Ok((symbol, buf.pos())) + } +} + +/// A register referred to by its number. 
+pub type Register = u16; + +/// A Register variable. +/// +/// `S_REGISTER`, or `S_REGISTER_ST` +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RegisterVariableSymbol<'t> { + pub type_index: TypeIndex, + pub register: Register, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for RegisterVariableSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = RegisterVariableSymbol { + type_index: buf.parse()?, + register: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + +/// A Register variable spanning multiple registers. +/// +/// `S_MANYREG`, `S_MANYREG_ST`, `S_MANYREG2`, or `S_MANYREG2_ST` +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MultiRegisterVariableSymbol<'t> { + pub type_index: TypeIndex, + /// Most significant register first. + pub registers: Vec<(Register, RawString<'t>)>, } +impl<'t> TryFromCtx<'t, SymbolKind> for MultiRegisterVariableSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let type_index = buf.parse()?; + let count = match kind { + S_MANYREG2 | S_MANYREG2_ST => buf.parse::()?, + _ => u16::from(buf.parse::()?), + }; + + let mut registers = Vec::with_capacity(count as usize); + for _ in 0..count { + registers.push((buf.parse()?, parse_symbol_name(&mut buf, kind)?)); + } + + let symbol = MultiRegisterVariableSymbol { + type_index, + registers, + }; + + Ok((symbol, buf.pos())) + } +} + +// CV_PUBSYMFLAGS_e +const CVPSF_CODE: u32 = 0x1; +const CVPSF_FUNCTION: u32 = 0x2; +const CVPSF_MANAGED: u32 = 0x4; +const CVPSF_MSIL: u32 = 0x8; + /// The information parsed from a symbol record with kind `S_PUB32` or `S_PUB32_ST`. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct PublicSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct PublicSymbol<'t> { pub code: bool, pub function: bool, pub managed: bool, pub msil: bool, pub offset: PdbInternalSectionOffset, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for PublicSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let flags = buf.parse::()?; + let symbol = PublicSymbol { + code: flags & CVPSF_CODE != 0, + function: flags & CVPSF_FUNCTION != 0, + managed: flags & CVPSF_MANAGED != 0, + msil: flags & CVPSF_MSIL != 0, + offset: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind /// `S_LDATA32`, `S_LDATA32_ST`, `S_GDATA32`, `S_GDATA32_ST`, /// `S_LMANDATA`, `S_LMANDATA_ST`, `S_GMANDATA`, or `S_GMANDATA_ST`. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct DataSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct DataSymbol<'t> { pub global: bool, pub managed: bool, pub type_index: TypeIndex, pub offset: PdbInternalSectionOffset, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for DataSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let global = match kind { + S_GDATA32 | S_GDATA32_ST | S_GMANDATA | S_GMANDATA_ST => true, + _ => false, + }; + let managed = match kind { + S_LMANDATA | S_LMANDATA_ST | S_GMANDATA | S_GMANDATA_ST => true, + _ => false, + }; + + let symbol = DataSymbol { + global, + managed, + type_index: buf.parse()?, + offset: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind /// `S_PROCREF`, `S_PROCREF_ST`, `S_LPROCREF`, or `S_LPROCREF_ST`. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct ProcedureReferenceSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ProcedureReferenceSymbol<'t> { pub global: bool, pub sum_name: u32, pub symbol_index: u32, pub module: u16, + pub name: Option>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureReferenceSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let global = match kind { + S_PROCREF | S_PROCREF_ST => true, + _ => false, + }; + + let symbol = ProcedureReferenceSymbol { + global, + sum_name: buf.parse()?, + symbol_index: buf.parse()?, + module: buf.parse()?, + name: parse_optional_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind `S_DATAREF` or `S_DATAREF_ST`. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct DataReferenceSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct DataReferenceSymbol<'t> { pub sum_name: u32, pub symbol_index: u32, pub module: u16, + pub name: Option>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for DataReferenceSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = DataReferenceSymbol { + sum_name: buf.parse()?, + symbol_index: buf.parse()?, + module: buf.parse()?, + name: parse_optional_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind `S_ANNOTATIONREF`. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct AnnotationReferenceSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct AnnotationReferenceSymbol<'t> { pub sum_name: u32, pub symbol_index: u32, pub module: u16, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for AnnotationReferenceSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = AnnotationReferenceSymbol { + sum_name: buf.parse()?, + symbol_index: buf.parse()?, + module: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind `S_CONSTANT`, or `S_CONSTANT_ST`. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct ConstantSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ConstantSymbol<'t> { + pub managed: bool, pub type_index: TypeIndex, pub value: Variant, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ConstantSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = ConstantSymbol { + managed: kind == S_MANCONSTANT, + type_index: buf.parse()?, + value: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind `S_UDT`, or `S_UDT_ST`. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct UserDefinedTypeSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct UserDefinedTypeSymbol<'t> { pub type_index: TypeIndex, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for UserDefinedTypeSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = UserDefinedTypeSymbol { + type_index: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind /// `S_LTHREAD32`, `S_LTHREAD32_ST`, `S_GTHREAD32`, or `S_GTHREAD32_ST`. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct ThreadStorageSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ThreadStorageSymbol<'t> { pub global: bool, pub type_index: TypeIndex, pub offset: PdbInternalSectionOffset, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ThreadStorageSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let global = match kind { + S_GTHREAD32 | S_GTHREAD32_ST => true, + _ => false, + }; + + let symbol = ThreadStorageSymbol { + global, + type_index: buf.parse()?, + offset: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } // CV_PROCFLAGS: @@ -525,7 +651,7 @@ const CV_PFLAG_NOINLINE: u8 = 0x40; const CV_PFLAG_OPTDBGINFO: u8 = 0x80; /// The information parsed from a CV_PROCFLAGS bit field -#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ProcedureFlags { pub nofpo: bool, pub int: bool, @@ -537,26 +663,33 @@ pub struct ProcedureFlags { pub optdbginfo: bool, } -impl ProcedureFlags { - fn new(flags: u8) -> Self { - ProcedureFlags { - nofpo: flags & CV_PFLAG_NOFPO != 0, - int: flags & CV_PFLAG_INT != 0, - far: flags & CV_PFLAG_FAR != 0, - never: flags & CV_PFLAG_NEVER != 0, - notreached: flags & CV_PFLAG_NOTREACHED != 0, - cust_call: flags & CV_PFLAG_CUST_CALL != 0, - noinline: flags & CV_PFLAG_NOINLINE != 0, - optdbginfo: flags & CV_PFLAG_OPTDBGINFO != 0, - } +impl<'t> TryFromCtx<'t, Endian> for ProcedureFlags { + type Error = scroll::Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + let (value, size) = u8::try_from_ctx(this, le)?; + + let flags = ProcedureFlags { + nofpo: value & CV_PFLAG_NOFPO != 0, + int: value & CV_PFLAG_INT != 0, + far: value & CV_PFLAG_FAR != 0, + never: value & 
CV_PFLAG_NEVER != 0, + notreached: value & CV_PFLAG_NOTREACHED != 0, + cust_call: value & CV_PFLAG_CUST_CALL != 0, + noinline: value & CV_PFLAG_NOINLINE != 0, + optdbginfo: value & CV_PFLAG_OPTDBGINFO != 0, + }; + + Ok((flags, size)) } } /// The information parsed from a symbol record with kind /// `S_GPROC32`, `S_GPROC32_ST`, `S_LPROC32`, `S_LPROC32_ST` /// `S_GPROC32_ID`, `S_LPROC32_ID`, `S_LPROC32_DPC`, or `S_LPROC32_DPC_ID` -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct ProcedureSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ProcedureSymbol<'t> { pub global: bool, pub parent: u32, pub end: u32, @@ -567,40 +700,233 @@ pub struct ProcedureSymbol { pub type_index: TypeIndex, pub offset: PdbInternalSectionOffset, pub flags: ProcedureFlags, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let global = match kind { + S_GPROC32 | S_GPROC32_ST | S_GPROC32_ID => true, + _ => false, + }; + + let symbol = ProcedureSymbol { + global, + parent: buf.parse()?, + end: buf.parse()?, + next: buf.parse()?, + len: buf.parse()?, + dbg_start_offset: buf.parse()?, + dbg_end_offset: buf.parse()?, + type_index: buf.parse()?, + offset: buf.parse()?, + flags: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind /// `S_INLINESITE` or `S_INLINESITE2`. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct InlineSite { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct InlineSiteSymbol<'t> { pub parent: u32, pub end: u32, pub inlinee: ItemId, pub invocations: Option, + pub annotations: BinaryAnnotations<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for InlineSiteSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = InlineSiteSymbol { + parent: buf.parse()?, + end: buf.parse()?, + inlinee: buf.parse()?, + invocations: match kind { + S_INLINESITE2 => Some(buf.parse()?), + _ => None, + }, + annotations: BinaryAnnotations::new(buf.take(buf.len())?), + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record with kind /// `S_OBJNAME`, or `S_OBJNAME_ST`. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct ObjNameSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ObjNameSymbol<'t> { pub signature: u32, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ObjNameSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = ObjNameSymbol { + signature: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + +/// A version number refered to by `ExtendedCompileFlagsSymbol`. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct CompilerVersion { + pub major: u16, + pub minor: u16, + pub build: u16, + pub qfe: Option, +} + +impl<'t> TryFromCtx<'t, bool> for CompilerVersion { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], has_qfe: bool) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let version = CompilerVersion { + major: buf.parse()?, + minor: buf.parse()?, + build: buf.parse()?, + qfe: if has_qfe { Some(buf.parse()?) } else { None }, + }; + + Ok((version, buf.pos())) + } +} + +/// Compile flags declared in `ExtendedCompileFlagsSymbol`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ExtendedCompileFlags { + /// Compiled for E/C. + edit_and_continue: bool, + /// Compiled without debug information. + no_debug_info: bool, + /// Compiled with `/LTCG`. + link_time_codegen: bool, + /// Compiled with `-Bzalign`. + no_data_align: bool, + /// Managed code or data is present. + managed: bool, + /// Compiled with `/GS`. + security_checks: bool, + /// Compiled with `/hotpatch`. + hot_patch: bool, + /// Compiled with `CvtCIL`. + cvtcil: bool, + /// This is a MSIL .NET Module. + msil_module: bool, + /// Compiled with `/sdl`. + sdl: bool, + /// Compiled with `/ltcg:pgo` or `pgu`. + pgo: bool, + /// This is a .exp module. 
+ exp_module: bool, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlags { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let is_compile3 = kind == S_COMPILE3; + + let raw = this.pread_with::(0, LE)?; + this.pread::(2)?; // unused + + let flags = ExtendedCompileFlags { + edit_and_continue: raw & 1 != 0, + no_debug_info: (raw >> 1) & 1 != 0, + link_time_codegen: (raw >> 2) & 1 != 0, + no_data_align: (raw >> 3) & 1 != 0, + managed: (raw >> 4) & 1 != 0, + security_checks: (raw >> 5) & 1 != 0, + hot_patch: (raw >> 6) & 1 != 0, + cvtcil: (raw >> 7) & 1 != 0, + msil_module: (raw >> 8) & 1 != 0, + sdl: (raw >> 9) & 1 != 0 && is_compile3, + pgo: (raw >> 10) & 1 != 0 && is_compile3, + exp_module: (raw >> 11) & 1 != 0 && is_compile3, + }; + + Ok((flags, 3)) + } } /// The information parsed from a symbol record with kind -/// `S_COMPILE3` -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Compile3Symbol { +/// `S_COMPILE2`, `S_COMPILE2_ST`, or `S_COMPILE3` +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ExtendedCompileFlagsSymbol { pub language: SourceLanguage, - pub flags: [u8; 3], + pub flags: ExtendedCompileFlags, pub cpu_type: CPUType, - pub frontend_version: [u16; 4], - pub backend_version: [u16; 4], + pub frontend_version: CompilerVersion, + pub backend_version: CompilerVersion, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlagsSymbol { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let has_qfe = kind == S_COMPILE3; + let symbol = ExtendedCompileFlagsSymbol { + language: buf.parse()?, + flags: buf.parse_with(kind)?, + cpu_type: buf.parse()?, + frontend_version: buf.parse_with(has_qfe)?, + backend_version: buf.parse_with(has_qfe)?, + }; + + Ok((symbol, buf.pos())) + } } /// The information parsed from a symbol record 
with kind /// `S_UNAMESPACE`, or `S_UNAMESPACE_ST`. -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct NamespaceSymbol {} +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct UsingNamespaceSymbol<'t> { + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for UsingNamespaceSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = UsingNamespaceSymbol { + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} // CV_LVARFLAGS: const CV_LVARFLAG_ISPARAM: u16 = 0x01; @@ -615,7 +941,7 @@ const CV_LVARFLAG_ISENREG_GLOB: u16 = 0x100; const CV_LVARFLAG_ISENREG_STAT: u16 = 0x200; /// The information parsed from a CV_LVARFLAGS bit field -#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct LocalVariableFlags { pub isparam: bool, // Variable is a parameter pub addrtaken: bool, // Address is taken @@ -629,33 +955,58 @@ pub struct LocalVariableFlags { pub isenreg_stat: bool, // Variable is an enregistered static } -impl LocalVariableFlags { - fn new(flags: u16) -> Self { - LocalVariableFlags { - isparam: flags & CV_LVARFLAG_ISPARAM != 0, - addrtaken: flags & CV_LVARFLAG_ADDRTAKEN != 0, - compgenx: flags & CV_LVARFLAG_COMPGENX != 0, - isaggregate: flags & CV_LVARFLAG_ISAGGREGATE != 0, - isaliased: flags & CV_LVARFLAG_ISALIASED != 0, - isalias: flags & CV_LVARFLAG_ISALIAS != 0, - isretvalue: flags & CV_LVARFLAG_ISRETVALUE != 0, - isoptimizedout: flags & CV_LVARFLAG_ISOPTIMIZEDOUT != 0, - isenreg_glob: flags & CV_LVARFLAG_ISENREG_GLOB != 0, - isenreg_stat: flags & CV_LVARFLAG_ISENREG_STAT != 0, - } +impl<'t> TryFromCtx<'t, Endian> for LocalVariableFlags { + type Error = scroll::Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + let (value, size) = u16::try_from_ctx(this, 
le)?; + + let flags = LocalVariableFlags { + isparam: value & CV_LVARFLAG_ISPARAM != 0, + addrtaken: value & CV_LVARFLAG_ADDRTAKEN != 0, + compgenx: value & CV_LVARFLAG_COMPGENX != 0, + isaggregate: value & CV_LVARFLAG_ISAGGREGATE != 0, + isaliased: value & CV_LVARFLAG_ISALIASED != 0, + isalias: value & CV_LVARFLAG_ISALIAS != 0, + isretvalue: value & CV_LVARFLAG_ISRETVALUE != 0, + isoptimizedout: value & CV_LVARFLAG_ISOPTIMIZEDOUT != 0, + isenreg_glob: value & CV_LVARFLAG_ISENREG_GLOB != 0, + isenreg_stat: value & CV_LVARFLAG_ISENREG_STAT != 0, + }; + + Ok((flags, size)) } } /// The information parsed from a symbol record with kind /// `S_LOCAL` -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct LocalSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct LocalSymbol<'t> { pub type_index: TypeIndex, pub flags: LocalVariableFlags, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for LocalSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = LocalSymbol { + type_index: buf.parse()?, + flags: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L4456 -#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ExportSymbolFlags { pub constant: bool, pub data: bool, @@ -665,25 +1016,50 @@ pub struct ExportSymbolFlags { pub forwarder: bool, } -impl ExportSymbolFlags { - fn new(flags: u16) -> Self { - ExportSymbolFlags { - constant: flags & 0x01 != 0, - data: flags & 0x02 != 0, - private: flags & 0x04 != 0, - no_name: flags & 0x08 != 0, - ordinal: flags & 0x10 != 0, - forwarder: flags & 0x20 != 0, - } +impl<'t> TryFromCtx<'t, Endian> for ExportSymbolFlags { + type Error = scroll::Error; + type Size 
= usize; + + fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + let (value, size) = u16::try_from_ctx(this, le)?; + + let flags = ExportSymbolFlags { + constant: value & 0x01 != 0, + data: value & 0x02 != 0, + private: value & 0x04 != 0, + no_name: value & 0x08 != 0, + ordinal: value & 0x10 != 0, + forwarder: value & 0x20 != 0, + }; + + Ok((flags, size)) } } /// The information parsed from a symbol record with kind /// `S_EXPORT` -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct ExportSymbol { +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ExportSymbol<'t> { pub ordinal: u16, pub flags: ExportSymbolFlags, + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ExportSymbol<'t> { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = ExportSymbol { + ordinal: buf.parse()?, + flags: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } } /// A `SymbolIter` iterates over a `SymbolTable`, producing `Symbol`s. @@ -707,43 +1083,35 @@ impl<'t> FallibleIterator for SymbolIter<'t> { type Error = Error; fn next(&mut self) -> result::Result, Self::Error> { - // see if we're at EOF - if self.buf.is_empty() { - return Ok(None); - } + while !self.buf.is_empty() { + // read the length of the next symbol + let symbol_length = self.buf.parse::()? as usize; + if symbol_length < 2 { + // this can't be correct + return Err(Error::SymbolTooShort); + } - // read the length of the next symbol - let symbol_length = self.buf.parse_u16()? as usize; + // grab the symbol itself + let data = self.buf.take(symbol_length)?; + let symbol = Symbol(data); - // validate - if symbol_length < 2 { - // this can't be correct - return Err(Error::SymbolTooShort); - } + if symbol.raw_kind() == S_ALIGN { + // S_ALIGN is used for page alignment of symbols. 
+ continue; + } - // grab the symbol itself - let symbol = self.buf.take(symbol_length)?; + return Ok(Some(symbol)); + } - // Done - Ok(Some(Symbol(symbol))) + Ok(None) } } #[cfg(test)] mod tests { mod parsing { - use crate::common::*; use crate::symbol::*; - fn parse<'s>(buf: &'s [u8]) -> Result<(Symbol<'s>, SymbolData, String)> { - let symbol = Symbol(buf); - - let data = symbol.parse()?; - let name = symbol.name()?.to_string().into_owned(); - - Ok((symbol, data, name)) - } - #[test] fn kind_110e() { let buf = &[ @@ -751,11 +1119,12 @@ mod tests { 116, 100, 105, 111, 95, 112, 114, 105, 110, 116, 102, 95, 111, 112, 116, 105, 111, 110, 115, 0, 0, ]; - let (symbol, data, name) = parse(buf).expect("parse"); + + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x110e); assert_eq!( - data, - SymbolData::PublicSymbol(PublicSymbol { + symbol.parse().expect("parse"), + SymbolData::Public(PublicSymbol { code: false, function: true, managed: false, @@ -763,10 +1132,10 @@ mod tests { offset: PdbInternalSectionOffset { offset: 21952, section: 1 - } + }, + name: "__local_stdio_printf_options".into(), }) ); - assert_eq!(name, "__local_stdio_printf_options"); } #[test] @@ -775,30 +1144,32 @@ mod tests { 37, 17, 0, 0, 0, 0, 108, 0, 0, 0, 1, 0, 66, 97, 122, 58, 58, 102, 95, 112, 117, 98, 108, 105, 99, 0, ]; - let (symbol, data, name) = parse(buf).expect("parse"); + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x1125); assert_eq!( - data, + symbol.parse().expect("parse"), SymbolData::ProcedureReference(ProcedureReferenceSymbol { global: true, sum_name: 0, symbol_index: 108, - module: 1 + module: 1, + name: Some("Baz::f_public".into()), }) ); - assert_eq!(name, "Baz::f_public"); } #[test] fn kind_1108() { let buf = &[8, 17, 112, 6, 0, 0, 118, 97, 95, 108, 105, 115, 116, 0]; - let (symbol, data, name) = parse(buf).expect("parse"); + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x1108); assert_eq!( - data, - 
SymbolData::UserDefinedType(UserDefinedTypeSymbol { type_index: 1648 }) + symbol.parse().expect("parse"), + SymbolData::UserDefinedType(UserDefinedTypeSymbol { + type_index: 1648, + name: "va_list".into(), + }) ); - assert_eq!(name, "va_list"); } #[test] @@ -807,16 +1178,17 @@ mod tests { 7, 17, 201, 18, 0, 0, 1, 0, 95, 95, 73, 83, 65, 95, 65, 86, 65, 73, 76, 65, 66, 76, 69, 95, 83, 83, 69, 50, 0, 0, ]; - let (symbol, data, name) = parse(buf).expect("parse"); + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x1107); assert_eq!( - data, + symbol.parse().expect("parse"), SymbolData::Constant(ConstantSymbol { + managed: false, type_index: 4809, - value: Variant::U16(1) + value: Variant::U16(1), + name: "__ISA_AVAILABLE_SSE2".into(), }) ); - assert_eq!(name, "__ISA_AVAILABLE_SSE2"); } #[test] @@ -825,21 +1197,21 @@ mod tests { 13, 17, 116, 0, 0, 0, 16, 0, 0, 0, 3, 0, 95, 95, 105, 115, 97, 95, 97, 118, 97, 105, 108, 97, 98, 108, 101, 0, 0, 0, ]; - let (symbol, data, name) = parse(buf).expect("parse"); + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x110d); assert_eq!( - data, - SymbolData::DataSymbol(DataSymbol { + symbol.parse().expect("parse"), + SymbolData::Data(DataSymbol { global: true, managed: false, type_index: 116, offset: PdbInternalSectionOffset { offset: 16, section: 3 - } + }, + name: "__isa_available".into(), }) ); - assert_eq!(name, "__isa_available"); } #[test] @@ -848,21 +1220,21 @@ mod tests { 12, 17, 32, 0, 0, 0, 240, 36, 1, 0, 2, 0, 36, 120, 100, 97, 116, 97, 115, 121, 109, 0, ]; - let (symbol, data, name) = parse(buf).expect("parse"); + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x110c); assert_eq!( - data, - SymbolData::DataSymbol(DataSymbol { + symbol.parse().expect("parse"), + SymbolData::Data(DataSymbol { global: false, managed: false, type_index: 32, offset: PdbInternalSectionOffset { offset: 74992, section: 2 - } + }, + name: "$xdatasym".into(), }) ); - assert_eq!(name, "$xdatasym"); } #[test] @@ -871,18 
+1243,18 @@ mod tests { 39, 17, 0, 0, 0, 0, 128, 4, 0, 0, 182, 0, 99, 97, 112, 116, 117, 114, 101, 95, 99, 117, 114, 114, 101, 110, 116, 95, 99, 111, 110, 116, 101, 120, 116, 0, 0, 0, ]; - let (symbol, data, name) = parse(buf).expect("parse"); + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x1127); assert_eq!( - data, + symbol.parse().expect("parse"), SymbolData::ProcedureReference(ProcedureReferenceSymbol { global: false, sum_name: 0, symbol_index: 1152, - module: 182 + module: 182, + name: Some("capture_current_context".into()), }) ); - assert_eq!(name, "capture_current_context"); } #[test] @@ -892,10 +1264,10 @@ mod tests { 16, 0, 0, 64, 85, 0, 0, 1, 0, 0, 66, 97, 122, 58, 58, 102, 95, 112, 114, 111, 116, 101, 99, 116, 101, 100, 0, ]; - let (symbol, data, name) = parse(buf).expect("parse"); + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x1110); assert_eq!( - data, + symbol.parse().expect("parse"), SymbolData::Procedure(ProcedureSymbol { global: true, parent: 0, @@ -918,10 +1290,10 @@ mod tests { cust_call: false, noinline: false, optdbginfo: false - } + }, + name: "Baz::f_protected".into(), }) ); - assert_eq!(name, "Baz::f_protected"); } #[test] @@ -931,10 +1303,10 @@ mod tests { 128, 16, 0, 0, 196, 87, 0, 0, 1, 0, 128, 95, 95, 115, 99, 114, 116, 95, 99, 111, 109, 109, 111, 110, 95, 109, 97, 105, 110, 0, 0, 0, ]; - let (symbol, data, name) = parse(buf).expect("parse"); + let symbol = Symbol(buf); assert_eq!(symbol.raw_kind(), 0x110f); assert_eq!( - data, + symbol.parse().expect("parse"), SymbolData::Procedure(ProcedureSymbol { global: false, parent: 0, @@ -957,10 +1329,10 @@ mod tests { cust_call: false, noinline: false, optdbginfo: true - } + }, + name: "__scrt_common_main".into(), }) ); - assert_eq!(name, "__scrt_common_main"); } } } diff --git a/src/tpi/data.rs b/src/tpi/data.rs index 27869d9..67458ce 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -51,7 +51,7 @@ impl<'t> TypeData<'t> { _ => return None, }; - Some(name.clone()) + 
Some(*name) } } @@ -215,7 +215,7 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::Enumerate(EnumerateType { attributes: FieldAttributes(buf.parse_u16()?), - value: buf.parse_variant()?, + value: buf.parse()?, name: parse_string(leaf, &mut buf)?, })), diff --git a/tests/omap_address_translation.rs b/tests/omap_address_translation.rs index ee21382..8f68287 100644 --- a/tests/omap_address_translation.rs +++ b/tests/omap_address_translation.rs @@ -30,16 +30,20 @@ fn test_omap_symbol() { // find the target symbol let target_symbol = { let target_name = pdb::RawString::from("NtWaitForSingleObject"); - let iter = global_symbols.iter(); - iter.filter(|sym| sym.name().expect("symbol name") == target_name) - .next() - .expect("iterate symbols") - .expect("find target symbol") + let mut iter = global_symbols.iter(); + iter.find(|sym| { + sym.parse() + .ok() + .and_then(|d| d.name()) + .map_or(false, |n| n == target_name) + }) + .expect("iterate symbols") + .expect("find target symbol") }; // extract the PublicSymbol data let pubsym = match target_symbol.parse().expect("parse symbol") { - pdb::SymbolData::PublicSymbol(pubsym) => pubsym, + pdb::SymbolData::Public(pubsym) => pubsym, _ => panic!("expected public symbol"), }; diff --git a/tests/symbol_table.rs b/tests/symbol_table.rs index 218fca9..9046e5c 100644 --- a/tests/symbol_table.rs +++ b/tests/symbol_table.rs @@ -47,7 +47,6 @@ fn count_symbols() { " assert_eq!(data, SymbolData::{:?});", sym.parse().expect("parse") ); - println!(" assert_eq!(name, {:?});", sym.name().expect("name")); println!("}}"); println!(); } @@ -97,12 +96,12 @@ fn find_symbols() { // walk the symbol table let mut iter = global_symbols.iter(); while let Some(sym) = iter.next().expect("next symbol") { - // get symbol name - let name = sym.name().expect("symbol name"); - // ensure we can parse all the symbols, even though we only want a few let data = sym.parse().expect("symbol parsing"); + // get symbol name + 
let name = data.name().unwrap_or_default(); + if let Entry::Occupied(mut e) = map.entry(name.as_bytes()) { // this is a symbol we wanted to find // store our data From 7c7b1f2144e0fd33e98c588cae0b2ecf808abc72 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Wed, 31 Jul 2019 16:54:58 +0200 Subject: [PATCH 06/31] Added extra opcode for invalid vs eof --- src/common.rs | 5 +++++ src/symbol/annotations.rs | 21 +++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/common.rs b/src/common.rs index 340cc76..39a262b 100644 --- a/src/common.rs +++ b/src/common.rs @@ -102,6 +102,9 @@ pub enum Error { /// A binary annotation was compressed incorrectly. InvalidCompressedAnnotation, + + /// An unknown binary annotation was encountered. + UnknownBinaryAnnotation(u32), } impl std::error::Error for Error { @@ -142,6 +145,7 @@ impl std::error::Error for Error { Error::InvalidFileChecksumOffset(_) => "Invalid source file checksum offset", Error::LinesNotFound => "Line information not found for a module", Error::InvalidCompressedAnnotation => "Invalid compressed annoation", + Error::UnknownBinaryAnnotation(_) => "Unknown binary annotation", } } } @@ -204,6 +208,7 @@ impl fmt::Display for Error { Error::InvalidFileChecksumOffset(offset) => { write!(f, "Invalid source file checksum offset {:#x}", offset) } + Error::UnknownBinaryAnnotation(num) => write!(f, "Unknown binary annotation {}", num), _ => fmt::Debug::fmt(self, f), } } diff --git a/src/symbol/annotations.rs b/src/symbol/annotations.rs index 86aa170..fcc317e 100644 --- a/src/symbol/annotations.rs +++ b/src/symbol/annotations.rs @@ -7,8 +7,11 @@ use crate::FallibleIterator; /// cvinfo.h #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum BinaryAnnotationOpcode { - /// link time pdb contains PADDINGs - Invalid = 0, + /// link time pdb contains PADDINGs. + /// + /// These are represented with the 0 opcode which is in some PDB + /// implementation called "invalid". 
+ Eof = 0, /// param : start offset CodeOffset = 1, /// param : nth separated code chunk (main code chunk == 0) @@ -36,12 +39,14 @@ enum BinaryAnnotationOpcode { ChangeCodeLengthAndCodeOffset = 12, /// param : end column number ChangeColumnEnd = 13, + /// A non valid value + Invalid, } impl From for BinaryAnnotationOpcode { fn from(value: u32) -> Self { match value { - 0 => BinaryAnnotationOpcode::Invalid, + 0 => BinaryAnnotationOpcode::Eof, 1 => BinaryAnnotationOpcode::CodeOffset, 2 => BinaryAnnotationOpcode::ChangeCodeOffsetBase, 3 => BinaryAnnotationOpcode::ChangeCodeOffset, @@ -129,9 +134,10 @@ impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { return Ok(None); } - let annotation = match BinaryAnnotationOpcode::from(self.uncompress_next()?) { - BinaryAnnotationOpcode::Invalid => { - // invalid opcodes mark the end of the stream. + let op = self.uncompress_next()?; + let annotation = match BinaryAnnotationOpcode::from(op) { + BinaryAnnotationOpcode::Eof => { + // This makes the end of the stream self.buffer = ParseBuffer::default(); return Ok(None); } @@ -181,6 +187,9 @@ impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { BinaryAnnotationOpcode::ChangeColumnEnd => { BinaryAnnotation::ChangeColumnEnd(self.uncompress_next()?) } + BinaryAnnotationOpcode::Invalid => { + return Err(Error::UnknownBinaryAnnotation(op)); + } }; Ok(Some(annotation)) From 3767b59b383b2e057c3ba5d648e02b1ec0128a55 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 31 Jul 2019 19:07:06 +0200 Subject: [PATCH 07/31] Allow random access to symbols in a symbol table --- src/common.rs | 21 +++++- src/modi/mod.rs | 15 ++-- src/symbol/mod.rs | 187 +++++++++++++++++++++++++++++++++------------- 3 files changed, 160 insertions(+), 63 deletions(-) diff --git a/src/common.rs b/src/common.rs index 39a262b..5ffaec2 100644 --- a/src/common.rs +++ b/src/common.rs @@ -626,7 +626,6 @@ impl<'a> TryFromCtx<'a, Endian> for StringRef { } /// Provides little-endian access to a &[u8]. 
-#[doc(hidden)] #[derive(Debug, Clone)] pub(crate) struct ParseBuffer<'b>(&'b [u8], usize); @@ -670,6 +669,23 @@ impl<'b> ParseBuffer<'b> { self.1 } + /// Seek to the given absolute position. + #[inline] + pub fn seek(&mut self, pos: usize) { + self.1 = std::cmp::min(pos, self.len()); + } + + /// Truncates the buffer at the given absolute position. + #[inline] + pub fn truncate(&mut self, len: usize) -> Result<()> { + if self.0.len() >= len { + self.0 = &self.0[..len]; + Ok(()) + } else { + Err(Error::UnexpectedEof) + } + } + /// Align the current position to the next multiple of `alignment` bytes. #[inline] pub fn align(&mut self, alignment: usize) -> Result<()> { @@ -717,7 +733,6 @@ impl<'b> ParseBuffer<'b> { def_peek!((peek_u8, u8), (peek_u16, u16),); /// Parse a NUL-terminated string from the input. - #[doc(hidden)] #[inline] pub fn parse_cstring(&mut self) -> Result> { let input = &self.0[self.1..]; @@ -732,7 +747,6 @@ impl<'b> ParseBuffer<'b> { } /// Parse a u8-length-prefixed string from the input. - #[doc(hidden)] #[inline] pub fn parse_u8_pascal_string(&mut self) -> Result> { let length = self.parse_u8()? as usize; @@ -740,7 +754,6 @@ impl<'b> ParseBuffer<'b> { } /// Take n bytes from the input - #[doc(hidden)] #[inline] pub fn take(&mut self, n: usize) -> Result<&'b [u8]> { let input = &self.0[self.1..]; diff --git a/src/modi/mod.rs b/src/modi/mod.rs index 609d445..e0580a0 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -1,9 +1,7 @@ -use std::mem; - use crate::common::*; use crate::dbi::Module; use crate::msf::Stream; -use crate::symbol::SymbolIter; +use crate::symbol::{SymbolIndex, SymbolIter}; use crate::FallibleIterator; mod c13; @@ -55,9 +53,16 @@ impl<'s> ModuleInfo<'s> { /// Get an iterator over the all symbols in this module. 
pub fn symbols(&self) -> Result> { let mut buf = self.stream.parse_buffer(); + buf.truncate(self.symbols_size)?; buf.parse_u32()?; - let symbols = buf.take(self.symbols_size - mem::size_of::())?; - Ok(SymbolIter::new(symbols.into())) + Ok(SymbolIter::new(buf)) + } + + /// Get an iterator over symbols starting at the given index. + pub fn symbols_at(&self, index: SymbolIndex) -> Result> { + let mut iter = self.symbols()?; + iter.seek(index); + Ok(iter) } /// Returns a line program that gives access to file and line information in this module. diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index a8e908d..2b0d387 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -6,7 +6,6 @@ // copied, modified, or distributed except according to those terms. use std::fmt; -use std::result; use scroll::{ctx::TryFromCtx, Endian, Pread, LE}; @@ -24,6 +23,52 @@ pub use self::annotations::*; /// The raw type discriminator for `Symbols`. pub type SymbolKind = u16; +/// A register referred to by its number. +pub type Register = u16; + +/// A reference into the symbol table of a module. +/// +/// To retrieve the symbol referenced by this index, use [`SymbolTable::iter_at`]. When iterating, +/// use [`SymbolIter::seek`] to jump between symbols. 
+/// +/// [`SymbolTable::iter_at`]: struct.SymbolTable.html#method.iter_at +/// [`SymbolIter::seek`]: struct.SymbolIter.html#method.seek +#[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct SymbolIndex(pub u32); + +impl From for SymbolIndex { + fn from(offset: u32) -> Self { + Self(offset) + } +} + +impl From for u32 { + fn from(string_ref: SymbolIndex) -> Self { + string_ref.0 + } +} + +impl fmt::Display for SymbolIndex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:#010x}", self.0) + } +} + +impl fmt::Debug for SymbolIndex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SymbolIndex({})", self) + } +} + +impl<'a> TryFromCtx<'a, Endian> for SymbolIndex { + type Error = scroll::Error; + type Size = usize; + + fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + u32::try_from_ctx(this, le).map(|(i, s)| (Self(i), s)) + } +} + /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, /// constants, data types, and more. /// @@ -75,6 +120,13 @@ impl<'s> SymbolTable<'s> { pub fn iter(&self) -> SymbolIter<'_> { SymbolIter::new(self.stream.parse_buffer()) } + + /// Returns an iterator over symbols starting at the given index. + pub fn iter_at(&self, index: SymbolIndex) -> SymbolIter<'_> { + let mut iter = self.iter(); + iter.seek(index); + iter + } } /// Represents a symbol from the symbol table. @@ -114,8 +166,12 @@ impl<'t> Symbol<'t> { /// corrsponding end symbol. 
pub fn starts_scope(&self) -> bool { match self.raw_kind() { - S_GPROC32 | S_LPROC32 | S_LPROC32_ID | S_GPROC32_ID | S_BLOCK32 | S_SEPCODE - | S_THUNK32 | S_INLINESITE | S_INLINESITE2 => true, + S_GPROC16 | S_GPROCMIPS | S_GPROCMIPS_ST | S_GPROCIA64 | S_GPROCIA64_ST | S_LPROC16 + | S_LPROC32_DPC | S_LPROCMIPS | S_LPROCMIPS_ST | S_LPROCIA64 | S_LPROCIA64_ST + | S_LPROC32_DPC_ID | S_GPROCMIPS_ID | S_GPROCIA64_ID | S_BLOCK16 | S_BLOCK32 + | S_BLOCK32_ST | S_WITH16 | S_WITH32 | S_WITH32_ST | S_THUNK16 | S_THUNK32 + | S_THUNK32_ST | S_SEPCODE | S_GMANPROC | S_GMANPROC_ST | S_LMANPROC + | S_LMANPROC_ST | S_INLINESITE | S_INLINESITE2 => true, _ => false, } } @@ -233,7 +289,7 @@ pub enum SymbolData<'t> { ObjName(ObjNameSymbol<'t>), // S_COMPILE2 (0x1116) | S_COMPILE2_ST (0x1013) | S_COMPILE3 (0x113c) - ExtendedCompileFlags(ExtendedCompileFlagsSymbol), + ExtendedCompileFlags(ExtendedCompileFlagsSymbol<'t>), // S_UNAMESPACE (0x1124) | S_UNAMESPACE_ST (0x1029) UsingNamespace(UsingNamespaceSymbol<'t>), @@ -323,9 +379,6 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { } } -/// A register referred to by its number. -pub type Register = u16; - /// A Register variable. 
/// /// `S_REGISTER`, or `S_REGISTER_ST` @@ -474,7 +527,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for DataSymbol<'t> { pub struct ProcedureReferenceSymbol<'t> { pub global: bool, pub sum_name: u32, - pub symbol_index: u32, + pub symbol_index: SymbolIndex, pub module: u16, pub name: Option>, } @@ -507,7 +560,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureReferenceSymbol<'t> { #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct DataReferenceSymbol<'t> { pub sum_name: u32, - pub symbol_index: u32, + pub symbol_index: SymbolIndex, pub module: u16, pub name: Option>, } @@ -534,7 +587,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for DataReferenceSymbol<'t> { #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct AnnotationReferenceSymbol<'t> { pub sum_name: u32, - pub symbol_index: u32, + pub symbol_index: SymbolIndex, pub module: u16, pub name: RawString<'t>, } @@ -691,9 +744,9 @@ impl<'t> TryFromCtx<'t, Endian> for ProcedureFlags { #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ProcedureSymbol<'t> { pub global: bool, - pub parent: u32, - pub end: u32, - pub next: u32, + pub parent: SymbolIndex, + pub end: SymbolIndex, + pub next: SymbolIndex, pub len: u32, pub dbg_start_offset: u32, pub dbg_end_offset: u32, @@ -737,8 +790,8 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { /// `S_INLINESITE` or `S_INLINESITE2`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct InlineSiteSymbol<'t> { - pub parent: u32, - pub end: u32, + pub parent: SymbolIndex, + pub end: SymbolIndex, pub inlinee: ItemId, pub invocations: Option, pub annotations: BinaryAnnotations<'t>, @@ -820,13 +873,13 @@ impl<'t> TryFromCtx<'t, bool> for CompilerVersion { /// Compile flags declared in `ExtendedCompileFlagsSymbol`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ExtendedCompileFlags { - /// Compiled for E/C. + /// Compiled for edit and continue. edit_and_continue: bool, - /// Compiled without debug information. + /// Compiled without debugging info. 
no_debug_info: bool, - /// Compiled with `/LTCG`. + /// Compiled with `LTCG`. link_time_codegen: bool, - /// Compiled with `-Bzalign`. + /// Compiled with `/bzalign`. no_data_align: bool, /// Managed code or data is present. managed: bool, @@ -840,7 +893,7 @@ pub struct ExtendedCompileFlags { msil_module: bool, /// Compiled with `/sdl`. sdl: bool, - /// Compiled with `/ltcg:pgo` or `pgu`. + /// Compiled with `/ltcg:pgo` or `pgo:`. pgo: bool, /// This is a .exp module. exp_module: bool, @@ -878,15 +931,16 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlags { /// The information parsed from a symbol record with kind /// `S_COMPILE2`, `S_COMPILE2_ST`, or `S_COMPILE3` #[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct ExtendedCompileFlagsSymbol { +pub struct ExtendedCompileFlagsSymbol<'t> { pub language: SourceLanguage, pub flags: ExtendedCompileFlags, pub cpu_type: CPUType, pub frontend_version: CompilerVersion, pub backend_version: CompilerVersion, + pub version_string: RawString<'t>, } -impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlagsSymbol { +impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlagsSymbol<'t> { type Error = Error; type Size = usize; @@ -900,6 +954,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlagsSymbol { cpu_type: buf.parse()?, frontend_version: buf.parse_with(has_qfe)?, backend_version: buf.parse_with(has_qfe)?, + version_string: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) @@ -1076,13 +1131,38 @@ impl<'t> SymbolIter<'t> { pub(crate) fn new(buf: ParseBuffer<'t>) -> SymbolIter<'t> { SymbolIter { buf } } + + /// Move the iterator to the symbol referred to by `index`. + /// + /// This can be used to jump to the sibling or parent of a symbol record. + pub fn seek(&mut self, index: SymbolIndex) { + // A symbol index of 0 refers to no symbol. Seek to the end of the iterator. 
+ let pos = match index.0 { + 0 => self.buf.pos() + self.buf.len(), + pos => pos as usize, + }; + + self.buf.seek(pos); + } + + /// Skip to the symbol referred to by `index`, returning the symbol. + /// + /// This can be used to jump to the sibiling or parent of a symbol record. Iteration continues + /// after that symbol. + /// + /// Note that the symbol may be located **before** the originating symbol, for instance when + /// jumping to the parent symbol. Take care not to enter an endless loop in this case. + pub fn skip_to(&mut self, index: SymbolIndex) -> Result>> { + self.seek(index); + self.next() + } } impl<'t> FallibleIterator for SymbolIter<'t> { type Item = Symbol<'t>; type Error = Error; - fn next(&mut self) -> result::Result, Self::Error> { + fn next(&mut self) -> Result> { while !self.buf.is_empty() { // read the length of the next symbol let symbol_length = self.buf.parse::()? as usize; @@ -1095,12 +1175,11 @@ impl<'t> FallibleIterator for SymbolIter<'t> { let data = self.buf.take(symbol_length)?; let symbol = Symbol(data); - if symbol.raw_kind() == S_ALIGN { - // S_ALIGN is used for page alignment of symbols. 
- continue; + // skip over padding in the symbol table + match symbol.raw_kind() { + S_ALIGN | S_SKIP => continue, + _ => return Ok(Some(symbol)), } - - return Ok(Some(symbol)); } Ok(None) @@ -1114,13 +1193,13 @@ mod tests { #[test] fn kind_110e() { - let buf = &[ + let data = &[ 14, 17, 2, 0, 0, 0, 192, 85, 0, 0, 1, 0, 95, 95, 108, 111, 99, 97, 108, 95, 115, 116, 100, 105, 111, 95, 112, 114, 105, 110, 116, 102, 95, 111, 112, 116, 105, 111, 110, 115, 0, 0, ]; - let symbol = Symbol(buf); + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x110e); assert_eq!( symbol.parse().expect("parse"), @@ -1140,18 +1219,18 @@ mod tests { #[test] fn kind_1125() { - let buf = &[ + let data = &[ 37, 17, 0, 0, 0, 0, 108, 0, 0, 0, 1, 0, 66, 97, 122, 58, 58, 102, 95, 112, 117, 98, 108, 105, 99, 0, ]; - let symbol = Symbol(buf); + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x1125); assert_eq!( symbol.parse().expect("parse"), SymbolData::ProcedureReference(ProcedureReferenceSymbol { global: true, sum_name: 0, - symbol_index: 108, + symbol_index: SymbolIndex(108), module: 1, name: Some("Baz::f_public".into()), }) @@ -1160,8 +1239,8 @@ mod tests { #[test] fn kind_1108() { - let buf = &[8, 17, 112, 6, 0, 0, 118, 97, 95, 108, 105, 115, 116, 0]; - let symbol = Symbol(buf); + let data = &[8, 17, 112, 6, 0, 0, 118, 97, 95, 108, 105, 115, 116, 0]; + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x1108); assert_eq!( symbol.parse().expect("parse"), @@ -1174,11 +1253,11 @@ mod tests { #[test] fn kind_1107() { - let buf = &[ + let data = &[ 7, 17, 201, 18, 0, 0, 1, 0, 95, 95, 73, 83, 65, 95, 65, 86, 65, 73, 76, 65, 66, 76, 69, 95, 83, 83, 69, 50, 0, 0, ]; - let symbol = Symbol(buf); + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x1107); assert_eq!( symbol.parse().expect("parse"), @@ -1193,11 +1272,11 @@ mod tests { #[test] fn kind_110d() { - let buf = &[ + let data = &[ 13, 17, 116, 0, 0, 0, 16, 0, 0, 0, 3, 0, 95, 95, 105, 115, 97, 95, 97, 118, 97, 105, 
108, 97, 98, 108, 101, 0, 0, 0, ]; - let symbol = Symbol(buf); + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x110d); assert_eq!( symbol.parse().expect("parse"), @@ -1216,11 +1295,11 @@ mod tests { #[test] fn kind_110c() { - let buf = &[ + let data = &[ 12, 17, 32, 0, 0, 0, 240, 36, 1, 0, 2, 0, 36, 120, 100, 97, 116, 97, 115, 121, 109, 0, ]; - let symbol = Symbol(buf); + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x110c); assert_eq!( symbol.parse().expect("parse"), @@ -1239,18 +1318,18 @@ mod tests { #[test] fn kind_1127() { - let buf = &[ + let data = &[ 39, 17, 0, 0, 0, 0, 128, 4, 0, 0, 182, 0, 99, 97, 112, 116, 117, 114, 101, 95, 99, 117, 114, 114, 101, 110, 116, 95, 99, 111, 110, 116, 101, 120, 116, 0, 0, 0, ]; - let symbol = Symbol(buf); + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x1127); assert_eq!( symbol.parse().expect("parse"), SymbolData::ProcedureReference(ProcedureReferenceSymbol { global: false, sum_name: 0, - symbol_index: 1152, + symbol_index: SymbolIndex(1152), module: 182, name: Some("capture_current_context".into()), }) @@ -1259,20 +1338,20 @@ mod tests { #[test] fn kind_1110() { - let buf = &[ + let data = &[ 16, 17, 0, 0, 0, 0, 48, 2, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 7, 16, 0, 0, 64, 85, 0, 0, 1, 0, 0, 66, 97, 122, 58, 58, 102, 95, 112, 114, 111, 116, 101, 99, 116, 101, 100, 0, ]; - let symbol = Symbol(buf); + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x1110); assert_eq!( symbol.parse().expect("parse"), SymbolData::Procedure(ProcedureSymbol { global: true, - parent: 0, - end: 560, - next: 0, + parent: SymbolIndex(0), + end: SymbolIndex(560), + next: SymbolIndex(0), len: 6, dbg_start_offset: 5, dbg_end_offset: 5, @@ -1298,20 +1377,20 @@ mod tests { #[test] fn kind_110f() { - let buf = &[ + let data = &[ 15, 17, 0, 0, 0, 0, 156, 1, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 0, 128, 16, 0, 0, 196, 87, 0, 0, 1, 0, 128, 95, 95, 115, 99, 114, 116, 95, 99, 111, 109, 
109, 111, 110, 95, 109, 97, 105, 110, 0, 0, 0, ]; - let symbol = Symbol(buf); + let symbol = Symbol(data); assert_eq!(symbol.raw_kind(), 0x110f); assert_eq!( symbol.parse().expect("parse"), SymbolData::Procedure(ProcedureSymbol { global: false, - parent: 0, - end: 412, - next: 0, + parent: SymbolIndex(0), + end: SymbolIndex(412), + next: SymbolIndex(0), len: 18, dbg_start_offset: 4, dbg_end_offset: 9, From 6885e39889b84f960578d4c41a600eb595ec4c90 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 31 Jul 2019 19:23:24 +0200 Subject: [PATCH 08/31] Implement S_BUILDINFO --- src/symbol/mod.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 2b0d387..c27bcfb 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -285,6 +285,9 @@ pub enum SymbolData<'t> { // S_INLINESITE_END (0x114e) InlineSiteEnd, + // S_BUILDINFO (0x114c) + BuildInfo(BuildInfoSymbol), + // S_OBJNAME (0x1101) | S_OBJNAME_ST (0x0009) ObjName(ObjNameSymbol<'t>), @@ -320,6 +323,7 @@ impl<'t> SymbolData<'t> { SymbolData::ProcedureEnd => None, SymbolData::InlineSite(_) => None, SymbolData::InlineSiteEnd => None, + SymbolData::BuildInfo(_) => None, SymbolData::ObjName(data) => Some(data.name), SymbolData::ExtendedCompileFlags(_) => None, SymbolData::UsingNamespace(data) => Some(data.name), @@ -365,6 +369,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_PROC_ID_END => SymbolData::ProcedureEnd, S_INLINESITE | S_INLINESITE2 => SymbolData::InlineSite(buf.parse_with(kind)?), S_INLINESITE_END => SymbolData::InlineSiteEnd, + S_BUILDINFO => SymbolData::BuildInfo(buf.parse_with(kind)?), S_OBJNAME | S_OBJNAME_ST => SymbolData::ObjName(buf.parse_with(kind)?), S_COMPILE2 | S_COMPILE2_ST | S_COMPILE3 => { SymbolData::ExtendedCompileFlags(buf.parse_with(kind)?) @@ -819,6 +824,26 @@ impl<'t> TryFromCtx<'t, SymbolKind> for InlineSiteSymbol<'t> { } } +/// The information parsed from a symbol record with kind +/// `S_BUILDINFO`. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct BuildInfoSymbol { + pub id: ItemId, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for BuildInfoSymbol { + type Error = Error; + type Size = usize; + + fn try_from_ctx(this: &'t [u8], _kind: SymbolKind) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + + let symbol = BuildInfoSymbol { id: buf.parse()? }; + + Ok((symbol, buf.pos())) + } +} + /// The information parsed from a symbol record with kind /// `S_OBJNAME`, or `S_OBJNAME_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] From 24aca5e252172ad31f9112a0e7fd8762c14e334b Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Thu, 1 Aug 2019 12:05:35 +0200 Subject: [PATCH 09/31] Evaluate binary annotations --- src/modi/c13.rs | 2 + src/modi/mod.rs | 2 + src/symbol/annotations.rs | 143 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) diff --git a/src/modi/c13.rs b/src/modi/c13.rs index b9679bc..5ed1a70 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -121,6 +121,7 @@ struct DebugInlineesHeader { #[derive(Clone, Copy, Debug, Default, Pread)] pub struct InlineeSourceLine { pub inlinee: ItemId, + // This should be FileIndex pub file_id: u32, pub source_line_num: u32, } @@ -580,6 +581,7 @@ impl<'a> FallibleIterator for C13LineIterator<'a> { return Ok(Some(LineInfo { offset: section_header.offset + line_entry.offset, + length: None, file_index: FileIndex(block_header.file_index), line_start: line_entry.start_line, line_end: line_entry.end_line, diff --git a/src/modi/mod.rs b/src/modi/mod.rs index e0580a0..215465a 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -133,6 +133,8 @@ pub enum LineInfoKind { pub struct LineInfo { /// Source code offset. pub offset: PdbInternalSectionOffset, + /// The optional length of the code. + pub length: Option, /// Index of the source file in this module. pub file_index: FileIndex, /// Line number of the start of the covered range. 
diff --git a/src/symbol/annotations.rs b/src/symbol/annotations.rs index fcc317e..e74ad87 100644 --- a/src/symbol/annotations.rs +++ b/src/symbol/annotations.rs @@ -1,6 +1,8 @@ use std::result; use crate::common::*; +use crate::modi::{FileIndex, InlineeSourceLine, LineInfo, LineInfoKind}; +use crate::symbol::SymbolIndex; use crate::FallibleIterator; /// These values correspond to the BinaryAnnotationOpcode enum from the @@ -86,6 +88,18 @@ pub enum BinaryAnnotation { ChangeColumnEnd(u32), } +impl BinaryAnnotation { + /// Does this annotation emit a line info? + pub fn emits_line_info(self) -> bool { + match self { + BinaryAnnotation::ChangeCodeOffset(..) => true, + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(..) => true, + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(..) => true, + _ => false, + } + } +} + /// An iterator over binary annotations used by `S_INLINESITE`. pub struct BinaryAnnotationsIter<'t> { buffer: ParseBuffer<'t>, @@ -196,6 +210,27 @@ impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { } } +#[derive(Copy, Clone, Debug)] +pub struct InstructionLocation { + pub range_kind: LineInfoKind, + pub offset_start: u32, + pub offset_end: u32, + pub line_start: u32, + pub line_end: u32, + pub col_start: u32, + pub col_end: u32, +} + +#[derive(Clone, Debug)] +pub struct Inlinee { + pub id: ItemId, + pub ptr: SymbolIndex, + pub parent: SymbolIndex, + pub file_offset: FileIndex, + pub base_line_num: u32, + pub locations: Vec, +} + /// Binary annotations of a symbol. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct BinaryAnnotations<'t> { @@ -208,6 +243,114 @@ impl<'t> BinaryAnnotations<'t> { BinaryAnnotations { data } } + /// Evalutes the annotations into line infos. + /// + /// `start_offset` is the address of the function that is the base for this + /// inline site. The `source_line` is the base of where the source + /// information is evaluated from. 
+ pub fn evaluate( + &self, + start_offset: PdbInternalSectionOffset, + source_line: &InlineeSourceLine, + ) -> Result> { + let mut iter = self.iter(); + let mut rv: Vec = vec![]; + + let mut file_index = FileIndex(source_line.file_id); + let mut code_offset_base = 0; + let mut code_offset = start_offset; + let mut code_length = 0; + let mut current_line = source_line.source_line_num; + let mut current_line_length = 1; + let mut current_col_start = 1; + let mut current_col_end = 100_000; + let mut range_kind = LineInfoKind::Expression; + + while let Some(op) = iter.next()? { + match op { + BinaryAnnotation::CodeOffset(new_val) => { + code_offset.offset = new_val; + } + BinaryAnnotation::ChangeCodeOffsetBase(new_val) => { + code_offset_base = new_val; + } + BinaryAnnotation::ChangeCodeOffset(delta) => { + code_offset = code_offset.wrapping_add(delta); + } + BinaryAnnotation::ChangeCodeLength(val) => { + if let Some(last_loc) = rv.last_mut() { + if last_loc.length.is_none() && last_loc.kind == range_kind { + last_loc.length = Some(val); + } + } + code_offset = code_offset.wrapping_add(val); + } + BinaryAnnotation::ChangeFile(new_val) => { + file_index = FileIndex(new_val); + } + BinaryAnnotation::ChangeLineOffset(delta) => { + current_line = (i64::from(current_line) + i64::from(delta)) as u32; + } + BinaryAnnotation::ChangeLineEndDelta(new_val) => { + current_line_length = new_val; + } + BinaryAnnotation::ChangeRangeKind(kind) => { + range_kind = match kind { + 0 => LineInfoKind::Expression, + 1 => LineInfoKind::Statement, + _ => range_kind, + }; + } + BinaryAnnotation::ChangeColumnStart(new_val) => { + current_col_start = new_val; + } + BinaryAnnotation::ChangeColumnEndDelta(delta) => { + current_col_end = (i64::from(current_col_end) + i64::from(delta)) as u32; + } + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(code_delta, line_delta) => { + code_offset = PdbInternalSectionOffset { + section: code_offset.section, + offset: (i64::from(code_offset.offset) + 
i64::from(code_delta)) as u32, + }; + current_line = (i64::from(current_line) + i64::from(line_delta)) as u32; + } + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(new_code_length, code_delta) => { + code_length = new_code_length; + code_offset = PdbInternalSectionOffset { + section: code_offset.section, + offset: (i64::from(code_offset.offset) + i64::from(code_delta)) as u32, + }; + } + BinaryAnnotation::ChangeColumnEnd(new_val) => { + current_col_end = new_val; + } + } + + if op.emits_line_info() { + if let Some(last_loc) = rv.last_mut() { + if last_loc.length.is_none() && last_loc.kind == range_kind { + last_loc.length = Some(code_offset.offset - code_offset_base); + } + } + + rv.push(LineInfo { + kind: range_kind, + file_index, + offset: code_offset + code_offset_base, + length: Some(code_length), + line_start: current_line, + line_end: current_line + current_line_length, + column_start: Some(current_col_start as u16), + column_end: Some(current_col_end as u16), + }); + + code_length = 0; + } + } + + Ok(rv) + } + /// Iterates through binary annotations. pub fn iter(&self) -> BinaryAnnotationsIter<'t> { BinaryAnnotationsIter { From bef9ed79daedcae8b8bc030b587b6d2b4ff3a910 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Thu, 1 Aug 2019 18:30:31 +0200 Subject: [PATCH 10/31] Add missing doc comments --- src/framedata.rs | 1 + src/symbol/annotations.rs | 7 +- src/symbol/mod.rs | 673 +++++++++++++++++++++++++++++--------- 3 files changed, 521 insertions(+), 160 deletions(-) diff --git a/src/framedata.rs b/src/framedata.rs index 34721d7..24ab97e 100644 --- a/src/framedata.rs +++ b/src/framedata.rs @@ -364,6 +364,7 @@ impl From<&'_ NewFrameData> for FrameData { } } +/// Iterator over entries in a [`FrameTable`](struct.FrameTable.html). 
#[derive(Debug, Default)] pub struct FrameDataIter<'t> { old_frames: &'t [OldFrameData], diff --git a/src/symbol/annotations.rs b/src/symbol/annotations.rs index e74ad87..4e036cc 100644 --- a/src/symbol/annotations.rs +++ b/src/symbol/annotations.rs @@ -9,7 +9,7 @@ use crate::FallibleIterator; /// cvinfo.h #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum BinaryAnnotationOpcode { - /// link time pdb contains PADDINGs. + /// Link time pdb contains PADDINGs. /// /// These are represented with the 0 opcode which is in some PDB /// implementation called "invalid". @@ -232,6 +232,11 @@ pub struct Inlinee { } /// Binary annotations of a symbol. +/// +/// The binary annotation mechanism supports recording a list of annotations in an instruction +/// stream. The X64 unwind code and the DWARF standard have a similar design. +/// +/// Binary annotations are primarily used as line programs for inline function calls. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct BinaryAnnotations<'t> { data: &'t [u8], diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index c27bcfb..dfb68d3 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -31,11 +31,32 @@ pub type Register = u16; /// To retrieve the symbol referenced by this index, use [`SymbolTable::iter_at`]. When iterating, /// use [`SymbolIter::seek`] to jump between symbols. /// -/// [`SymbolTable::iter_at`]: struct.SymbolTable.html#method.iter_at -/// [`SymbolIter::seek`]: struct.SymbolIter.html#method.seek -#[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] +/// The index might also indicate the absence of a symbol (numeric value `0`). This is indicated by +/// `is_none` returning `true`. Seeking to this symbol will return an empty iterator. 
+/// +/// [`SymbolTable::iter_at`]: struct.SymbolTable.html#method.iter_at +/// [`SymbolIter::seek`]: struct.SymbolIter.html#method.seek +#[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct SymbolIndex(pub u32); +impl SymbolIndex { + /// Returns `true` if the symbol index points to a symbol. + #[inline] + #[must_use] + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn is_some(&self) -> bool { + self.0 != 0 + } + + /// Returns `true` if the symbol index indicates the absence of a symbol. + #[inline] + #[must_use] + #[allow(clippy::trivially_copy_pass_by_ref)] + pub fn is_none(&self) -> bool { + self.0 == 0 + } +} + impl From for SymbolIndex { fn from(offset: u32) -> Self { Self(offset) @@ -196,13 +217,6 @@ impl<'t> fmt::Debug for Symbol<'t> { } } -// data types are defined at: -// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L3038 -// constants defined at: -// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2735 -// decoding reference: -// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/cvdump/dumpsym7.cpp#L264 - fn parse_symbol_name<'t>(buf: &mut ParseBuffer<'t>, kind: SymbolKind) -> Result> { if kind < S_ST_MAX { // Pascal-style name @@ -226,82 +240,60 @@ fn parse_optional_name<'t>( } } -/// `SymbolData` contains the information parsed from a symbol record. +// data types are defined at: +// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L3038 +// constants defined at: +// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2735 +// decoding reference: +// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/cvdump/dumpsym7.cpp#L264 + +/// Information parsed from a [`Symbol`] record. 
+/// +/// [`Symbol`]: struct.Symbol.html #[derive(Clone, Debug, Eq, PartialEq)] pub enum SymbolData<'t> { - // S_END (0x0006) + /// End of a scope, such as a procedure. ScopeEnd, - - // S_REGISTER (0x1106) | S_REGISTER_ST (0x1001) + /// Name of the object file of this module. + ObjName(ObjNameSymbol<'t>), + /// A Register variable. RegisterVariable(RegisterVariableSymbol<'t>), - - // S_MANYREG (0x110a) | S_MANYREG_ST (0x1005) - // S_MANYREG2 (0x1117) | S_MANYREG2_ST (0x1014) + /// A constant value. + Constant(ConstantSymbol<'t>), + /// A user defined type. + UserDefinedType(UserDefinedTypeSymbol<'t>), + /// A Register variable spanning multiple registers. MultiRegisterVariable(MultiRegisterVariableSymbol<'t>), - - // S_PUB32 (0x110e) | S_PUB32_ST (0x1009) - Public(PublicSymbol<'t>), - - // S_LDATA32 (0x110c) | S_LDATA32_ST (0x1007) - // S_GDATA32 (0x110d) | S_GDATA32_ST (0x1008) - // S_LMANDATA (0x111c) | S_LMANDATA_ST (0x1020) - // S_GMANDATA (0x111d) | S_GMANDATA_ST (0x1021) + /// Static data, such as a global variable. Data(DataSymbol<'t>), - - // S_PROCREF (0x1125) | S_PROCREF_ST (0x0400) - // S_LPROCREF (0x1127) | S_LPROCREF_ST (0x0403) + /// A public symbol with a mangled name. + Public(PublicSymbol<'t>), + /// A procedure, such as a function or method. + Procedure(ProcedureSymbol<'t>), + /// A thread local variable. + ThreadStorage(ThreadStorageSymbol<'t>), + /// Flags used to compile a module. + CompileFlags(CompileFlagsSymbol<'t>), + /// A using namespace directive. + UsingNamespace(UsingNamespaceSymbol<'t>), + /// Reference to a [`ProcedureSymbol`](struct.ProcedureSymbol.html). ProcedureReference(ProcedureReferenceSymbol<'t>), - - // S_DATAREF (0x1126) | S_DATAREF_ST (0x0401) + /// Reference to an imported variable. DataReference(DataReferenceSymbol<'t>), - - // S_ANNOTATIONREF (0x1128) + /// Reference to an annotation. 
AnnotationReference(AnnotationReferenceSymbol<'t>), - - // S_CONSTANT (0x1107) | S_CONSTANT_ST (0x1002) - Constant(ConstantSymbol<'t>), - - // S_UDT (0x1108) | S_UDT_ST (0x1003) - UserDefinedType(UserDefinedTypeSymbol<'t>), - - // S_LTHREAD32 (0x1112) | S_LTHREAD32_ST (0x100e) - // S_GTHREAD32 (0x1113) | S_GTHREAD32_ST (0x100f) - ThreadStorage(ThreadStorageSymbol<'t>), - - // S_LPROC32 (0x110f) | S_LPROC32_ST (0x100a) - // S_GPROC32 (0x1110) | S_GPROC32_ST (0x100b) - // S_LPROC32_ID (0x1146) | - // S_GPROC32_ID (0x1147) | - // S_LPROC32_DPC (0x1155) | - // S_LPROC32_DPC_ID (0x1156) - Procedure(ProcedureSymbol<'t>), - - // S_PROC_ID_END (0x114f) - ProcedureEnd, - - // S_INLINESITE (0x114d) + /// An exported symbol. + Export(ExportSymbol<'t>), + /// A local symbol in optimized code. + Local(LocalSymbol<'t>), + /// Reference to build information. + BuildInfo(BuildInfoSymbol), + /// The callsite of an inlined function. InlineSite(InlineSiteSymbol<'t>), - - // S_INLINESITE_END (0x114e) + /// End of an inline callsite. InlineSiteEnd, - - // S_BUILDINFO (0x114c) - BuildInfo(BuildInfoSymbol), - - // S_OBJNAME (0x1101) | S_OBJNAME_ST (0x0009) - ObjName(ObjNameSymbol<'t>), - - // S_COMPILE2 (0x1116) | S_COMPILE2_ST (0x1013) | S_COMPILE3 (0x113c) - ExtendedCompileFlags(ExtendedCompileFlagsSymbol<'t>), - - // S_UNAMESPACE (0x1124) | S_UNAMESPACE_ST (0x1029) - UsingNamespace(UsingNamespaceSymbol<'t>), - - // S_LOCAL (0x113e) - Local(LocalSymbol<'t>), - - // S_EXPORT (0x1138) - Export(ExportSymbol<'t>), + /// End of a procedure. 
+ ProcedureEnd, } impl<'t> SymbolData<'t> { @@ -309,26 +301,26 @@ impl<'t> SymbolData<'t> { pub fn name(&self) -> Option> { match self { SymbolData::ScopeEnd => None, + SymbolData::ObjName(data) => Some(data.name), SymbolData::RegisterVariable(_) => None, + SymbolData::Constant(data) => Some(data.name), + SymbolData::UserDefinedType(data) => Some(data.name), SymbolData::MultiRegisterVariable(_) => None, - SymbolData::Public(data) => Some(data.name), SymbolData::Data(data) => Some(data.name), + SymbolData::Public(data) => Some(data.name), + SymbolData::Procedure(data) => Some(data.name), + SymbolData::ThreadStorage(data) => Some(data.name), + SymbolData::CompileFlags(_) => None, + SymbolData::UsingNamespace(data) => Some(data.name), SymbolData::ProcedureReference(data) => data.name, SymbolData::DataReference(data) => data.name, SymbolData::AnnotationReference(data) => Some(data.name), - SymbolData::Constant(data) => Some(data.name), - SymbolData::UserDefinedType(data) => Some(data.name), - SymbolData::ThreadStorage(data) => Some(data.name), - SymbolData::Procedure(data) => Some(data.name), - SymbolData::ProcedureEnd => None, + SymbolData::Export(data) => Some(data.name), + SymbolData::Local(data) => Some(data.name), SymbolData::InlineSite(_) => None, - SymbolData::InlineSiteEnd => None, SymbolData::BuildInfo(_) => None, - SymbolData::ObjName(data) => Some(data.name), - SymbolData::ExtendedCompileFlags(_) => None, - SymbolData::UsingNamespace(data) => Some(data.name), - SymbolData::Local(data) => Some(data.name), - SymbolData::Export(data) => Some(data.name), + SymbolData::InlineSiteEnd => None, + SymbolData::ProcedureEnd => None, } } } @@ -343,40 +335,40 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { let symbol = match kind { S_END => SymbolData::ScopeEnd, + S_OBJNAME | S_OBJNAME_ST => SymbolData::ObjName(buf.parse_with(kind)?), S_REGISTER | S_REGISTER_ST => SymbolData::RegisterVariable(buf.parse_with(kind)?), - S_MANYREG | S_MANYREG_ST | S_MANYREG2 | S_MANYREG2_ST 
=> { - SymbolData::MultiRegisterVariable(buf.parse_with(kind)?) - } - S_PUB32 | S_PUB32_ST => SymbolData::Public(buf.parse_with(kind)?), - S_LDATA32 | S_LDATA32_ST | S_GDATA32 | S_GDATA32_ST | S_LMANDATA | S_LMANDATA_ST - | S_GMANDATA | S_GMANDATA_ST => SymbolData::Data(buf.parse_with(kind)?), - S_PROCREF | S_PROCREF_ST | S_LPROCREF | S_LPROCREF_ST => { - SymbolData::ProcedureReference(buf.parse_with(kind)?) - } - S_DATAREF | S_DATAREF_ST => SymbolData::DataReference(buf.parse_with(kind)?), - S_ANNOTATIONREF => SymbolData::AnnotationReference(buf.parse_with(kind)?), S_CONSTANT | S_CONSTANT_ST | S_MANCONSTANT => { SymbolData::Constant(buf.parse_with(kind)?) } S_UDT | S_UDT_ST | S_COBOLUDT | S_COBOLUDT_ST => { SymbolData::UserDefinedType(buf.parse_with(kind)?) } - S_LTHREAD32 | S_LTHREAD32_ST | S_GTHREAD32 | S_GTHREAD32_ST => { - SymbolData::ThreadStorage(buf.parse_with(kind)?) + S_MANYREG | S_MANYREG_ST | S_MANYREG2 | S_MANYREG2_ST => { + SymbolData::MultiRegisterVariable(buf.parse_with(kind)?) } + S_LDATA32 | S_LDATA32_ST | S_GDATA32 | S_GDATA32_ST | S_LMANDATA | S_LMANDATA_ST + | S_GMANDATA | S_GMANDATA_ST => SymbolData::Data(buf.parse_with(kind)?), + S_PUB32 | S_PUB32_ST => SymbolData::Public(buf.parse_with(kind)?), S_LPROC32 | S_LPROC32_ST | S_GPROC32 | S_GPROC32_ST | S_LPROC32_ID | S_GPROC32_ID | S_LPROC32_DPC | S_LPROC32_DPC_ID => SymbolData::Procedure(buf.parse_with(kind)?), - S_PROC_ID_END => SymbolData::ProcedureEnd, - S_INLINESITE | S_INLINESITE2 => SymbolData::InlineSite(buf.parse_with(kind)?), - S_INLINESITE_END => SymbolData::InlineSiteEnd, - S_BUILDINFO => SymbolData::BuildInfo(buf.parse_with(kind)?), - S_OBJNAME | S_OBJNAME_ST => SymbolData::ObjName(buf.parse_with(kind)?), + S_LTHREAD32 | S_LTHREAD32_ST | S_GTHREAD32 | S_GTHREAD32_ST => { + SymbolData::ThreadStorage(buf.parse_with(kind)?) + } S_COMPILE2 | S_COMPILE2_ST | S_COMPILE3 => { - SymbolData::ExtendedCompileFlags(buf.parse_with(kind)?) + SymbolData::CompileFlags(buf.parse_with(kind)?) 
} S_UNAMESPACE | S_UNAMESPACE_ST => SymbolData::UsingNamespace(buf.parse_with(kind)?), - S_LOCAL => SymbolData::Local(buf.parse_with(kind)?), + S_PROCREF | S_PROCREF_ST | S_LPROCREF | S_LPROCREF_ST => { + SymbolData::ProcedureReference(buf.parse_with(kind)?) + } + S_DATAREF | S_DATAREF_ST => SymbolData::DataReference(buf.parse_with(kind)?), + S_ANNOTATIONREF => SymbolData::AnnotationReference(buf.parse_with(kind)?), S_EXPORT => SymbolData::Export(buf.parse_with(kind)?), + S_LOCAL => SymbolData::Local(buf.parse_with(kind)?), + S_BUILDINFO => SymbolData::BuildInfo(buf.parse_with(kind)?), + S_INLINESITE | S_INLINESITE2 => SymbolData::InlineSite(buf.parse_with(kind)?), + S_INLINESITE_END => SymbolData::InlineSiteEnd, + S_PROC_ID_END => SymbolData::ProcedureEnd, other => return Err(Error::UnimplementedSymbolKind(other)), }; @@ -386,11 +378,14 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { /// A Register variable. /// -/// `S_REGISTER`, or `S_REGISTER_ST` +/// Symbol kind `S_REGISTER`, or `S_REGISTER_ST` #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct RegisterVariableSymbol<'t> { + /// Identifier of the variable type. pub type_index: TypeIndex, + /// The register this variable is stored in. pub register: Register, + /// Name of the variable. pub name: RawString<'t>, } @@ -413,9 +408,10 @@ impl<'t> TryFromCtx<'t, SymbolKind> for RegisterVariableSymbol<'t> { /// A Register variable spanning multiple registers. /// -/// `S_MANYREG`, `S_MANYREG_ST`, `S_MANYREG2`, or `S_MANYREG2_ST` +/// Symbol kind `S_MANYREG`, `S_MANYREG_ST`, `S_MANYREG2`, or `S_MANYREG2_ST`. #[derive(Clone, Debug, Eq, PartialEq)] pub struct MultiRegisterVariableSymbol<'t> { + /// Identifier of the variable type. pub type_index: TypeIndex, /// Most significant register first. 
pub registers: Vec<(Register, RawString<'t>)>, @@ -454,14 +450,22 @@ const CVPSF_FUNCTION: u32 = 0x2; const CVPSF_MANAGED: u32 = 0x4; const CVPSF_MSIL: u32 = 0x8; -/// The information parsed from a symbol record with kind `S_PUB32` or `S_PUB32_ST`. +/// A public symbol with a mangled name. +/// +/// Symbol kind `S_PUB32`, or `S_PUB32_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct PublicSymbol<'t> { + /// The public symbol refers to executable code. pub code: bool, + /// The public symbol is a function. pub function: bool, + /// The symbol is in managed code (native or IL). pub managed: bool, + /// The symbol is managed IL code. pub msil: bool, + /// Start offset of the symbol. pub offset: PdbInternalSectionOffset, + /// Mangled name of the symbol. pub name: RawString<'t>, } @@ -486,15 +490,24 @@ impl<'t> TryFromCtx<'t, SymbolKind> for PublicSymbol<'t> { } } -/// The information parsed from a symbol record with kind -/// `S_LDATA32`, `S_LDATA32_ST`, `S_GDATA32`, `S_GDATA32_ST`, -/// `S_LMANDATA`, `S_LMANDATA_ST`, `S_GMANDATA`, or `S_GMANDATA_ST`. +/// Static data, such as a global variable. +/// +/// Symbol kinds: +/// - `S_LDATA32` and `S_LDATA32_ST` for local unmanaged data +/// - `S_GDATA32` and `S_GDATA32_ST` for global unmanaged data +/// - `S_LMANDATA` and `S_LMANDATA_ST` for local managed data +/// - `S_GMANDATA` and `S_GMANDATA_ST` for global managed data #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct DataSymbol<'t> { + /// Whether this data is global or local. pub global: bool, + /// Whether this data is managed or unmanaged. pub managed: bool, + /// Type identifier of the type of data. pub type_index: TypeIndex, + /// Code offset of the start of the data region. pub offset: PdbInternalSectionOffset, + /// Name of the data variable.
pub name: RawString<'t>, } @@ -526,14 +539,22 @@ impl<'t> TryFromCtx<'t, SymbolKind> for DataSymbol<'t> { } } -/// The information parsed from a symbol record with kind -/// `S_PROCREF`, `S_PROCREF_ST`, `S_LPROCREF`, or `S_LPROCREF_ST`. +/// Reference to an imported procedure. +/// +/// Symbol kind `S_PROCREF`, `S_PROCREF_ST`, `S_LPROCREF`, or `S_LPROCREF_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ProcedureReferenceSymbol<'t> { + /// Whether the referenced procedure is global or local. pub global: bool, + /// SUC of the name. pub sum_name: u32, + /// Symbol index of the referenced [`ProcedureSymbol`](struct.ProcedureSymbol.html). + /// + /// Note that this symbol might be located in a different module. pub symbol_index: SymbolIndex, + /// Index of the module containing the actual symbol. pub module: u16, + /// Name of the procedure reference. pub name: Option>, } @@ -561,12 +582,20 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureReferenceSymbol<'t> { } } -/// The information parsed from a symbol record with kind `S_DATAREF` or `S_DATAREF_ST`. +/// Reference to an imported variable. +/// +/// Symbol kind `S_DATAREF`, or `S_DATAREF_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct DataReferenceSymbol<'t> { + /// SUC of the name. pub sum_name: u32, + /// Symbol index of the referenced [`DataSymbol`](struct.DataSymbol.html). + /// + /// Note that this symbol might be located in a different module. pub symbol_index: SymbolIndex, + /// Index of the module containing the actual symbol. pub module: u16, + /// Name of the data reference. pub name: Option>, } @@ -588,12 +617,20 @@ impl<'t> TryFromCtx<'t, SymbolKind> for DataReferenceSymbol<'t> { } } -/// The information parsed from a symbol record with kind `S_ANNOTATIONREF`. +/// Reference to an annotation. +/// +/// Symbol kind `S_ANNOTATIONREF`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct AnnotationReferenceSymbol<'t> { + /// SUC of the name. 
pub sum_name: u32, + /// Symbol index of the referenced symbol. + /// + /// Note that this symbol might be located in a different module. pub symbol_index: SymbolIndex, + /// Index of the module containing the actual symbol. pub module: u16, + /// Name of the annotation reference. pub name: RawString<'t>, } @@ -615,12 +652,18 @@ impl<'t> TryFromCtx<'t, SymbolKind> for AnnotationReferenceSymbol<'t> { } } -/// The information parsed from a symbol record with kind `S_CONSTANT`, or `S_CONSTANT_ST`. +/// A constant value. +/// +/// Symbol kind `S_CONSTANT`, or `S_CONSTANT_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ConstantSymbol<'t> { + /// Whether this constant has metadata type information. pub managed: bool, + /// The type of this constant or metadata token. pub type_index: TypeIndex, + /// The value of this constant. pub value: Variant, + /// Name of the constant. pub name: RawString<'t>, } @@ -642,10 +685,14 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ConstantSymbol<'t> { } } -/// The information parsed from a symbol record with kind `S_UDT`, or `S_UDT_ST`. +/// A user defined type. +/// +/// Symbol kind `S_UDT`, or `S_UDT_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct UserDefinedTypeSymbol<'t> { + /// Identifier of the type. pub type_index: TypeIndex, + /// Name of the type. pub name: RawString<'t>, } @@ -665,13 +712,20 @@ impl<'t> TryFromCtx<'t, SymbolKind> for UserDefinedTypeSymbol<'t> { } } -/// The information parsed from a symbol record with kind -/// `S_LTHREAD32`, `S_LTHREAD32_ST`, `S_GTHREAD32`, or `S_GTHREAD32_ST`. +/// A thread local variable. +/// +/// Symbol kinds: +/// - `S_LTHREAD32`, `S_LTHREAD32_ST` for local thread storage. +/// - `S_GTHREAD32`, or `S_GTHREAD32_ST` for global thread storage. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ThreadStorageSymbol<'t> { + /// Whether this is a global or local thread storage. pub global: bool, + /// Identifier of the stored type. 
pub type_index: TypeIndex, + /// Code offset of the thread local. pub offset: PdbInternalSectionOffset, + /// Name of the thread local. pub name: RawString<'t>, } @@ -708,16 +762,24 @@ const CV_PFLAG_CUST_CALL: u8 = 0x20; const CV_PFLAG_NOINLINE: u8 = 0x40; const CV_PFLAG_OPTDBGINFO: u8 = 0x80; -/// The information parsed from a CV_PROCFLAGS bit field +/// Flags of a [`ProcedureSymbol`](struct.ProcedureSymbol.html). #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ProcedureFlags { + /// Frame pointer is present (not omitted). pub nofpo: bool, + /// Interrupt return. pub int: bool, + /// Far return. pub far: bool, + /// Procedure does not return. pub never: bool, + /// Procedure is never called. pub notreached: bool, + /// Custom calling convention. pub cust_call: bool, + /// Marked as `noinline`. pub noinline: bool, + /// Debug information for optimized code is present. pub optdbginfo: bool, } @@ -743,21 +805,42 @@ impl<'t> TryFromCtx<'t, Endian> for ProcedureFlags { } } -/// The information parsed from a symbol record with kind -/// `S_GPROC32`, `S_GPROC32_ST`, `S_LPROC32`, `S_LPROC32_ST` -/// `S_GPROC32_ID`, `S_LPROC32_ID`, `S_LPROC32_DPC`, or `S_LPROC32_DPC_ID` +/// A procedure, such as a function or method. +/// +/// Symbol kinds: +/// - `S_GPROC32`, `S_GPROC32_ST` for global procedures +/// - `S_LPROC32`, `S_LPROC32_ST` for local procedures +/// - `S_LPROC32_DPC` for DPC procedures +/// - `S_GPROC32_ID`, `S_LPROC32_ID`, `S_LPROC32_DPC_ID` for procedures referencing types from the +/// ID stream rather than the Type stream. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ProcedureSymbol<'t> { + /// Whether this is a global or local procedure. pub global: bool, + /// Indicates Deferred Procedure Calls (DPC). + pub dpc: bool, + /// The parent scope that this procedure is nested in. pub parent: SymbolIndex, + /// The end symbol of this procedure. pub end: SymbolIndex, + /// The next procedure symbol.
pub next: SymbolIndex, + /// The length of the code block covered by this procedure. pub len: u32, + /// Debug start. pub dbg_start_offset: u32, + /// Debug end. pub dbg_end_offset: u32, + /// Identifier of the procedure type. + /// + /// The type contains the complete signature, including parameters, modifiers and the return + /// type. pub type_index: TypeIndex, + /// Code offset of the start of this procedure. pub offset: PdbInternalSectionOffset, + /// Detailed flags of this procedure. pub flags: ProcedureFlags, + /// The full, demangled name of the procedure. pub name: RawString<'t>, } @@ -773,8 +856,14 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { _ => false, }; + let dpc = match kind { + S_LPROC32_DPC | S_LPROC32_DPC_ID => true, + _ => false, + }; + let symbol = ProcedureSymbol { global, + dpc, parent: buf.parse()?, end: buf.parse()?, next: buf.parse()?, @@ -791,14 +880,24 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { } } -/// The information parsed from a symbol record with kind -/// `S_INLINESITE` or `S_INLINESITE2`. +/// The callsite of an inlined function. +/// +/// Symbol kind `S_INLINESITE`, or `S_INLINESITE2`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct InlineSiteSymbol<'t> { + /// Index of the parent function. + /// + /// This might either be a [`ProcedureSymbol`] or another `InlineSiteSymbol`. + /// + /// [`ProcedureSymbol`]: struct.ProcedureSymbol.html pub parent: SymbolIndex, + /// The end symbol of this callsite. pub end: SymbolIndex, + /// Identifier of the type describing the inline function. pub inlinee: ItemId, + /// The total number of invocations of the inline function. pub invocations: Option, + /// Binary annotations containing the line program of this call site. pub annotations: BinaryAnnotations<'t>, } @@ -824,10 +923,12 @@ impl<'t> TryFromCtx<'t, SymbolKind> for InlineSiteSymbol<'t> { } } -/// The information parsed from a symbol record with kind -/// `S_BUILDINFO`.
+/// Reference to build information. +/// +/// Symbol kind `S_BUILDINFO`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct BuildInfoSymbol { + /// Identifier of the build information record. pub id: ItemId, } @@ -844,11 +945,14 @@ impl<'t> TryFromCtx<'t, SymbolKind> for BuildInfoSymbol { } } -/// The information parsed from a symbol record with kind -/// `S_OBJNAME`, or `S_OBJNAME_ST`. +/// Name of the object file of this module. +/// +/// Symbol kind `S_OBJNAME`, or `S_OBJNAME_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ObjNameSymbol<'t> { + /// Signature. pub signature: u32, + /// Path to the object file. pub name: RawString<'t>, } @@ -868,12 +972,16 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ObjNameSymbol<'t> { } } -/// A version number refered to by `ExtendedCompileFlagsSymbol`. +/// A version number referred to by `CompileFlagsSymbol`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct CompilerVersion { + /// The major version number. pub major: u16, + /// The minor version number. pub minor: u16, + /// The build (patch) version number. pub build: u16, + /// The QFE (quick fix engineering) number. pub qfe: Option, } @@ -895,9 +1003,9 @@ impl<'t> TryFromCtx<'t, bool> for CompilerVersion { } } -/// Compile flags declared in `ExtendedCompileFlagsSymbol`. +/// Compile flags declared in `CompileFlagsSymbol`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct ExtendedCompileFlags { +pub struct CompileFlags { /// Compiled for edit and continue. edit_and_continue: bool, /// Compiled without debugging info.
@@ -924,7 +1032,7 @@ pub struct ExtendedCompileFlags { exp_module: bool, } -impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlags { +impl<'t> TryFromCtx<'t, SymbolKind> for CompileFlags { type Error = Error; type Size = usize; @@ -934,7 +1042,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlags { let raw = this.pread_with::(0, LE)?; this.pread::(2)?; // unused - let flags = ExtendedCompileFlags { + let flags = CompileFlags { edit_and_continue: raw & 1 != 0, no_debug_info: (raw >> 1) & 1 != 0, link_time_codegen: (raw >> 2) & 1 != 0, @@ -953,19 +1061,27 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlags { } } -/// The information parsed from a symbol record with kind -/// `S_COMPILE2`, `S_COMPILE2_ST`, or `S_COMPILE3` +/// Flags used to compile a module. +/// +/// Symbol kind `S_COMPILE2`, `S_COMPILE2_ST`, or `S_COMPILE3`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct ExtendedCompileFlagsSymbol<'t> { +pub struct CompileFlagsSymbol<'t> { + /// The source code language. pub language: SourceLanguage, - pub flags: ExtendedCompileFlags, + /// Compiler flags. + pub flags: CompileFlags, + /// Machine type of the compilation target. pub cpu_type: CPUType, + /// Version of the compiler frontend. pub frontend_version: CompilerVersion, + /// Version of the compiler backend. pub backend_version: CompilerVersion, + /// Display name of the compiler. pub version_string: RawString<'t>, + // TODO: Command block for S_COMPILE2? 
} -impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlagsSymbol<'t> { +impl<'t> TryFromCtx<'t, SymbolKind> for CompileFlagsSymbol<'t> { type Error = Error; type Size = usize; @@ -973,7 +1089,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlagsSymbol<'t> { let mut buf = ParseBuffer::from(this); let has_qfe = kind == S_COMPILE3; - let symbol = ExtendedCompileFlagsSymbol { + let symbol = CompileFlagsSymbol { language: buf.parse()?, flags: buf.parse_with(kind)?, cpu_type: buf.parse()?, @@ -986,10 +1102,12 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ExtendedCompileFlagsSymbol<'t> { } } -/// The information parsed from a symbol record with kind -/// `S_UNAMESPACE`, or `S_UNAMESPACE_ST`. +/// A using namespace directive. +/// +/// Symbol kind `S_UNAMESPACE`, or `S_UNAMESPACE_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct UsingNamespaceSymbol<'t> { + /// The name of the imported namespace. pub name: RawString<'t>, } @@ -1020,19 +1138,30 @@ const CV_LVARFLAG_ISOPTIMIZEDOUT: u16 = 0x80; const CV_LVARFLAG_ISENREG_GLOB: u16 = 0x100; const CV_LVARFLAG_ISENREG_STAT: u16 = 0x200; -/// The information parsed from a CV_LVARFLAGS bit field +/// Flags for a [`LocalSymbol`](struct.LocalSymbol.html). 
#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct LocalVariableFlags { - pub isparam: bool, // Variable is a parameter - pub addrtaken: bool, // Address is taken - pub compgenx: bool, // Variable is compiler generated - pub isaggregate: bool, // The symbol is splitted in temporaries, which are treated by compiler as independent entities - pub isaliased: bool, // Variable has multiple simultaneous lifetimes - pub isalias: bool, // Represents one of the multiple simultaneous lifetimes - pub isretvalue: bool, // Represents a function return value - pub isoptimizedout: bool, // Variable has no lifetimes - pub isenreg_glob: bool, // Variable is an enregistered global - pub isenreg_stat: bool, // Variable is an enregistered static + /// Variable is a parameter. + pub isparam: bool, + /// Address is taken. + pub addrtaken: bool, + /// Variable is compiler generated. + pub compgenx: bool, + /// The symbol is split into temporaries, which are treated by the compiler as independent + /// entities. + pub isaggregate: bool, + /// Variable has multiple simultaneous lifetimes. + pub isaliased: bool, + /// Represents one of the multiple simultaneous lifetimes. + pub isalias: bool, + /// Represents a function return value. + pub isretvalue: bool, + /// Variable has no lifetimes. + pub isoptimizedout: bool, + /// Variable is an enregistered global. + pub isenreg_glob: bool, + /// Variable is an enregistered static. + pub isenreg_stat: bool, } impl<'t> TryFromCtx<'t, Endian> for LocalVariableFlags { @@ -1059,12 +1188,16 @@ impl<'t> TryFromCtx<'t, Endian> for LocalVariableFlags { } } -/// The information parsed from a symbol record with kind -/// `S_LOCAL` +/// A local symbol in optimized code. +/// +/// Symbol kind `S_LOCAL`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct LocalSymbol<'t> { + /// The type of the symbol. pub type_index: TypeIndex, + /// Flags for this symbol. pub flags: LocalVariableFlags, + /// Name of the symbol.
pub name: RawString<'t>, } @@ -1086,13 +1219,20 @@ impl<'t> TryFromCtx<'t, SymbolKind> for LocalSymbol<'t> { } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L4456 +/// Flags of an [`ExportSymbol`](struct.ExportSymbol.html). #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ExportSymbolFlags { + /// An exported constant. pub constant: bool, + /// Exported data (e.g. a static variable). pub data: bool, + /// A private symbol. pub private: bool, + /// A symbol with no name. pub no_name: bool, + /// Ordinal was explicitly assigned. pub ordinal: bool, + /// This is a forwarder. pub forwarder: bool, } @@ -1116,12 +1256,16 @@ impl<'t> TryFromCtx<'t, Endian> for ExportSymbolFlags { } } -/// The information parsed from a symbol record with kind -/// `S_EXPORT` +/// An exported symbol. +/// +/// Symbol kind `S_EXPORT`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ExportSymbol<'t> { + /// Ordinal of the symbol. pub ordinal: u16, + /// Flags declaring the type of the exported symbol. pub flags: ExportSymbolFlags, + /// The name of the exported symbol. 
pub name: RawString<'t>, } @@ -1216,6 +1360,46 @@ mod tests { mod parsing { use crate::symbol::*; + #[test] + fn kind_0006() { + let data = &[6, 0]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x0006); + assert_eq!(symbol.parse().expect("parse"), SymbolData::ScopeEnd); + } + + #[test] + fn kind_1101() { + let data = &[1, 17, 0, 0, 0, 0, 42, 32, 67, 73, 76, 32, 42, 0]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x1101); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::ObjName(ObjNameSymbol { + signature: 0, + name: "* CIL *".into(), + }) + ); + } + + #[test] + fn kind_1106() { + let data = &[6, 17, 120, 34, 0, 0, 18, 0, 116, 104, 105, 115, 0, 0]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x1106); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::RegisterVariable(RegisterVariableSymbol { + type_index: 8824, + register: 18, + name: "this".into(), + }) + ); + } + #[test] fn kind_110e() { let data = &[ @@ -1242,6 +1426,18 @@ mod tests { ); } + #[test] + fn kind_1124() { + let data = &[36, 17, 115, 116, 100, 0]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x1124); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::UsingNamespace(UsingNamespaceSymbol { name: "std".into() }) + ); + } + #[test] fn kind_1125() { let data = &[ @@ -1438,5 +1634,164 @@ mod tests { }) ); } + + #[test] + fn kind_1116() { + let data = &[ + 22, 17, 7, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 14, 0, 10, 0, 115, 98, 77, 105, 99, + 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 76, 73, 78, 75, 0, 0, 0, 0, + ]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x1116); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::CompileFlags(CompileFlagsSymbol { + language: SourceLanguage::Link, + flags: CompileFlags { + edit_and_continue: false, + no_debug_info: false, + link_time_codegen: false, + no_data_align: false, + managed: false, + security_checks: false, + hot_patch: 
false, + cvtcil: false, + msil_module: false, + sdl: false, + pgo: false, + exp_module: false, + }, + cpu_type: CPUType::Intel80386, + frontend_version: CompilerVersion { + major: 0, + minor: 0, + build: 0, + qfe: None, + }, + backend_version: CompilerVersion { + major: 14, + minor: 10, + build: 25203, + qfe: None, + }, + version_string: "Microsoft (R) LINK".into(), + }) + ); + } + + #[test] + fn kind_113c() { + let data = &[ + 60, 17, 1, 36, 2, 0, 7, 0, 19, 0, 13, 0, 6, 102, 0, 0, 19, 0, 13, 0, 6, 102, 0, 0, + 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 79, 112, 116, 105, + 109, 105, 122, 105, 110, 103, 32, 67, 111, 109, 112, 105, 108, 101, 114, 0, + ]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x113c); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::CompileFlags(CompileFlagsSymbol { + language: SourceLanguage::Cpp, + flags: CompileFlags { + edit_and_continue: false, + no_debug_info: false, + link_time_codegen: true, + no_data_align: false, + managed: false, + security_checks: true, + hot_patch: false, + cvtcil: false, + msil_module: false, + sdl: true, + pgo: false, + exp_module: false, + }, + cpu_type: CPUType::Pentium3, + frontend_version: CompilerVersion { + major: 19, + minor: 13, + build: 26118, + qfe: Some(0), + }, + backend_version: CompilerVersion { + major: 19, + minor: 13, + build: 26118, + qfe: Some(0), + }, + version_string: "Microsoft (R) Optimizing Compiler".into(), + }) + ); + } + + #[test] + fn kind_113e() { + let data = &[62, 17, 193, 19, 0, 0, 1, 0, 116, 104, 105, 115, 0, 0]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x113e); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::Local(LocalSymbol { + type_index: 5057, + flags: LocalVariableFlags { + isparam: true, + addrtaken: false, + compgenx: false, + isaggregate: false, + isaliased: false, + isalias: false, + isretvalue: false, + isoptimizedout: false, + isenreg_glob: false, + isenreg_stat: false, + }, + name: 
"this".into(), + }) + ); + } + + #[test] + fn kind_114c() { + let data = &[76, 17, 95, 17, 0, 0]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x114c); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::BuildInfo(BuildInfoSymbol { id: 0x115F }) + ); + } + + #[test] + fn kind_114d() { + let data = &[ + 77, 17, 144, 1, 0, 0, 208, 1, 0, 0, 121, 17, 0, 0, 12, 6, 3, 0, + ]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x114d); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::InlineSite(InlineSiteSymbol { + parent: SymbolIndex(0x00000190), + end: SymbolIndex(0x000001d0), + inlinee: 4473, + invocations: None, + annotations: BinaryAnnotations::new(&[12, 6, 3, 0]), + }) + ); + } + + #[test] + fn kind_114e() { + let data = &[78, 17]; + + let symbol = Symbol(data); + assert_eq!(symbol.raw_kind(), 0x114e); + assert_eq!(symbol.parse().expect("parse"), SymbolData::InlineSiteEnd); + } } } From 8da50e20e5f9c9f41552b0d71ba2753886387b82 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 2 Aug 2019 09:52:14 +0200 Subject: [PATCH 11/31] Fix doc comments of SymbolIndex --- src/symbol/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index dfb68d3..d784e41 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -28,14 +28,14 @@ pub type Register = u16; /// A reference into the symbol table of a module. /// -/// To retrieve the symbol referenced by this index, use [`SymbolTable::iter_at`]. When iterating, +/// To retrieve the symbol referenced by this index, use [`ModuleInfo::symbols_at`]. When iterating, /// use [`SymbolIter::seek`] to jump between symbols. /// /// The index might also indicate the absence of a symbol (numeric value `0`). This is indicated by /// `is_none` returning `false`. Seeking to this symbol will return an empty iterator. 
/// -/// [`SymbolTable::iter_at`]: struct.SymbolTable.html#method.iter_at [`SymbolIter::seek`]: -/// struct.SymbolIter.html#method.seek +/// [`ModuleInfo::symbols_at`]: struct.ModuleInfo.html#method.symbols_at +/// [`SymbolIter::seek`]: struct.SymbolIter.html#method.seek #[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct SymbolIndex(pub u32); From 8eb8c93492a7d0add0ec07a37711019788b996f2 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 2 Aug 2019 10:30:14 +0200 Subject: [PATCH 12/31] Fix symbols tests and lints --- src/symbol/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index d784e41..e356a29 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -1570,6 +1570,7 @@ mod tests { symbol.parse().expect("parse"), SymbolData::Procedure(ProcedureSymbol { global: true, + dpc: false, parent: SymbolIndex(0), end: SymbolIndex(560), next: SymbolIndex(0), @@ -1609,6 +1610,7 @@ mod tests { symbol.parse().expect("parse"), SymbolData::Procedure(ProcedureSymbol { global: false, + dpc: false, parent: SymbolIndex(0), end: SymbolIndex(412), next: SymbolIndex(0), @@ -1776,8 +1778,8 @@ mod tests { assert_eq!( symbol.parse().expect("parse"), SymbolData::InlineSite(InlineSiteSymbol { - parent: SymbolIndex(0x00000190), - end: SymbolIndex(0x000001d0), + parent: SymbolIndex(0x0190), + end: SymbolIndex(0x01d0), inlinee: 4473, invocations: None, annotations: BinaryAnnotations::new(&[12, 6, 3, 0]), From 91c5a6753ad1e1f8b2a7fb92495f7b4c1406de93 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 2 Aug 2019 11:26:21 +0200 Subject: [PATCH 13/31] Expose the index of symbols --- src/symbol/mod.rs | 243 +++++++++++++++++++++++++++++----------------- 1 file changed, 156 insertions(+), 87 deletions(-) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index e356a29..2ece492 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -31,8 +31,9 @@ pub type Register = u16; /// To 
retrieve the symbol referenced by this index, use [`ModuleInfo::symbols_at`]. When iterating, /// use [`SymbolIter::seek`] to jump between symbols. /// -/// The index might also indicate the absence of a symbol (numeric value `0`). This is indicated by -/// `is_none` returning `false`. Seeking to this symbol will return an empty iterator. +/// The numeric value of this index corresponds to the binary offset of the symbol in its symbol +/// stream. The index might also indicate the absence of a symbol (numeric value `0`). This is +/// indicated by `is_none` returning `false`. Seeking to this symbol will return an empty iterator. /// /// [`ModuleInfo::symbols_at`]: struct.ModuleInfo.html#method.symbols_at /// [`SymbolIter::seek`]: struct.SymbolIter.html#method.seek @@ -90,66 +91,6 @@ impl<'a> TryFromCtx<'a, Endian> for SymbolIndex { } } -/// PDB symbol tables contain names, locations, and metadata about functions, global/static data, -/// constants, data types, and more. -/// -/// The `SymbolTable` holds a `SourceView` referencing the symbol table inside the PDB file. All the -/// data structures returned by a `SymbolTable` refer to that buffer. -/// -/// # Example -/// -/// ``` -/// # use pdb::FallibleIterator; -/// # -/// # fn test() -> pdb::Result { -/// let file = std::fs::File::open("fixtures/self/foo.pdb")?; -/// let mut pdb = pdb::PDB::open(file)?; -/// -/// let symbol_table = pdb.global_symbols()?; -/// let address_map = pdb.address_map()?; -/// -/// # let mut count: usize = 0; -/// let mut symbols = symbol_table.iter(); -/// while let Some(symbol) = symbols.next()? { -/// match symbol.parse() { -/// Ok(pdb::SymbolData::Public(data)) if data.function => { -/// // we found the location of a function! 
-/// let rva = data.offset.to_rva(&address_map).unwrap_or_default(); -/// println!("{} is {}", rva, data.name); -/// # count += 1; -/// } -/// _ => {} -/// } -/// } -/// -/// # Ok(count) -/// # } -/// # assert!(test().expect("test") > 2000); -/// ``` -#[derive(Debug)] -pub struct SymbolTable<'s> { - stream: Stream<'s>, -} - -impl<'s> SymbolTable<'s> { - /// Parses a symbol table from raw stream data. - pub(crate) fn parse(stream: Stream<'s>) -> Result { - Ok(SymbolTable { stream }) - } - - /// Returns an iterator that can traverse the symbol table in sequential order. - pub fn iter(&self) -> SymbolIter<'_> { - SymbolIter::new(self.stream.parse_buffer()) - } - - /// Returns an iterator over symbols starting at the given index. - pub fn iter_at(&self, index: SymbolIndex) -> SymbolIter<'_> { - let mut iter = self.iter(); - iter.seek(index); - iter - } -} - /// Represents a symbol from the symbol table. /// /// A `Symbol` is represented internally as a `&[u8]`, and in general the bytes inside are not @@ -158,21 +99,30 @@ impl<'s> SymbolTable<'s> { /// To avoid copying, `Symbol`s exist as references to data owned by the parent `SymbolTable`. /// Therefore, a `Symbol` may not outlive its parent `SymbolTable`. #[derive(Copy, Clone, PartialEq)] -pub struct Symbol<'t>(&'t [u8]); +pub struct Symbol<'t> { + index: SymbolIndex, + data: &'t [u8], +} impl<'t> Symbol<'t> { + /// The index of this symbol in the containing symbol stream. + #[inline] + pub fn index(&self) -> SymbolIndex { + self.index + } + /// Returns the kind of symbol identified by this Symbol. #[inline] pub fn raw_kind(&self) -> SymbolKind { - debug_assert!(self.0.len() >= 2); - self.0.pread_with(0, LE).unwrap_or_default() + debug_assert!(self.data.len() >= 2); + self.data.pread_with(0, LE).unwrap_or_default() } /// Returns the raw bytes of this symbol record, including the symbol type and extra data, but /// not including the preceding symbol length indicator. 
#[inline] pub fn raw_bytes(&self) -> &'t [u8] { - self.0 + self.data } /// Parse the symbol into the `SymbolData` it contains. @@ -212,7 +162,7 @@ impl<'t> fmt::Debug for Symbol<'t> { f, "Symbol{{ kind: 0x{:4x} [{} bytes] }}", self.raw_kind(), - self.0.len() + self.data.len() ) } } @@ -1286,6 +1236,66 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ExportSymbol<'t> { } } +/// PDB symbol tables contain names, locations, and metadata about functions, global/static data, +/// constants, data types, and more. +/// +/// The `SymbolTable` holds a `SourceView` referencing the symbol table inside the PDB file. All the +/// data structures returned by a `SymbolTable` refer to that buffer. +/// +/// # Example +/// +/// ``` +/// # use pdb::FallibleIterator; +/// # +/// # fn test() -> pdb::Result { +/// let file = std::fs::File::open("fixtures/self/foo.pdb")?; +/// let mut pdb = pdb::PDB::open(file)?; +/// +/// let symbol_table = pdb.global_symbols()?; +/// let address_map = pdb.address_map()?; +/// +/// # let mut count: usize = 0; +/// let mut symbols = symbol_table.iter(); +/// while let Some(symbol) = symbols.next()? { +/// match symbol.parse() { +/// Ok(pdb::SymbolData::Public(data)) if data.function => { +/// // we found the location of a function! +/// let rva = data.offset.to_rva(&address_map).unwrap_or_default(); +/// println!("{} is {}", rva, data.name); +/// # count += 1; +/// } +/// _ => {} +/// } +/// } +/// +/// # Ok(count) +/// # } +/// # assert!(test().expect("test") > 2000); +/// ``` +#[derive(Debug)] +pub struct SymbolTable<'s> { + stream: Stream<'s>, +} + +impl<'s> SymbolTable<'s> { + /// Parses a symbol table from raw stream data. + pub(crate) fn parse(stream: Stream<'s>) -> Result { + Ok(SymbolTable { stream }) + } + + /// Returns an iterator that can traverse the symbol table in sequential order. + pub fn iter(&self) -> SymbolIter<'_> { + SymbolIter::new(self.stream.parse_buffer()) + } + + /// Returns an iterator over symbols starting at the given index. 
+ pub fn iter_at(&self, index: SymbolIndex) -> SymbolIter<'_> { + let mut iter = self.iter(); + iter.seek(index); + iter + } +} + /// A `SymbolIter` iterates over a `SymbolTable`, producing `Symbol`s. /// /// Symbol tables are represented internally as a series of records, each of which have a length, a @@ -1333,6 +1343,8 @@ impl<'t> FallibleIterator for SymbolIter<'t> { fn next(&mut self) -> Result> { while !self.buf.is_empty() { + let index = SymbolIndex(self.buf.pos() as u32); + // read the length of the next symbol let symbol_length = self.buf.parse::()? as usize; if symbol_length < 2 { @@ -1342,7 +1354,7 @@ impl<'t> FallibleIterator for SymbolIter<'t> { // grab the symbol itself let data = self.buf.take(symbol_length)?; - let symbol = Symbol(data); + let symbol = Symbol { index, data }; // skip over padding in the symbol table match symbol.raw_kind() { @@ -1364,7 +1376,10 @@ mod tests { fn kind_0006() { let data = &[6, 0]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x0006); assert_eq!(symbol.parse().expect("parse"), SymbolData::ScopeEnd); } @@ -1373,7 +1388,10 @@ mod tests { fn kind_1101() { let data = &[1, 17, 0, 0, 0, 0, 42, 32, 67, 73, 76, 32, 42, 0]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1101); assert_eq!( symbol.parse().expect("parse"), @@ -1388,7 +1406,10 @@ mod tests { fn kind_1106() { let data = &[6, 17, 120, 34, 0, 0, 18, 0, 116, 104, 105, 115, 0, 0]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1106); assert_eq!( symbol.parse().expect("parse"), @@ -1408,7 +1429,10 @@ mod tests { 110, 115, 0, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x110e); assert_eq!( symbol.parse().expect("parse"), @@ -1430,7 +1454,10 @@ mod tests { fn 
kind_1124() { let data = &[36, 17, 115, 116, 100, 0]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1124); assert_eq!( symbol.parse().expect("parse"), @@ -1444,7 +1471,10 @@ mod tests { 37, 17, 0, 0, 0, 0, 108, 0, 0, 0, 1, 0, 66, 97, 122, 58, 58, 102, 95, 112, 117, 98, 108, 105, 99, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1125); assert_eq!( symbol.parse().expect("parse"), @@ -1461,7 +1491,10 @@ mod tests { #[test] fn kind_1108() { let data = &[8, 17, 112, 6, 0, 0, 118, 97, 95, 108, 105, 115, 116, 0]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1108); assert_eq!( symbol.parse().expect("parse"), @@ -1478,7 +1511,10 @@ mod tests { 7, 17, 201, 18, 0, 0, 1, 0, 95, 95, 73, 83, 65, 95, 65, 86, 65, 73, 76, 65, 66, 76, 69, 95, 83, 83, 69, 50, 0, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1107); assert_eq!( symbol.parse().expect("parse"), @@ -1497,7 +1533,10 @@ mod tests { 13, 17, 116, 0, 0, 0, 16, 0, 0, 0, 3, 0, 95, 95, 105, 115, 97, 95, 97, 118, 97, 105, 108, 97, 98, 108, 101, 0, 0, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x110d); assert_eq!( symbol.parse().expect("parse"), @@ -1520,7 +1559,10 @@ mod tests { 12, 17, 32, 0, 0, 0, 240, 36, 1, 0, 2, 0, 36, 120, 100, 97, 116, 97, 115, 121, 109, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x110c); assert_eq!( symbol.parse().expect("parse"), @@ -1543,7 +1585,10 @@ mod tests { 39, 17, 0, 0, 0, 0, 128, 4, 0, 0, 182, 0, 99, 97, 112, 116, 117, 114, 101, 95, 99, 117, 114, 114, 101, 110, 116, 95, 99, 111, 110, 116, 101, 120, 116, 0, 0, 0, ]; - 
let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1127); assert_eq!( symbol.parse().expect("parse"), @@ -1564,7 +1609,10 @@ mod tests { 16, 0, 0, 64, 85, 0, 0, 1, 0, 0, 66, 97, 122, 58, 58, 102, 95, 112, 114, 111, 116, 101, 99, 116, 101, 100, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1110); assert_eq!( symbol.parse().expect("parse"), @@ -1604,7 +1652,10 @@ mod tests { 128, 16, 0, 0, 196, 87, 0, 0, 1, 0, 128, 95, 95, 115, 99, 114, 116, 95, 99, 111, 109, 109, 111, 110, 95, 109, 97, 105, 110, 0, 0, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x110f); assert_eq!( symbol.parse().expect("parse"), @@ -1644,7 +1695,10 @@ mod tests { 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 76, 73, 78, 75, 0, 0, 0, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x1116); assert_eq!( symbol.parse().expect("parse"), @@ -1690,7 +1744,10 @@ mod tests { 109, 105, 122, 105, 110, 103, 32, 67, 111, 109, 112, 105, 108, 101, 114, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x113c); assert_eq!( symbol.parse().expect("parse"), @@ -1732,7 +1789,10 @@ mod tests { fn kind_113e() { let data = &[62, 17, 193, 19, 0, 0, 1, 0, 116, 104, 105, 115, 0, 0]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x113e); assert_eq!( symbol.parse().expect("parse"), @@ -1759,7 +1819,10 @@ mod tests { fn kind_114c() { let data = &[76, 17, 95, 17, 0, 0]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x114c); assert_eq!( symbol.parse().expect("parse"), @@ -1773,7 +1836,10 @@ 
mod tests { 77, 17, 144, 1, 0, 0, 208, 1, 0, 0, 121, 17, 0, 0, 12, 6, 3, 0, ]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x114d); assert_eq!( symbol.parse().expect("parse"), @@ -1791,7 +1857,10 @@ mod tests { fn kind_114e() { let data = &[78, 17]; - let symbol = Symbol(data); + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; assert_eq!(symbol.raw_kind(), 0x114e); assert_eq!(symbol.parse().expect("parse"), SymbolData::InlineSiteEnd); } From b0edcb724a7d53d44461842575bf4d1dcc3f49be Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 21 Aug 2019 18:20:38 +0200 Subject: [PATCH 14/31] Expose a higher-level inlinee line iterator --- src/modi/c13.rs | 361 ++++++++++++++++++++++++++++++-------- src/modi/mod.rs | 47 +++-- src/symbol/annotations.rs | 134 +------------- src/symbol/mod.rs | 2 +- 4 files changed, 320 insertions(+), 224 deletions(-) diff --git a/src/modi/c13.rs b/src/modi/c13.rs index 5ed1a70..34dc14f 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -1,7 +1,8 @@ -use scroll::Pread; +use scroll::{ctx::TryFromCtx, Pread}; use crate::common::*; use crate::modi::{constants, FileChecksum, FileIndex, FileInfo, LineInfo, LineInfoKind}; +use crate::symbol::{BinaryAnnotation, BinaryAnnotationsIter, InlineSiteSymbol}; use crate::FallibleIterator; #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -97,79 +98,123 @@ impl<'a> FallibleIterator for DebugSubsectionIterator<'a> { } #[derive(Clone, Copy, Debug, Default, Pread)] -struct DebugLinesHeader { - /// Section offset of this line contribution. - offset: PdbInternalSectionOffset, - /// See LineFlags enumeration. - flags: u16, - /// Code size of this line contribution. 
- code_size: u32, +struct DebugInlineeLinesHeader { + /// The signature of the inlinees + signature: u32, } -impl DebugLinesHeader { - fn has_columns(self) -> bool { - self.flags & constants::CV_LINES_HAVE_COLUMNS != 0 +impl DebugInlineeLinesHeader { + pub fn has_extra_files(self) -> bool { + self.signature == constants::CV_INLINEE_SOURCE_LINE_SIGNATURE_EX } } -#[derive(Clone, Copy, Debug, Default, Pread)] -struct DebugInlineesHeader { - /// The signature of the inlinees - signature: u32, -} - -#[derive(Clone, Copy, Debug, Default, Pread)] -pub struct InlineeSourceLine { +#[derive(Clone, Copy, Debug, Default, PartialEq)] +pub struct InlineeSourceLine<'a> { pub inlinee: ItemId, - // This should be FileIndex - pub file_id: u32, - pub source_line_num: u32, + pub file_id: FileIndex, + pub line: u32, + extra_files: &'a [u8], } -#[derive(Debug, Clone)] -struct DebugInlineesSubsection<'a> { - header: DebugInlineesHeader, - data: &'a [u8], +impl<'a> InlineeSourceLine<'a> { + // TODO(ja): Implement extra files iterator } -impl<'a> DebugInlineesSubsection<'a> { - fn parse(data: &'a [u8]) -> Result { - let mut buf = ParseBuffer::from(data); - let header = buf.parse()?; - let data = &data[buf.pos()..]; - Ok(DebugInlineesSubsection { header, data }) - } +impl<'a> TryFromCtx<'a, DebugInlineeLinesHeader> for InlineeSourceLine<'a> { + type Error = Error; + type Size = usize; - fn lines(&self) -> DebugInlineesSourceLineIterator<'a> { - DebugInlineesSourceLineIterator { - header: self.header, - buf: ParseBuffer::from(self.data), - } + fn try_from_ctx(this: &'a [u8], header: DebugInlineeLinesHeader) -> Result<(Self, Self::Size)> { + let mut buf = ParseBuffer::from(this); + let inlinee = buf.parse()?; + let file_id = buf.parse()?; + let line = buf.parse()?; + + let extra_files = if header.has_extra_files() { + let file_count = buf.parse::()? as usize; + buf.take(file_count * std::mem::size_of::())? 
+ } else { + &[] + }; + + let source_line = Self { + inlinee, + file_id, + line, + extra_files, + }; + + Ok((source_line, buf.pos())) } } #[derive(Debug, Clone, Default)] -struct DebugInlineesSourceLineIterator<'a> { - header: DebugInlineesHeader, +struct DebugInlineeLinesIterator<'a> { + header: DebugInlineeLinesHeader, buf: ParseBuffer<'a>, } -impl<'a> FallibleIterator for DebugInlineesSourceLineIterator<'a> { - type Item = InlineeSourceLine; +impl<'a> FallibleIterator for DebugInlineeLinesIterator<'a> { + type Item = InlineeSourceLine<'a>; type Error = Error; fn next(&mut self) -> Result> { - if self.header.signature != constants::CV_INLINEE_SOURCE_LINE_SIGNATURE { - return Ok(None); - } if self.buf.is_empty() { Ok(None) } else { - Ok(Some(self.buf.parse()?)) + Ok(Some(self.buf.parse_with(self.header)?)) } } } +#[derive(Clone, Debug, Default)] +struct DebugInlineeLinesSubsection<'a> { + header: DebugInlineeLinesHeader, + data: &'a [u8], +} + +impl<'a> DebugInlineeLinesSubsection<'a> { + fn parse(data: &'a [u8]) -> Result { + let mut buf = ParseBuffer::from(data); + let header = buf.parse::()?; + + Ok(DebugInlineeLinesSubsection { + header, + data: &data[buf.pos()..], + }) + } + + /// Iterate through all inlinees. + fn lines(&self) -> DebugInlineeLinesIterator<'a> { + DebugInlineeLinesIterator { + header: self.header, + buf: ParseBuffer::from(self.data), + } + } + + /// Retrieve the inlinee source line for the given inlinee. + fn find(&self, inlinee: ItemId) -> Result>> { + self.lines().find(|line| line.inlinee == inlinee) + } +} + +#[derive(Clone, Copy, Debug, Default, Pread)] +struct DebugLinesHeader { + /// Section offset of this line contribution. + offset: PdbInternalSectionOffset, + /// See LineFlags enumeration. + flags: u16, + /// Code size of this line contribution. 
+ code_size: u32, +} + +impl DebugLinesHeader { + fn has_columns(self) -> bool { + self.flags & constants::CV_LINES_HAVE_COLUMNS != 0 + } +} + struct DebugLinesSubsection<'a> { header: DebugLinesHeader, data: &'a [u8], @@ -581,7 +626,7 @@ impl<'a> FallibleIterator for C13LineIterator<'a> { return Ok(Some(LineInfo { offset: section_header.offset + line_entry.offset, - length: None, + length: None, // TODO(ja): Infer length from the next entry or the parent..? file_index: FileIndex(block_header.file_index), line_start: line_entry.start_line, line_end: line_entry.end_line, @@ -612,31 +657,132 @@ impl<'a> FallibleIterator for C13LineIterator<'a> { #[derive(Clone, Debug, Default)] pub struct C13InlineeLineIterator<'a> { - /// iterator over the inline source lines - lines: DebugInlineesSourceLineIterator<'a>, - /// Iterator over all subsections in the current module. - sections: DebugSubsectionIterator<'a>, + annotations: BinaryAnnotationsIter<'a>, + file_index: FileIndex, + code_offset_base: u32, + code_offset: PdbInternalSectionOffset, + code_length: u32, + current_line: u32, + current_line_length: u32, + current_col_start: u32, + current_col_end: u32, + line_kind: LineInfoKind, +} + +impl<'a> C13InlineeLineIterator<'a> { + fn new( + parent_offset: PdbInternalSectionOffset, + inline_site: &InlineSiteSymbol<'a>, + inlinee_line: InlineeSourceLine<'a>, + ) -> Self { + C13InlineeLineIterator { + annotations: inline_site.annotations.iter(), + file_index: inlinee_line.file_id, + code_offset_base: 0, + code_offset: parent_offset, + code_length: 0, + current_line: inlinee_line.line, + current_line_length: 1, + current_col_start: 1, + current_col_end: 100_000, // TODO(ja): Is this a good start value? + line_kind: LineInfoKind::Expression, + } + } } impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { - type Item = InlineeSourceLine; + type Item = LineInfo; type Error = Error; fn next(&mut self) -> Result> { - loop { - if let Some(line) = self.lines.next()? 
{ - return Ok(Some(line)); - } - if let Some(section) = self.sections.next()? { - if section.kind == DebugSubsectionKind::InlineeLines { - let inlinees_section = DebugInlineesSubsection::parse(section.data)?; - self.lines = inlinees_section.lines(); + while let Some(op) = self.annotations.next()? { + match op { + BinaryAnnotation::CodeOffset(new_val) => { + self.code_offset.offset = new_val; } - continue; - } else { - return Ok(None); + BinaryAnnotation::ChangeCodeOffsetBase(new_val) => { + self.code_offset_base = new_val; + } + BinaryAnnotation::ChangeCodeOffset(delta) => { + self.code_offset = self.code_offset.wrapping_add(delta); + } + BinaryAnnotation::ChangeCodeLength(val) => { + // TODO(ja): Fix this + // if let Some(last_loc) = rv.last_mut() { + // if last_loc.length.is_none() && last_loc.kind == self.line_kind { + // last_loc.length = Some(val); + // } + // } + self.code_offset = self.code_offset.wrapping_add(val); + } + BinaryAnnotation::ChangeFile(new_val) => { + self.file_index = FileIndex(new_val); + } + BinaryAnnotation::ChangeLineOffset(delta) => { + self.current_line = (i64::from(self.current_line) + i64::from(delta)) as u32; + } + BinaryAnnotation::ChangeLineEndDelta(new_val) => { + self.current_line_length = new_val; + } + BinaryAnnotation::ChangeRangeKind(kind) => { + self.line_kind = match kind { + 0 => LineInfoKind::Expression, + 1 => LineInfoKind::Statement, + _ => self.line_kind, + }; + } + BinaryAnnotation::ChangeColumnStart(new_val) => { + self.current_col_start = new_val; + } + BinaryAnnotation::ChangeColumnEndDelta(delta) => { + self.current_col_end = + (i64::from(self.current_col_end) + i64::from(delta)) as u32; + } + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(code_delta, line_delta) => { + self.code_offset = PdbInternalSectionOffset { + section: self.code_offset.section, + offset: (i64::from(self.code_offset.offset) + i64::from(code_delta)) as u32, + }; + self.current_line = + (i64::from(self.current_line) + i64::from(line_delta)) 
as u32; + } + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(new_code_length, code_delta) => { + self.code_length = new_code_length; + self.code_offset = PdbInternalSectionOffset { + section: self.code_offset.section, + offset: (i64::from(self.code_offset.offset) + i64::from(code_delta)) as u32, + }; + } + BinaryAnnotation::ChangeColumnEnd(new_val) => { + self.current_col_end = new_val; + } + } + + if op.emits_line_info() { + // TODO(ja): Fix this + // if let Some(last_loc) = rv.last_mut() { + // if last_loc.length.is_none() && last_loc.kind == self.line_kind { + // last_loc.length = Some(self.code_offset.offset - self.code_offset_base); + // } + // } + + let line_info = LineInfo { + kind: self.line_kind, + file_index: self.file_index, + offset: self.code_offset + self.code_offset_base, + length: Some(self.code_length), + line_start: self.current_line, + line_end: self.current_line + self.current_line_length, + column_start: Some(self.current_col_start as u16), + column_end: Some(self.current_col_end as u16), + }; + + self.code_length = 0; + return Ok(Some(line_info)); } } + + Ok(None) } } @@ -664,22 +810,31 @@ impl<'a> FallibleIterator for C13FileIterator<'a> { pub struct C13LineProgram<'a> { data: &'a [u8], file_checksums: DebugFileChecksumsSubsection<'a>, + inlinee_lines: DebugInlineeLinesSubsection<'a>, } impl<'a> C13LineProgram<'a> { pub(crate) fn parse(data: &'a [u8]) -> Result { - let checksums_data = DebugSubsectionIterator::new(data) - .find(|sec| sec.kind == DebugSubsectionKind::FileChecksums)? - .map(|sec| sec.data); - - let file_checksums = match checksums_data { - Some(d) => DebugFileChecksumsSubsection::parse(d)?, - None => DebugFileChecksumsSubsection::default(), - }; + let mut file_checksums = DebugFileChecksumsSubsection::default(); + let mut inlinee_lines = DebugInlineeLinesSubsection::default(); + + let mut subsections = DebugSubsectionIterator::new(data); + while let Some(sec) = subsections.next()? 
{ + match sec.kind { + DebugSubsectionKind::FileChecksums => { + file_checksums = DebugFileChecksumsSubsection::parse(sec.data)? + } + DebugSubsectionKind::InlineeLines => { + inlinee_lines = DebugInlineeLinesSubsection::parse(sec.data)? + } + _ => {} + } + } Ok(C13LineProgram { data, file_checksums, + inlinee_lines, }) } @@ -712,16 +867,22 @@ impl<'a> C13LineProgram<'a> { } } - pub(crate) fn files(&self) -> C13FileIterator<'a> { - C13FileIterator { - checksums: self.file_checksums.entries().unwrap_or_default(), + pub(crate) fn inlinee_lines( + &self, + parent_offset: PdbInternalSectionOffset, + inline_site: &InlineSiteSymbol<'a>, + ) -> C13InlineeLineIterator<'a> { + match self.inlinee_lines.find(inline_site.inlinee) { + Ok(Some(inlinee_line)) => { + C13InlineeLineIterator::new(parent_offset, inline_site, inlinee_line) + } + _ => C13InlineeLineIterator::default(), } } - pub(crate) fn inlinee_lines(&self) -> C13InlineeLineIterator<'a> { - C13InlineeLineIterator { - sections: DebugSubsectionIterator::new(self.data), - lines: Default::default(), + pub(crate) fn files(&self) -> C13FileIterator<'a> { + C13FileIterator { + checksums: self.file_checksums.entries().unwrap_or_default(), } } @@ -739,3 +900,47 @@ impl<'a> C13LineProgram<'a> { }) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_inlinee_lines() { + let data = &[ + 0, 0, 0, 0, 254, 18, 0, 0, 104, 1, 0, 0, 24, 0, 0, 0, 253, 18, 0, 0, 104, 1, 0, 0, 28, + 0, 0, 0, 1, 0, 0, 128, 192, 0, 0, 0, 129, 2, 0, 0, 7, 0, 0, 128, 240, 0, 0, 0, 121, 9, + 0, 0, 8, 0, 0, 128, 240, 0, 0, 0, 62, 15, 0, 0, 9, 0, 0, 128, 240, 0, 0, 0, 10, 7, 0, + 0, 10, 0, 0, 128, 16, 2, 0, 0, 85, 1, 0, 0, 11, 0, 0, 128, 208, 5, 0, 0, 6, 4, 0, 0, + 12, 0, 0, 128, 208, 5, 0, 0, 211, 0, 0, 0, 14, 0, 0, 128, 208, 5, 0, 0, 119, 0, 0, 0, + 16, 0, 0, 128, 232, 5, 0, 0, 125, 0, 0, 0, 18, 0, 0, 128, 0, 6, 0, 0, 51, 3, 0, 0, 19, + 0, 0, 128, 0, 6, 0, 0, 236, 2, 0, 0, 20, 0, 0, 128, 0, 6, 0, 0, 4, 2, 0, 0, 21, 0, 0, + 128, 0, 6, 
0, 0, 138, 2, 0, 0, 23, 0, 0, 128, 224, 1, 0, 0, 55, 1, 0, 0, 24, 0, 0, 128, + 0, 6, 0, 0, 220, 1, 0, 0, 25, 0, 0, 128, 240, 0, 0, 0, 72, 8, 0, 0, 26, 0, 0, 128, 240, + 0, 0, 0, 51, 15, 0, 0, 27, 0, 0, 128, 224, 4, 0, 0, 92, 0, 0, 0, 28, 0, 0, 128, 240, 0, + 0, 0, 113, 8, 0, 0, 29, 0, 0, 128, 240, 0, 0, 0, 71, 10, 0, 0, + ]; + + let inlinee_lines = DebugInlineeLinesSubsection::parse(data).expect("parse inlinee lines"); + assert!(!inlinee_lines.header.has_extra_files()); + + let lines: Vec<_> = inlinee_lines + .lines() + .collect() + .expect("collect inlinee lines"); + + assert_eq!(lines.len(), 22); + + println!("{:#?}", lines); + + assert_eq!( + lines[0], + InlineeSourceLine { + inlinee: 0x12FE, + file_id: FileIndex(0x168), + line: 24, + extra_files: &[], + } + ) + } +} diff --git a/src/modi/mod.rs b/src/modi/mod.rs index 215465a..be3abc0 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -1,14 +1,14 @@ +use scroll::{ctx::TryFromCtx, Endian}; + use crate::common::*; use crate::dbi::Module; use crate::msf::Stream; -use crate::symbol::{SymbolIndex, SymbolIter}; +use crate::symbol::{InlineSiteSymbol, SymbolIndex, SymbolIter}; use crate::FallibleIterator; mod c13; mod constants; -pub use crate::modi::c13::InlineeSourceLine; - #[derive(Clone, Copy, Debug)] enum LinesSize { C11(usize), @@ -117,6 +117,15 @@ pub struct FileInfo<'a> { #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct FileIndex(pub u32); +impl<'a> TryFromCtx<'a, Endian> for FileIndex { + type Error = scroll::Error; + type Size = usize; + + fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + u32::try_from_ctx(this, le).map(|(num, s)| (Self(num), s)) + } +} + /// The kind of source construct a line info is referring to. 
#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum LineInfoKind { @@ -126,6 +135,12 @@ pub enum LineInfoKind { Statement, } +impl Default for LineInfoKind { + fn default() -> Self { + LineInfoKind::Expression + } +} + /// Mapping of a source code offset to a source file location. /// /// A line entry is always valid up to the subsequent entry. @@ -178,15 +193,6 @@ impl<'a> LineProgram<'a> { } } - /// Returns an iterator over all inlinees of this module. - pub fn inlinee_lines(&self) -> InlineeLineIterator<'a> { - match self.inner { - LineProgramInner::C13(ref inner) => InlineeLineIterator { - inner: InlineeLineIteratorInner::C13(inner.inlinee_lines()), - }, - } - } - /// Returns an iterator over all file records of this module. pub fn files(&self) -> FileIterator<'a> { match self.inner { @@ -213,6 +219,21 @@ impl<'a> LineProgram<'a> { } } + /// Returns an iterator over line records for an inline site. + pub fn inlinee_lines( + &self, + parent_offset: PdbInternalSectionOffset, + inline_site: &InlineSiteSymbol<'a>, + ) -> InlineeLineIterator<'a> { + match self.inner { + LineProgramInner::C13(ref inner) => InlineeLineIterator { + inner: InlineeLineIteratorInner::C13( + inner.inlinee_lines(parent_offset, inline_site), + ), + }, + } + } + /// Looks up file information for the specified file. 
pub fn get_file_info(&self, offset: FileIndex) -> Result> { match self.inner { @@ -271,7 +292,7 @@ impl Default for InlineeLineIterator<'_> { } impl<'a> FallibleIterator for InlineeLineIterator<'a> { - type Item = InlineeSourceLine; + type Item = LineInfo; type Error = Error; fn next(&mut self) -> Result> { diff --git a/src/symbol/annotations.rs b/src/symbol/annotations.rs index 4e036cc..c0681d4 100644 --- a/src/symbol/annotations.rs +++ b/src/symbol/annotations.rs @@ -1,8 +1,6 @@ use std::result; use crate::common::*; -use crate::modi::{FileIndex, InlineeSourceLine, LineInfo, LineInfoKind}; -use crate::symbol::SymbolIndex; use crate::FallibleIterator; /// These values correspond to the BinaryAnnotationOpcode enum from the @@ -101,6 +99,7 @@ impl BinaryAnnotation { } /// An iterator over binary annotations used by `S_INLINESITE`. +#[derive(Clone, Debug, Default)] pub struct BinaryAnnotationsIter<'t> { buffer: ParseBuffer<'t>, } @@ -210,34 +209,13 @@ impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { } } -#[derive(Copy, Clone, Debug)] -pub struct InstructionLocation { - pub range_kind: LineInfoKind, - pub offset_start: u32, - pub offset_end: u32, - pub line_start: u32, - pub line_end: u32, - pub col_start: u32, - pub col_end: u32, -} - -#[derive(Clone, Debug)] -pub struct Inlinee { - pub id: ItemId, - pub ptr: SymbolIndex, - pub parent: SymbolIndex, - pub file_offset: FileIndex, - pub base_line_num: u32, - pub locations: Vec, -} - /// Binary annotations of a symbol. /// /// The binary annotation mechanism supports recording a list of annotations in an instruction /// stream. The X64 unwind code and the DWARF standard have a similar design. /// /// Binary annotations are primarily used as line programs for inline function calls. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct BinaryAnnotations<'t> { data: &'t [u8], } @@ -248,114 +226,6 @@ impl<'t> BinaryAnnotations<'t> { BinaryAnnotations { data } } - /// Evalutes the annotations into line infos. - /// - /// `start_offset` is the address of the function that is the base for this - /// inline site. The `source_line` is the base of where the source - /// information is evaluated from. - pub fn evaluate( - &self, - start_offset: PdbInternalSectionOffset, - source_line: &InlineeSourceLine, - ) -> Result> { - let mut iter = self.iter(); - let mut rv: Vec = vec![]; - - let mut file_index = FileIndex(source_line.file_id); - let mut code_offset_base = 0; - let mut code_offset = start_offset; - let mut code_length = 0; - let mut current_line = source_line.source_line_num; - let mut current_line_length = 1; - let mut current_col_start = 1; - let mut current_col_end = 100_000; - let mut range_kind = LineInfoKind::Expression; - - while let Some(op) = iter.next()? 
{ - match op { - BinaryAnnotation::CodeOffset(new_val) => { - code_offset.offset = new_val; - } - BinaryAnnotation::ChangeCodeOffsetBase(new_val) => { - code_offset_base = new_val; - } - BinaryAnnotation::ChangeCodeOffset(delta) => { - code_offset = code_offset.wrapping_add(delta); - } - BinaryAnnotation::ChangeCodeLength(val) => { - if let Some(last_loc) = rv.last_mut() { - if last_loc.length.is_none() && last_loc.kind == range_kind { - last_loc.length = Some(val); - } - } - code_offset = code_offset.wrapping_add(val); - } - BinaryAnnotation::ChangeFile(new_val) => { - file_index = FileIndex(new_val); - } - BinaryAnnotation::ChangeLineOffset(delta) => { - current_line = (i64::from(current_line) + i64::from(delta)) as u32; - } - BinaryAnnotation::ChangeLineEndDelta(new_val) => { - current_line_length = new_val; - } - BinaryAnnotation::ChangeRangeKind(kind) => { - range_kind = match kind { - 0 => LineInfoKind::Expression, - 1 => LineInfoKind::Statement, - _ => range_kind, - }; - } - BinaryAnnotation::ChangeColumnStart(new_val) => { - current_col_start = new_val; - } - BinaryAnnotation::ChangeColumnEndDelta(delta) => { - current_col_end = (i64::from(current_col_end) + i64::from(delta)) as u32; - } - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(code_delta, line_delta) => { - code_offset = PdbInternalSectionOffset { - section: code_offset.section, - offset: (i64::from(code_offset.offset) + i64::from(code_delta)) as u32, - }; - current_line = (i64::from(current_line) + i64::from(line_delta)) as u32; - } - BinaryAnnotation::ChangeCodeLengthAndCodeOffset(new_code_length, code_delta) => { - code_length = new_code_length; - code_offset = PdbInternalSectionOffset { - section: code_offset.section, - offset: (i64::from(code_offset.offset) + i64::from(code_delta)) as u32, - }; - } - BinaryAnnotation::ChangeColumnEnd(new_val) => { - current_col_end = new_val; - } - } - - if op.emits_line_info() { - if let Some(last_loc) = rv.last_mut() { - if last_loc.length.is_none() && 
last_loc.kind == range_kind { - last_loc.length = Some(code_offset.offset - code_offset_base); - } - } - - rv.push(LineInfo { - kind: range_kind, - file_index, - offset: code_offset + code_offset_base, - length: Some(code_length), - line_start: current_line, - line_end: current_line + current_line_length, - column_start: Some(current_col_start as u16), - column_end: Some(current_col_end as u16), - }); - - code_length = 0; - } - } - - Ok(rv) - } - /// Iterates through binary annotations. pub fn iter(&self) -> BinaryAnnotationsIter<'t> { BinaryAnnotationsIter { diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 2ece492..7ef3150 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -839,7 +839,7 @@ pub struct InlineSiteSymbol<'t> { /// /// This might either be a [`ProcedureSymbol`] or another `InlineSiteSymbol`. /// - /// [`ProcedureSymbol`](struct.ProcedureSymbol.html) + /// [`ProcedureSymbol`]: struct.ProcedureSymbol.html pub parent: SymbolIndex, /// The end symbol of this callsite. 
pub end: SymbolIndex, From d15680dbf88b34a85984513628fef4997ec3362a Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 21 Aug 2019 19:28:47 +0200 Subject: [PATCH 15/31] Fix annotation parsing of optional values --- src/modi/c13.rs | 124 ++++++++++++++++++++++++++---------------------- src/modi/mod.rs | 4 +- 2 files changed, 68 insertions(+), 60 deletions(-) diff --git a/src/modi/c13.rs b/src/modi/c13.rs index 34dc14f..edf58e3 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -630,8 +630,8 @@ impl<'a> FallibleIterator for C13LineIterator<'a> { file_index: FileIndex(block_header.file_index), line_start: line_entry.start_line, line_end: line_entry.end_line, - column_start: column_entry.map(|e| e.start_column), - column_end: column_entry.map(|e| e.end_column), + column_start: column_entry.map(|e| e.start_column.into()), + column_end: column_entry.map(|e| e.end_column.into()), kind: line_entry.kind, })); } @@ -661,12 +661,13 @@ pub struct C13InlineeLineIterator<'a> { file_index: FileIndex, code_offset_base: u32, code_offset: PdbInternalSectionOffset, - code_length: u32, - current_line: u32, - current_line_length: u32, - current_col_start: u32, - current_col_end: u32, + code_length: Option, + line: u32, + line_length: u32, + col_start: Option, + col_end: Option, line_kind: LineInfoKind, + last_info: Option, } impl<'a> C13InlineeLineIterator<'a> { @@ -680,12 +681,13 @@ impl<'a> C13InlineeLineIterator<'a> { file_index: inlinee_line.file_id, code_offset_base: 0, code_offset: parent_offset, - code_length: 0, - current_line: inlinee_line.line, - current_line_length: 1, - current_col_start: 1, - current_col_end: 100_000, // TODO(ja): Is this a good start value? 
+ code_length: None, + line: inlinee_line.line, + line_length: 1, + col_start: None, + col_end: None, line_kind: LineInfoKind::Expression, + last_info: None, } } } @@ -697,32 +699,32 @@ impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { fn next(&mut self) -> Result> { while let Some(op) = self.annotations.next()? { match op { - BinaryAnnotation::CodeOffset(new_val) => { - self.code_offset.offset = new_val; + BinaryAnnotation::CodeOffset(code_offset) => { + self.code_offset.offset = code_offset; } - BinaryAnnotation::ChangeCodeOffsetBase(new_val) => { - self.code_offset_base = new_val; + BinaryAnnotation::ChangeCodeOffsetBase(code_offset_base) => { + self.code_offset_base = code_offset_base; } BinaryAnnotation::ChangeCodeOffset(delta) => { self.code_offset = self.code_offset.wrapping_add(delta); } - BinaryAnnotation::ChangeCodeLength(val) => { - // TODO(ja): Fix this - // if let Some(last_loc) = rv.last_mut() { - // if last_loc.length.is_none() && last_loc.kind == self.line_kind { - // last_loc.length = Some(val); - // } - // } - self.code_offset = self.code_offset.wrapping_add(val); + BinaryAnnotation::ChangeCodeLength(code_length) => { + if let Some(ref mut last_info) = self.last_info { + if last_info.length.is_none() && last_info.kind == self.line_kind { + last_info.length = Some(code_length); + } + } + + self.code_offset = self.code_offset.wrapping_add(code_length); } BinaryAnnotation::ChangeFile(new_val) => { self.file_index = FileIndex(new_val); } BinaryAnnotation::ChangeLineOffset(delta) => { - self.current_line = (i64::from(self.current_line) + i64::from(delta)) as u32; + self.line = (i64::from(self.line) + i64::from(delta)) as u32; } - BinaryAnnotation::ChangeLineEndDelta(new_val) => { - self.current_line_length = new_val; + BinaryAnnotation::ChangeLineEndDelta(line_length) => { + self.line_length = line_length; } BinaryAnnotation::ChangeRangeKind(kind) => { self.line_kind = match kind { @@ -731,58 +733,64 @@ impl<'a> FallibleIterator for 
C13InlineeLineIterator<'a> { _ => self.line_kind, }; } - BinaryAnnotation::ChangeColumnStart(new_val) => { - self.current_col_start = new_val; + BinaryAnnotation::ChangeColumnStart(col_start) => { + self.col_start = Some(col_start); } BinaryAnnotation::ChangeColumnEndDelta(delta) => { - self.current_col_end = - (i64::from(self.current_col_end) + i64::from(delta)) as u32; + self.col_end = self.col_end.map(|col_end| { + (i64::from(col_end) + i64::from(delta)) as u32 + }) } BinaryAnnotation::ChangeCodeOffsetAndLineOffset(code_delta, line_delta) => { self.code_offset = PdbInternalSectionOffset { section: self.code_offset.section, offset: (i64::from(self.code_offset.offset) + i64::from(code_delta)) as u32, }; - self.current_line = - (i64::from(self.current_line) + i64::from(line_delta)) as u32; + self.line = (i64::from(self.line) + i64::from(line_delta)) as u32; } - BinaryAnnotation::ChangeCodeLengthAndCodeOffset(new_code_length, code_delta) => { - self.code_length = new_code_length; + BinaryAnnotation::ChangeCodeLengthAndCodeOffset(code_length, code_delta) => { + self.code_length = Some(code_length); self.code_offset = PdbInternalSectionOffset { section: self.code_offset.section, offset: (i64::from(self.code_offset.offset) + i64::from(code_delta)) as u32, }; } - BinaryAnnotation::ChangeColumnEnd(new_val) => { - self.current_col_end = new_val; + BinaryAnnotation::ChangeColumnEnd(col_end) => { + self.col_end = Some(col_end); } } - if op.emits_line_info() { - // TODO(ja): Fix this - // if let Some(last_loc) = rv.last_mut() { - // if last_loc.length.is_none() && last_loc.kind == self.line_kind { - // last_loc.length = Some(self.code_offset.offset - self.code_offset_base); - // } - // } - - let line_info = LineInfo { - kind: self.line_kind, - file_index: self.file_index, - offset: self.code_offset + self.code_offset_base, - length: Some(self.code_length), - line_start: self.current_line, - line_end: self.current_line + self.current_line_length, - column_start: 
Some(self.current_col_start as u16), - column_end: Some(self.current_col_end as u16), - }; + if !op.emits_line_info() { + continue; + } + + if let Some(ref mut last_info) = self.last_info { + if last_info.length.is_none() && last_info.kind == self.line_kind { + last_info.length = Some(self.code_offset.offset - self.code_offset_base); + } + } + + let line_info = LineInfo { + kind: self.line_kind, + file_index: self.file_index, + offset: self.code_offset + self.code_offset_base, + length: self.code_length, + line_start: self.line, + line_end: self.line + self.line_length, + column_start: self.col_start, + column_end: self.col_end, + }; - self.code_length = 0; - return Ok(Some(line_info)); + self.code_length = None; + + // Finish the previous record and emit it. The current record is stored so that the + // length can be inferred from subsequent operators or the next line info. + if let Some(last_info) = std::mem::replace(&mut self.last_info, Some(line_info)) { + return Ok(Some(last_info)); } } - Ok(None) + Ok(self.last_info.take()) } } diff --git a/src/modi/mod.rs b/src/modi/mod.rs index be3abc0..e76f176 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -160,12 +160,12 @@ pub struct LineInfo { /// /// This value is only present if column information is provided by the PDB. Even then, it is /// often zero. - pub column_start: Option, + pub column_start: Option, /// Column number of the end of the covered range. /// /// This value is only present if column information is provided by the PDB. Even then, it is /// often zero. - pub column_end: Option, + pub column_end: Option, /// Kind of this line information. 
pub kind: LineInfoKind, } From e6264f639372bec0d4b5c96ee0d3a47bfb0f7136 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 23 Aug 2019 16:16:16 +0200 Subject: [PATCH 16/31] Fix default line info kinds and add more tests --- src/modi/c13.rs | 108 ++++++++++++++++++++++++++++++++++++++---------- src/modi/mod.rs | 4 +- 2 files changed, 88 insertions(+), 24 deletions(-) diff --git a/src/modi/c13.rs b/src/modi/c13.rs index edf58e3..89d46ac 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -686,7 +686,7 @@ impl<'a> C13InlineeLineIterator<'a> { line_length: 1, col_start: None, col_end: None, - line_kind: LineInfoKind::Expression, + line_kind: LineInfoKind::Statement, last_info: None, } } @@ -737,9 +737,9 @@ impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { self.col_start = Some(col_start); } BinaryAnnotation::ChangeColumnEndDelta(delta) => { - self.col_end = self.col_end.map(|col_end| { - (i64::from(col_end) + i64::from(delta)) as u32 - }) + self.col_end = self + .col_end + .map(|col_end| (i64::from(col_end) + i64::from(delta)) as u32) } BinaryAnnotation::ChangeCodeOffsetAndLineOffset(code_delta, line_delta) => { self.code_offset = PdbInternalSectionOffset { @@ -913,20 +913,13 @@ impl<'a> C13LineProgram<'a> { mod tests { use super::*; + use crate::symbol::{BinaryAnnotations, SymbolIndex}; + #[test] fn test_parse_inlinee_lines() { let data = &[ 0, 0, 0, 0, 254, 18, 0, 0, 104, 1, 0, 0, 24, 0, 0, 0, 253, 18, 0, 0, 104, 1, 0, 0, 28, - 0, 0, 0, 1, 0, 0, 128, 192, 0, 0, 0, 129, 2, 0, 0, 7, 0, 0, 128, 240, 0, 0, 0, 121, 9, - 0, 0, 8, 0, 0, 128, 240, 0, 0, 0, 62, 15, 0, 0, 9, 0, 0, 128, 240, 0, 0, 0, 10, 7, 0, - 0, 10, 0, 0, 128, 16, 2, 0, 0, 85, 1, 0, 0, 11, 0, 0, 128, 208, 5, 0, 0, 6, 4, 0, 0, - 12, 0, 0, 128, 208, 5, 0, 0, 211, 0, 0, 0, 14, 0, 0, 128, 208, 5, 0, 0, 119, 0, 0, 0, - 16, 0, 0, 128, 232, 5, 0, 0, 125, 0, 0, 0, 18, 0, 0, 128, 0, 6, 0, 0, 51, 3, 0, 0, 19, - 0, 0, 128, 0, 6, 0, 0, 236, 2, 0, 0, 20, 0, 0, 128, 0, 6, 0, 0, 4, 2, 0, 0, 21, 0, 0, - 
128, 0, 6, 0, 0, 138, 2, 0, 0, 23, 0, 0, 128, 224, 1, 0, 0, 55, 1, 0, 0, 24, 0, 0, 128, - 0, 6, 0, 0, 220, 1, 0, 0, 25, 0, 0, 128, 240, 0, 0, 0, 72, 8, 0, 0, 26, 0, 0, 128, 240, - 0, 0, 0, 51, 15, 0, 0, 27, 0, 0, 128, 224, 4, 0, 0, 92, 0, 0, 0, 28, 0, 0, 128, 240, 0, - 0, 0, 113, 8, 0, 0, 29, 0, 0, 128, 240, 0, 0, 0, 71, 10, 0, 0, + 0, 0, 0, ]; let inlinee_lines = DebugInlineeLinesSubsection::parse(data).expect("parse inlinee lines"); @@ -937,18 +930,89 @@ mod tests { .collect() .expect("collect inlinee lines"); - assert_eq!(lines.len(), 22); - - println!("{:#?}", lines); - - assert_eq!( - lines[0], + let expected = [ InlineeSourceLine { inlinee: 0x12FE, file_id: FileIndex(0x168), line: 24, extra_files: &[], - } - ) + }, + InlineeSourceLine { + inlinee: 0x12FD, + file_id: FileIndex(0x168), + line: 28, + extra_files: &[], + }, + ]; + + assert_eq!(lines, expected) + } + + // TODO: Parse extended version + + #[test] + fn test_inlinee_lines() { + // Obtained from a PDB compiling Breakpad's crash_generation_client.obj + + // S_GPROC32: [0001:00000120], Cb: 00000054 + // S_INLINESITE: Parent: 0000009C, End: 00000318, Inlinee: 0x1173 + // S_INLINESITE: Parent: 00000190, End: 000001EC, Inlinee: 0x1180 + // BinaryAnnotations: CodeLengthAndCodeOffset 2 3f CodeLengthAndCodeOffset 3 9 + let inline_site = InlineSiteSymbol { + parent: SymbolIndex(0x190), + end: SymbolIndex(0x1ec), + inlinee: 0x1180, + invocations: None, + annotations: BinaryAnnotations::new(&[12, 2, 63, 12, 3, 9, 0, 0]), + }; + + // Inline site from corresponding DEBUG_S_INLINEELINES subsection: + let inlinee_line = InlineeSourceLine { + inlinee: 0x1180, + file_id: FileIndex(0x270), + line: 341, + extra_files: &[], + }; + + // Parent offset from procedure root: + // S_GPROC32: [0001:00000120] + let parent_offset = PdbInternalSectionOffset { + offset: 0x120, + section: 0x1, + }; + + let iter = C13InlineeLineIterator::new(parent_offset, &inline_site, inlinee_line); + let lines: Vec<_> = 
iter.collect().expect("collect inlinee lines"); + + let expected = [ + LineInfo { + offset: PdbInternalSectionOffset { + section: 0x1, + offset: 0x0000015f, + }, + length: Some(2), + file_index: FileIndex(0x270), + line_start: 341, + line_end: 342, + column_start: None, + column_end: None, + kind: LineInfoKind::Statement, + }, + LineInfo { + offset: PdbInternalSectionOffset { + section: 0x1, + offset: 0x00000168, + }, + length: Some(3), + file_index: FileIndex(0x270), + line_start: 341, + line_end: 342, + column_start: None, + column_end: None, + kind: LineInfoKind::Statement, + }, + ]; + + assert_eq!(lines, expected); } } diff --git a/src/modi/mod.rs b/src/modi/mod.rs index e76f176..3f7a54c 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -137,14 +137,14 @@ pub enum LineInfoKind { impl Default for LineInfoKind { fn default() -> Self { - LineInfoKind::Expression + LineInfoKind::Statement } } /// Mapping of a source code offset to a source file location. /// /// A line entry is always valid up to the subsequent entry. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub struct LineInfo { /// Source code offset. pub offset: PdbInternalSectionOffset, From 996a45120b53a72088d5c4094cc08f73edb1b758 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 23 Aug 2019 16:40:53 +0200 Subject: [PATCH 17/31] Add more doc comments --- src/modi/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/modi/mod.rs b/src/modi/mod.rs index 3f7a54c..a220a44 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -220,6 +220,10 @@ impl<'a> LineProgram<'a> { } /// Returns an iterator over line records for an inline site. + /// + /// Note that line records are not guaranteed to be ordered by source code offset. If a + /// monotonic order by `PdbInternalSectionOffset` or `Rva` is required, the lines have to be + /// sorted manually. 
pub fn inlinee_lines( &self, parent_offset: PdbInternalSectionOffset, From a69921f94fbcb48c6d6364391551f195289b60af Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Mon, 26 Aug 2019 16:46:26 +0200 Subject: [PATCH 18/31] Fix comments for dbg_{start,end}_offset --- src/symbol/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 7ef3150..213be66 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -777,9 +777,9 @@ pub struct ProcedureSymbol<'t> { pub next: SymbolIndex, /// The length of the code block covered by this procedure. pub len: u32, - /// Debug start. + /// Start offset of the procedure's body code, which marks the end of the prologue. pub dbg_start_offset: u32, - /// Debug end. + /// End offset of the procedure's body code, which marks the start of the epilogue. pub dbg_end_offset: u32, /// Identifier of the procedure type. /// From 256b1735f19440fbf1a0401b9738b79bd55d19d8 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 28 Aug 2019 22:06:47 +0200 Subject: [PATCH 19/31] fix: Add more tests and fix more bugs in annotations --- src/common.rs | 17 +++++++++++ src/modi/c13.rs | 60 ++++++++++++++++++++++++++++++++------- src/modi/mod.rs | 17 ----------- src/symbol/annotations.rs | 20 ++++++------- 4 files changed, 76 insertions(+), 38 deletions(-) diff --git a/src/common.rs b/src/common.rs index 5ffaec2..f508109 100644 --- a/src/common.rs +++ b/src/common.rs @@ -625,6 +625,23 @@ impl<'a> TryFromCtx<'a, Endian> for StringRef { } } +/// Index of a file entry in the module. +/// +/// Use the [`LineProgram`] to resolve information on the file from this offset. 
+/// +/// [`LineProgram`]: struct.LineProgram.html +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct FileIndex(pub u32); + +impl<'a> TryFromCtx<'a, Endian> for FileIndex { + type Error = scroll::Error; + type Size = usize; + + fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + u32::try_from_ctx(this, le).map(|(num, s)| (Self(num), s)) + } +} + /// Provides little-endian access to a &[u8]. #[derive(Debug, Clone)] pub(crate) struct ParseBuffer<'b>(&'b [u8], usize); diff --git a/src/modi/c13.rs b/src/modi/c13.rs index 89d46ac..dc59a70 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -118,7 +118,7 @@ pub struct InlineeSourceLine<'a> { } impl<'a> InlineeSourceLine<'a> { - // TODO(ja): Implement extra files iterator + // TODO: Implement extra files iterator when needed. } impl<'a> TryFromCtx<'a, DebugInlineeLinesHeader> for InlineeSourceLine<'a> { @@ -717,8 +717,14 @@ impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { self.code_offset = self.code_offset.wrapping_add(code_length); } - BinaryAnnotation::ChangeFile(new_val) => { - self.file_index = FileIndex(new_val); + BinaryAnnotation::ChangeFile(file_index) => { + // NOTE: There seems to be a bug in VS2015-VS2019 compilers that generates + // invalid binary annotations when file changes are involved. This can be + // triggered by #including files directly into inline functions. The + // `ChangeFile` annotations are generated in the wrong spot or missing + // completely. This renders information on the file effectively useless in a lot + // of cases. 
+ self.file_index = file_index; } BinaryAnnotation::ChangeLineOffset(delta) => { self.line = (i64::from(self.line) + i64::from(delta)) as u32; @@ -744,7 +750,7 @@ impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { BinaryAnnotation::ChangeCodeOffsetAndLineOffset(code_delta, line_delta) => { self.code_offset = PdbInternalSectionOffset { section: self.code_offset.section, - offset: (i64::from(self.code_offset.offset) + i64::from(code_delta)) as u32, + offset: self.code_offset.offset + code_delta, }; self.line = (i64::from(self.line) + i64::from(line_delta)) as u32; } @@ -752,7 +758,7 @@ impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { self.code_length = Some(code_length); self.code_offset = PdbInternalSectionOffset { section: self.code_offset.section, - offset: (i64::from(self.code_offset.offset) + i64::from(code_delta)) as u32, + offset: self.code_offset.offset + code_delta, }; } BinaryAnnotation::ChangeColumnEnd(col_end) => { @@ -781,6 +787,7 @@ impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { column_end: self.col_end, }; + // Code length resets with every line record. self.code_length = None; // Finish the previous record and emit it. The current record is stored so that the @@ -830,10 +837,10 @@ impl<'a> C13LineProgram<'a> { while let Some(sec) = subsections.next()? { match sec.kind { DebugSubsectionKind::FileChecksums => { - file_checksums = DebugFileChecksumsSubsection::parse(sec.data)? + file_checksums = DebugFileChecksumsSubsection::parse(sec.data)?; } DebugSubsectionKind::InlineeLines => { - inlinee_lines = DebugInlineeLinesSubsection::parse(sec.data)? 
+ inlinee_lines = DebugInlineeLinesSubsection::parse(sec.data)?; } _ => {} } @@ -945,10 +952,41 @@ mod tests { }, ]; - assert_eq!(lines, expected) + assert_eq!(lines, expected); } - // TODO: Parse extended version + #[test] + fn test_parse_inlinee_lines_with_files() { + let data = &[ + 1, 0, 0, 0, 235, 102, 9, 0, 232, 37, 0, 0, 19, 0, 0, 0, 1, 0, 0, 0, 216, 26, 0, 0, 240, + 163, 7, 0, 176, 44, 0, 0, 120, 0, 0, 0, 1, 0, 0, 0, 120, 3, 0, 0, + ]; + + let inlinee_lines = DebugInlineeLinesSubsection::parse(data).expect("parse inlinee lines"); + assert!(inlinee_lines.header.has_extra_files()); + + let lines: Vec<_> = inlinee_lines + .lines() + .collect() + .expect("collect inlinee lines"); + + let expected = [ + InlineeSourceLine { + inlinee: 0x966EB, + file_id: FileIndex(0x25e8), + line: 19, + extra_files: &[216, 26, 0, 0], + }, + InlineeSourceLine { + inlinee: 0x7A3F0, + file_id: FileIndex(0x2cb0), + line: 120, + extra_files: &[120, 3, 0, 0], + }, + ]; + + assert_eq!(lines, expected) + } #[test] fn test_inlinee_lines() { @@ -988,7 +1026,7 @@ mod tests { LineInfo { offset: PdbInternalSectionOffset { section: 0x1, - offset: 0x0000015f, + offset: 0x015f, }, length: Some(2), file_index: FileIndex(0x270), @@ -1001,7 +1039,7 @@ mod tests { LineInfo { offset: PdbInternalSectionOffset { section: 0x1, - offset: 0x00000168, + offset: 0x0168, }, length: Some(3), file_index: FileIndex(0x270), diff --git a/src/modi/mod.rs b/src/modi/mod.rs index a220a44..e73a7c0 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -1,5 +1,3 @@ -use scroll::{ctx::TryFromCtx, Endian}; - use crate::common::*; use crate::dbi::Module; use crate::msf::Stream; @@ -111,21 +109,6 @@ pub struct FileInfo<'a> { pub checksum: FileChecksum<'a>, } -/// Index of a file entry in the module. -/// -/// Use the [`LineProgram`] to resolve information on the file from this offset. 
-#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct FileIndex(pub u32); - -impl<'a> TryFromCtx<'a, Endian> for FileIndex { - type Error = scroll::Error; - type Size = usize; - - fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { - u32::try_from_ctx(this, le).map(|(num, s)| (Self(num), s)) - } -} - /// The kind of source construct a line info is referring to. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum LineInfoKind { diff --git a/src/symbol/annotations.rs b/src/symbol/annotations.rs index c0681d4..81bd0c5 100644 --- a/src/symbol/annotations.rs +++ b/src/symbol/annotations.rs @@ -75,13 +75,13 @@ pub enum BinaryAnnotation { ChangeCodeOffsetBase(u32), ChangeCodeOffset(u32), ChangeCodeLength(u32), - ChangeFile(u32), + ChangeFile(FileIndex), ChangeLineOffset(i32), ChangeLineEndDelta(u32), ChangeRangeKind(u32), ChangeColumnStart(u32), ChangeColumnEndDelta(i32), - ChangeCodeOffsetAndLineOffset(i32, i32), + ChangeCodeOffsetAndLineOffset(u32, i32), ChangeCodeLengthAndCodeOffset(u32, u32), ChangeColumnEnd(u32), } @@ -167,7 +167,7 @@ impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { BinaryAnnotation::ChangeCodeLength(self.uncompress_next()?) } BinaryAnnotationOpcode::ChangeFile => { - BinaryAnnotation::ChangeFile(self.uncompress_next()?) 
+ BinaryAnnotation::ChangeFile(FileIndex(self.uncompress_next()?)) } BinaryAnnotationOpcode::ChangeLineOffset => { BinaryAnnotation::ChangeLineOffset(decode_signed_operand(self.uncompress_next()?)) @@ -187,8 +187,8 @@ impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset => { let operand = self.uncompress_next()?; BinaryAnnotation::ChangeCodeOffsetAndLineOffset( + operand & 0xf, decode_signed_operand(operand >> 4), - decode_signed_operand(operand & 0xf), ) } BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset => { @@ -245,14 +245,14 @@ fn test_binary_annotation_iter() { assert_eq!( annotations, vec![ - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(0, -1), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(3, 0), BinaryAnnotation::ChangeLineOffset(5), BinaryAnnotation::ChangeCodeOffset(8), BinaryAnnotation::ChangeLineOffset(3), BinaryAnnotation::ChangeCodeOffset(45), BinaryAnnotation::ChangeLineOffset(4), BinaryAnnotation::ChangeCodeOffset(7), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(-3, 1), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(2, -3), BinaryAnnotation::ChangeLineOffset(3), BinaryAnnotation::ChangeCodeLengthAndCodeOffset(3, 7), BinaryAnnotation::ChangeLineOffset(-7), @@ -276,10 +276,10 @@ fn test_binary_annotation_iter() { BinaryAnnotation::ChangeCodeOffset(50), BinaryAnnotation::ChangeLineOffset(3), BinaryAnnotation::ChangeCodeOffset(84), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(1, -1), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(-2, 6), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(2, -1), - BinaryAnnotation::ChangeCodeOffsetAndLineOffset(1, -7), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(3, 1), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(12, -2), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(3, 2), + BinaryAnnotation::ChangeCodeOffsetAndLineOffset(15, 1), BinaryAnnotation::ChangeLineOffset(2), BinaryAnnotation::ChangeCodeLengthAndCodeOffset(45, 
9), BinaryAnnotation::ChangeCodeOffset(59), From b896dd9b0c470040ba9f0a303b8fdcd0d32bdfdf Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 28 Aug 2019 22:33:55 +0200 Subject: [PATCH 20/31] Implement AssignAdd for section offset types --- src/common.rs | 23 ++++++++++++++++++++++- src/modi/c13.rs | 10 ++-------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/common.rs b/src/common.rs index f508109..41182ab 100644 --- a/src/common.rs +++ b/src/common.rs @@ -8,7 +8,7 @@ use std::borrow::Cow; use std::fmt; use std::io; -use std::ops::{Add, Sub}; +use std::ops::{Add, AddAssign, Sub}; use std::result; use scroll::ctx::TryFromCtx; @@ -332,12 +332,21 @@ macro_rules! impl_va { type Output = Self; /// Adds the given offset to this address. + #[inline] fn add(mut self, offset: u32) -> Self { self.0 += offset; self } } + impl AddAssign for $type { + /// Adds the given offset to this address. + #[inline] + fn add_assign(&mut self, offset: u32) { + self.0 += offset; + } + } + impl Sub for $type { type Output = u32; @@ -441,12 +450,24 @@ macro_rules! impl_section_offset { /// /// This does not check whether the offset is still valid within the given section. If /// the offset is out of bounds, the conversion to `Rva` will return `None`. + #[inline] fn add(mut self, offset: u32) -> Self { self.offset += offset; self } } + impl AddAssign for $type { + /// Adds the given offset to this section offset. + /// + /// This does not check whether the offset is still valid within the given section. If + /// the offset is out of bounds, the conversion to `Rva` will return `None`. + #[inline] + fn add_assign(&mut self, offset: u32) { + self.offset += offset; + } + } + impl PartialOrd for $type { /// Compares offsets if they reside in the same section. 
#[inline] diff --git a/src/modi/c13.rs b/src/modi/c13.rs index dc59a70..275b981 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -748,18 +748,12 @@ impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { .map(|col_end| (i64::from(col_end) + i64::from(delta)) as u32) } BinaryAnnotation::ChangeCodeOffsetAndLineOffset(code_delta, line_delta) => { - self.code_offset = PdbInternalSectionOffset { - section: self.code_offset.section, - offset: self.code_offset.offset + code_delta, - }; + self.code_offset += code_delta; self.line = (i64::from(self.line) + i64::from(line_delta)) as u32; } BinaryAnnotation::ChangeCodeLengthAndCodeOffset(code_length, code_delta) => { self.code_length = Some(code_length); - self.code_offset = PdbInternalSectionOffset { - section: self.code_offset.section, - offset: self.code_offset.offset + code_delta, - }; + self.code_offset += code_delta; } BinaryAnnotation::ChangeColumnEnd(col_end) => { self.col_end = Some(col_end); From d3ae0ddc8ec2c3461952d96b4daf0480cd0a77bb Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 30 Aug 2019 10:44:00 +0200 Subject: [PATCH 21/31] Streamline impls and formatting of index types --- src/common.rs | 304 +++++++++++++++++++++++++--------------------- src/modi/c13.rs | 2 +- src/modi/mod.rs | 2 +- src/omap.rs | 10 +- src/pe.rs | 24 +++- src/symbol/mod.rs | 78 +----------- 6 files changed, 199 insertions(+), 221 deletions(-) diff --git a/src/common.rs b/src/common.rs index 41182ab..6dc3d2b 100644 --- a/src/common.rs +++ b/src/common.rs @@ -233,51 +233,109 @@ impl From for Error { /// The result type returned by this crate. pub type Result = result::Result; -/// Helper to format hexadecimal numbers. -pub(crate) struct HexFmt(pub T); +/// Implements `Pread` using the inner type. +macro_rules! 
impl_pread { + ($type:ty) => { + impl<'a> TryFromCtx<'a, Endian> for $type { + type Error = scroll::Error; + type Size = usize; -impl fmt::Debug for HexFmt -where - T: fmt::LowerHex, -{ - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:#x}", self.0) - } + fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { + TryFromCtx::try_from_ctx(this, le).map(|(i, s)| (Self(i), s)) + } + } + }; } -impl fmt::Display for HexFmt -where - T: fmt::LowerHex, -{ - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(self, f) - } +/// Displays the type as hexadecimal number. Debug prints the type name around. +macro_rules! impl_hex_fmt { + ($type:ty) => { + impl fmt::Display for $type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:#x}", self.0) + } + } + + impl fmt::Debug for $type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, concat!(stringify!($type), "({})"), self) + } + } + }; } -/// Helper to format hexadecimal numbers with fixed width. -pub(crate) struct FixedHexFmt(pub T); +/// Same as `impl_hex_fmt`, but prints `None` for none values. +macro_rules! 
impl_hex_fmt_opt { + ($type:ty, $none:literal) => { + impl fmt::Display for $type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.0 { + $none => f.write_str("None"), + val => write!(f, "{:#x}", val), + } + } + } -impl fmt::Debug for FixedHexFmt -where - T: fmt::LowerHex, -{ - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let width = 2 * std::mem::size_of::(); - write!(f, "{:#01$x}", self.0, width + 2) - } + impl fmt::Debug for $type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, concat!(stringify!($type), "({})"), self) + } + } + }; } -impl fmt::Display for FixedHexFmt -where - T: fmt::LowerHex, -{ - #[inline] - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(self, f) - } +/// Implements bidirectional conversion traits for the newtype. +macro_rules! impl_convert { + ($type:ty, $inner:ty) => { + impl From<$inner> for $type { + fn from(offset: $inner) -> Self { + Self(offset) + } + } + + impl From<$type> for $inner { + fn from(string_ref: $type) -> Self { + string_ref.0 + } + } + }; +} + +/// Declares that the given value represents `None`. +/// +/// - `Type::none` and `Default::default` return the none value. +/// - `Type::is_some` and `Type::is_none` check for the none value. +macro_rules! impl_opt { + ($type:ty, $none:literal) => { + impl $type { + /// Returns an index that points to no value. + #[inline] + pub const fn none() -> Self { + Self($none) + } + + /// Returns `true` if the index points to a valid value. + #[inline] + #[must_use] + pub fn is_some(self) -> bool { + self.0 != $none + } + + /// Returns `true` if the index indicates the absence of a value. + #[inline] + #[must_use] + pub fn is_none(self) -> bool { + self.0 == $none + } + } + + impl Default for $type { + #[inline] + fn default() -> Self { + Self::none() + } + } + }; } /// Implements common functionality for virtual addresses. @@ -316,18 +374,6 @@ macro_rules! 
impl_va { } } - impl From for $type { - fn from(addr: u32) -> Self { - Self(addr) - } - } - - impl From<$type> for u32 { - fn from(addr: $type) -> Self { - addr.0 - } - } - impl Add for $type { type Output = Self; @@ -355,17 +401,8 @@ macro_rules! impl_va { } } - impl fmt::Display for $type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - HexFmt(self.0).fmt(f) - } - } - - impl fmt::Debug for $type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, concat!(stringify!($type, "({})")), self) - } - } + impl_convert!($type, u32); + impl_hex_fmt!($type); }; } @@ -391,15 +428,7 @@ impl_va!(Rva); pub struct PdbInternalRva(pub u32); impl_va!(PdbInternalRva); - -impl<'a> TryFromCtx<'a, Endian> for PdbInternalRva { - type Error = scroll::Error; - type Size = usize; - - fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { - u32::try_from_ctx(this, le).map(|(i, s)| (PdbInternalRva(i), s)) - } -} +impl_pread!(PdbInternalRva); /// Implements common functionality for section offsets. macro_rules! impl_section_offset { @@ -483,8 +512,8 @@ macro_rules! impl_section_offset { impl fmt::Debug for $type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct(stringify!($type)) - .field("section", &HexFmt(self.section)) - .field("offset", &FixedHexFmt(self.offset)) + .field("section", &format_args!("{:#x}", self.section)) + .field("offset", &format_args!("{:#x}", self.offset)) .finish() } } @@ -548,20 +577,6 @@ impl_section_offset!(PdbInternalSectionOffset); pub struct StreamIndex(pub u16); impl StreamIndex { - /// Creates a stream index that points to no stream. - pub fn none() -> Self { - StreamIndex(0xffff) - } - - /// Determines whether this index indicates the absence of a stream. - /// - /// Loading a missing stream from the PDB will result in `None`. Otherwise, the stream is - /// expected to be present in the MSF and will result in an error if loading. 
- #[inline] - pub fn is_none(self) -> bool { - self.msf_number().is_none() - } - /// Returns the MSF stream number, if this stream is not a NULL stream. #[inline] pub(crate) fn msf_number(self) -> Option { @@ -572,12 +587,6 @@ impl StreamIndex { } } -impl Default for StreamIndex { - fn default() -> Self { - Self::none() - } -} - impl fmt::Display for StreamIndex { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.msf_number() { @@ -593,14 +602,8 @@ impl fmt::Debug for StreamIndex { } } -impl<'a> TryFromCtx<'a, Endian> for StreamIndex { - type Error = scroll::Error; - type Size = usize; - - fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { - u16::try_from_ctx(this, le).map(|(i, s)| (StreamIndex(i), s)) - } -} +impl_opt!(StreamIndex, 0xffff); +impl_pread!(StreamIndex); /// A reference to a string in the string table. /// @@ -613,55 +616,40 @@ impl<'a> TryFromCtx<'a, Endian> for StreamIndex { #[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct StringRef(pub u32); -impl From for StringRef { - fn from(offset: u32) -> Self { - StringRef(offset) - } -} - -impl From for u32 { - fn from(string_ref: StringRef) -> Self { - string_ref.0 - } -} - -impl fmt::Display for StringRef { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:#010x}", self.0) - } -} - -impl fmt::Debug for StringRef { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "StringRef({})", self) - } -} - -impl<'a> TryFromCtx<'a, Endian> for StringRef { - type Error = scroll::Error; - type Size = usize; - - fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { - u32::try_from_ctx(this, le).map(|(i, s)| (StringRef(i), s)) - } -} +impl_convert!(StringRef, u32); +impl_hex_fmt!(StringRef); +impl_pread!(StringRef); /// Index of a file entry in the module. /// /// Use the [`LineProgram`] to resolve information on the file from this offset. 
/// /// [`LineProgram`]: struct.LineProgram.html -#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct FileIndex(pub u32); -impl<'a> TryFromCtx<'a, Endian> for FileIndex { - type Error = scroll::Error; - type Size = usize; +impl_convert!(FileIndex, u32); +impl_hex_fmt!(FileIndex); +impl_pread!(FileIndex); - fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { - u32::try_from_ctx(this, le).map(|(num, s)| (Self(num), s)) - } -} +/// A reference into the symbol table of a module. +/// +/// To retrieve the symbol referenced by this index, use [`ModuleInfo::symbols_at`]. When iterating, +/// use [`SymbolIter::seek`] to jump between symbols. +/// +/// The numeric value of this index corresponds to the binary offset of the symbol in its symbol +/// stream. The index might also indicate the absence of a symbol (numeric value `0`). This is +/// indicated by `is_none` returning `false`. Seeking to this symbol will return an empty iterator. +/// +/// [`ModuleInfo::symbols_at`]: struct.ModuleInfo.html#method.symbols_at +/// [`SymbolIter::seek`]: struct.SymbolIter.html#method.seek +#[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct SymbolIndex(pub u32); + +impl_opt!(SymbolIndex, 0); +impl_convert!(SymbolIndex, u32); +impl_hex_fmt_opt!(SymbolIndex, 0); +impl_pread!(SymbolIndex); /// Provides little-endian access to a &[u8]. #[derive(Debug, Clone)] @@ -1148,4 +1136,42 @@ mod tests { } } } + + mod newtypes { + use crate::common::*; + + // These tests use SymbolIndex as a proxy for all other types. 
+ + #[test] + fn test_format_newtype() { + let val = SymbolIndex(0x42); + assert_eq!(format!("{}", val), "0x42"); + } + + #[test] + fn test_format_newtype_none() { + let val = SymbolIndex::none(); + assert_eq!(format!("{}", val), "None"); + } + + #[test] + fn test_debug_newtype() { + let val = SymbolIndex(0x42); + assert_eq!(format!("{:?}", val), "SymbolIndex(0x42)"); + } + + #[test] + fn test_debug_newtype_none() { + let val = SymbolIndex::none(); + assert_eq!(format!("{:?}", val), "SymbolIndex(None)"); + } + + #[test] + fn test_pread() { + let mut buf = ParseBuffer::from(&[0x42, 0, 0, 0][..]); + let val = buf.parse::().expect("parse"); + assert_eq!(val, SymbolIndex(0x42)); + assert!(buf.is_empty()); + } + } } diff --git a/src/modi/c13.rs b/src/modi/c13.rs index 275b981..bbbda1b 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -914,7 +914,7 @@ impl<'a> C13LineProgram<'a> { mod tests { use super::*; - use crate::symbol::{BinaryAnnotations, SymbolIndex}; + use crate::symbol::BinaryAnnotations; #[test] fn test_parse_inlinee_lines() { diff --git a/src/modi/mod.rs b/src/modi/mod.rs index e73a7c0..c9d414e 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -1,7 +1,7 @@ use crate::common::*; use crate::dbi::Module; use crate::msf::Stream; -use crate::symbol::{InlineSiteSymbol, SymbolIndex, SymbolIter}; +use crate::symbol::{InlineSiteSymbol, SymbolIter}; use crate::FallibleIterator; mod c13; diff --git a/src/omap.rs b/src/omap.rs index f0f5339..9a17c28 100644 --- a/src/omap.rs +++ b/src/omap.rs @@ -63,8 +63,14 @@ impl OMAPRecord { impl fmt::Debug for OMAPRecord { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("OMAPRecord") - .field("source_address", &FixedHexFmt(self.source_address())) - .field("target_address", &FixedHexFmt(self.target_address())) + .field( + "source_address", + &format_args!("{:#010x}", self.source_address()), + ) + .field( + "target_address", + &format_args!("{:#010x}", self.target_address()), + ) .finish() } } diff --git 
a/src/pe.rs b/src/pe.rs index 6c8eab0..d2af8da 100644 --- a/src/pe.rs +++ b/src/pe.rs @@ -104,21 +104,33 @@ impl fmt::Debug for ImageSectionHeader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ImageSectionHeader") .field("name()", &self.name().to_string()) - .field("physical_address", &FixedHexFmt(self.physical_address)) - .field("virtual_address", &FixedHexFmt(self.virtual_address)) + .field( + "physical_address", + &format_args!("{:#x}", self.physical_address), + ) + .field( + "virtual_address", + &format_args!("{:#x}", self.virtual_address), + ) .field("size_of_raw_data", &self.size_of_raw_data) - .field("pointer_to_raw_data", &HexFmt(self.pointer_to_raw_data)) + .field( + "pointer_to_raw_data", + &format_args!("{:#x}", self.pointer_to_raw_data), + ) .field( "pointer_to_relocations", - &HexFmt(self.pointer_to_relocations), + &format_args!("{:#x}", self.pointer_to_relocations), ) .field( "pointer_to_line_numbers", - &HexFmt(self.pointer_to_line_numbers), + &format_args!("{:#x}", self.pointer_to_line_numbers), ) .field("number_of_relocations", &self.number_of_relocations) .field("number_of_line_numbers", &self.number_of_line_numbers) - .field("characteristics", &FixedHexFmt(self.characteristics)) + .field( + "characteristics", + &format_args!("{:#x}", self.characteristics), + ) .finish() } } diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 213be66..c2a9609 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -26,71 +26,6 @@ pub type SymbolKind = u16; /// A register referred to by its number. pub type Register = u16; -/// A reference into the symbol table of a module. -/// -/// To retrieve the symbol referenced by this index, use [`ModuleInfo::symbols_at`]. When iterating, -/// use [`SymbolIter::seek`] to jump between symbols. -/// -/// The numeric value of this index corresponds to the binary offset of the symbol in its symbol -/// stream. The index might also indicate the absence of a symbol (numeric value `0`). 
This is -/// indicated by `is_none` returning `false`. Seeking to this symbol will return an empty iterator. -/// -/// [`ModuleInfo::symbols_at`]: struct.ModuleInfo.html#method.symbols_at -/// [`SymbolIter::seek`]: struct.SymbolIter.html#method.seek -#[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct SymbolIndex(pub u32); - -impl SymbolIndex { - /// Returns `true` if the symbol index points to a symbol. - #[inline] - #[must_use] - #[allow(clippy::trivially_copy_pass_by_ref)] - pub fn is_some(&self) -> bool { - self.0 != 0 - } - - /// Returns `true` if the symbol index indicates the absence of a symbol. - #[inline] - #[must_use] - #[allow(clippy::trivially_copy_pass_by_ref)] - pub fn is_none(&self) -> bool { - self.0 == 0 - } -} - -impl From for SymbolIndex { - fn from(offset: u32) -> Self { - Self(offset) - } -} - -impl From for u32 { - fn from(string_ref: SymbolIndex) -> Self { - string_ref.0 - } -} - -impl fmt::Display for SymbolIndex { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:#010x}", self.0) - } -} - -impl fmt::Debug for SymbolIndex { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "SymbolIndex({})", self) - } -} - -impl<'a> TryFromCtx<'a, Endian> for SymbolIndex { - type Error = scroll::Error; - type Size = usize; - - fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, Self::Size)> { - u32::try_from_ctx(this, le).map(|(i, s)| (Self(i), s)) - } -} - /// Represents a symbol from the symbol table. /// /// A `Symbol` is represented internally as a `&[u8]`, and in general the bytes inside are not @@ -1315,13 +1250,12 @@ impl<'t> SymbolIter<'t> { /// /// This can be used to jump to the sibiling or parent of a symbol record. pub fn seek(&mut self, index: SymbolIndex) { - // A symbol index of 0 referes to no symbol. Seek to the end of the iterator. 
- let pos = match index.0 { - 0 => self.buf.pos() + self.buf.len(), - pos => pos as usize, - }; - - self.buf.seek(pos); + if index.is_some() { + self.buf.seek(index.0 as usize); + } else { + // Seek to the end of the iterator. + self.buf.seek(self.buf.pos() + self.buf.len()); + } } /// Skip to the symbol referred to by `index`, returning the symbol. From 02d8c0dc1c730662d1d051ee26fd7cdc30c49dc5 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 30 Aug 2019 11:10:43 +0200 Subject: [PATCH 22/31] Add tests and fix a bug in seeking --- src/common.rs | 27 +++++++++++++++- src/symbol/mod.rs | 79 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/src/common.rs b/src/common.rs index 6dc3d2b..bed6114 100644 --- a/src/common.rs +++ b/src/common.rs @@ -698,7 +698,7 @@ impl<'b> ParseBuffer<'b> { /// Seek to the given absolute position. #[inline] pub fn seek(&mut self, pos: usize) { - self.1 = std::cmp::min(pos, self.len()); + self.1 = std::cmp::min(pos, self.0.len()); } /// Truncates the buffer at the given absolute position. 
@@ -1135,6 +1135,17 @@ mod tests { _ => panic!("expected EOF"), } } + + #[test] + fn test_seek() { + let mut buf = ParseBuffer::from("hello".as_bytes()); + buf.seek(5); + assert_eq!(buf.pos(), 5); + buf.seek(2); + assert_eq!(buf.pos(), 2); + buf.seek(10); + assert_eq!(buf.pos(), 5); + } } mod newtypes { @@ -1173,5 +1184,19 @@ mod tests { assert_eq!(val, SymbolIndex(0x42)); assert!(buf.is_empty()); } + + #[test] + fn test_is_some() { + let val = SymbolIndex(0x42); + assert!(val.is_some()); + assert!(!val.is_none()); + } + + #[test] + fn test_is_none() { + let val = SymbolIndex::none(); + assert!(val.is_none()); + assert!(!val.is_some()); + } } } diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index c2a9609..dbd5bcc 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -95,7 +95,7 @@ impl<'t> fmt::Debug for Symbol<'t> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "Symbol{{ kind: 0x{:4x} [{} bytes] }}", + "Symbol{{ kind: 0x{:x} [{} bytes] }}", self.raw_kind(), self.data.len() ) @@ -1799,4 +1799,81 @@ mod tests { assert_eq!(symbol.parse().expect("parse"), SymbolData::InlineSiteEnd); } } + + mod iterator { + use crate::symbol::*; + + fn create_iter() -> SymbolIter<'static> { + let data = &[ + 0x00, 0x00, 0x00, 0x00, // module signature (padding) + 0x02, 0x00, 0x4e, 0x11, // S_INLINESITE_END + 0x02, 0x00, 0x06, 0x00, // S_END + ]; + + let mut buf = ParseBuffer::from(&data[..]); + buf.seek(4); // skip the module signature + SymbolIter::new(buf) + } + + #[test] + fn test_iter() { + let symbols: Vec<_> = create_iter().collect().expect("collect"); + + let expected = [ + Symbol { + index: SymbolIndex(0x4), + data: &[0x4e, 0x11], // S_INLINESITE_END + }, + Symbol { + index: SymbolIndex(0x8), + data: &[0x06, 0x00], // S_END + }, + ]; + + assert_eq!(symbols, expected); + } + + #[test] + fn test_seek_some() { + let mut symbols = create_iter(); + symbols.seek(SymbolIndex(0x8)); + + let symbol = symbols.next().expect("get symbol"); + let expected 
= Symbol { + index: SymbolIndex(0x8), + data: &[0x06, 0x00], // S_END + }; + + assert_eq!(symbol, Some(expected)); + } + + #[test] + fn test_seek_none() { + let mut symbols = create_iter(); + symbols.seek(SymbolIndex::none()); + + let symbol = symbols.next().expect("get symbol"); + assert_eq!(symbol, None); + } + + #[test] + fn test_skip_to_some() { + let mut symbols = create_iter(); + let symbol = symbols.skip_to(SymbolIndex(0x8)).expect("get symbol"); + + let expected = Symbol { + index: SymbolIndex(0x8), + data: &[0x06, 0x00], // S_END + }; + + assert_eq!(symbol, Some(expected)); + } + + #[test] + fn test_skip_to_none() { + let mut symbols = create_iter(); + let symbol = symbols.skip_to(SymbolIndex::none()).expect("get symbol"); + assert_eq!(symbol, None); + } + } } From f87074684b5fc36ef755281ddf5ae5e2205a5f5a Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 30 Aug 2019 13:06:09 +0200 Subject: [PATCH 23/31] Make TypeIndex a newtype --- src/common.rs | 26 ++++++++++++++------ src/modi/c13.rs | 16 ++++++------ src/symbol/mod.rs | 28 +++++++++++---------- src/tpi/data.rs | 52 +++++++++++++++++++-------------------- src/tpi/header.rs | 32 ++++++++++++------------ src/tpi/mod.rs | 12 ++++----- src/tpi/primitive.rs | 6 ++--- tests/type_information.rs | 8 +++--- 8 files changed, 97 insertions(+), 83 deletions(-) diff --git a/src/common.rs b/src/common.rs index bed6114..0c18d9a 100644 --- a/src/common.rs +++ b/src/common.rs @@ -16,12 +16,6 @@ use scroll::{self, Endian, Pread, LE}; use crate::tpi::constants; -/// `TypeIndex` refers to a type somewhere in `PDB.type_information()`. -pub type TypeIndex = u32; - -/// `ItemId` refers to an item ID. -pub type ItemId = u32; - /// An error that occurred while reading or parsing the PDB. #[derive(Debug)] pub enum Error { @@ -605,6 +599,24 @@ impl fmt::Debug for StreamIndex { impl_opt!(StreamIndex, 0xffff); impl_pread!(StreamIndex); +/// Index of a [`Type`] in `PDB.type_information()`. 
+/// +/// [`Type`]: struct.Type.html +#[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TypeIndex(pub u32); + +impl_convert!(TypeIndex, u32); +impl_hex_fmt!(TypeIndex); +impl_pread!(TypeIndex); + +/// Index of an [`Id`] in `PDB. id_information()`. +#[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct IdIndex(pub u32); + +impl_convert!(IdIndex, u32); +impl_hex_fmt!(IdIndex); +impl_pread!(IdIndex); + /// A reference to a string in the string table. /// /// This type stores an offset into the global string table of the PDB. To retrieve the string @@ -1138,7 +1150,7 @@ mod tests { #[test] fn test_seek() { - let mut buf = ParseBuffer::from("hello".as_bytes()); + let mut buf = ParseBuffer::from(&b"hello"[..]); buf.seek(5); assert_eq!(buf.pos(), 5); buf.seek(2); diff --git a/src/modi/c13.rs b/src/modi/c13.rs index bbbda1b..f6fd453 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -111,7 +111,7 @@ impl DebugInlineeLinesHeader { #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct InlineeSourceLine<'a> { - pub inlinee: ItemId, + pub inlinee: IdIndex, pub file_id: FileIndex, pub line: u32, extra_files: &'a [u8], @@ -194,7 +194,7 @@ impl<'a> DebugInlineeLinesSubsection<'a> { } /// Retrieve the inlinee source line for the given inlinee. 
- fn find(&self, inlinee: ItemId) -> Result>> { + fn find(&self, inlinee: IdIndex) -> Result>> { self.lines().find(|line| line.inlinee == inlinee) } } @@ -933,13 +933,13 @@ mod tests { let expected = [ InlineeSourceLine { - inlinee: 0x12FE, + inlinee: IdIndex(0x12FE), file_id: FileIndex(0x168), line: 24, extra_files: &[], }, InlineeSourceLine { - inlinee: 0x12FD, + inlinee: IdIndex(0x12FD), file_id: FileIndex(0x168), line: 28, extra_files: &[], @@ -966,13 +966,13 @@ mod tests { let expected = [ InlineeSourceLine { - inlinee: 0x966EB, + inlinee: IdIndex(0x966EB), file_id: FileIndex(0x25e8), line: 19, extra_files: &[216, 26, 0, 0], }, InlineeSourceLine { - inlinee: 0x7A3F0, + inlinee: IdIndex(0x7A3F0), file_id: FileIndex(0x2cb0), line: 120, extra_files: &[120, 3, 0, 0], @@ -993,14 +993,14 @@ mod tests { let inline_site = InlineSiteSymbol { parent: SymbolIndex(0x190), end: SymbolIndex(0x1ec), - inlinee: 0x1180, + inlinee: IdIndex(0x1180), invocations: None, annotations: BinaryAnnotations::new(&[12, 2, 63, 12, 3, 9, 0, 0]), }; // Inline site from corresponding DEBUG_S_INLINEELINES subsection: let inlinee_line = InlineeSourceLine { - inlinee: 0x1180, + inlinee: IdIndex(0x1180), file_id: FileIndex(0x270), line: 341, extra_files: &[], diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index dbd5bcc..ddb0127 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -779,7 +779,7 @@ pub struct InlineSiteSymbol<'t> { /// The end symbol of this callsite. pub end: SymbolIndex, /// Identifier of the type describing the inline function. - pub inlinee: ItemId, + pub inlinee: IdIndex, /// The total number of invocations of the inline function. pub invocations: Option, /// Binary annotations containing the line program of this call site. @@ -813,8 +813,8 @@ impl<'t> TryFromCtx<'t, SymbolKind> for InlineSiteSymbol<'t> { /// Symbol kind `S_BUILDINFO`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct BuildInfoSymbol { - /// Identifier of the build information record. 
- pub id: ItemId, + /// Index of the build information record. + pub id: IdIndex, } impl<'t> TryFromCtx<'t, SymbolKind> for BuildInfoSymbol { @@ -1348,7 +1348,7 @@ mod tests { assert_eq!( symbol.parse().expect("parse"), SymbolData::RegisterVariable(RegisterVariableSymbol { - type_index: 8824, + type_index: TypeIndex(8824), register: 18, name: "this".into(), }) @@ -1433,7 +1433,7 @@ mod tests { assert_eq!( symbol.parse().expect("parse"), SymbolData::UserDefinedType(UserDefinedTypeSymbol { - type_index: 1648, + type_index: TypeIndex(1648), name: "va_list".into(), }) ); @@ -1454,7 +1454,7 @@ mod tests { symbol.parse().expect("parse"), SymbolData::Constant(ConstantSymbol { managed: false, - type_index: 4809, + type_index: TypeIndex(4809), value: Variant::U16(1), name: "__ISA_AVAILABLE_SSE2".into(), }) @@ -1477,7 +1477,7 @@ mod tests { SymbolData::Data(DataSymbol { global: true, managed: false, - type_index: 116, + type_index: TypeIndex(116), offset: PdbInternalSectionOffset { offset: 16, section: 3 @@ -1503,7 +1503,7 @@ mod tests { SymbolData::Data(DataSymbol { global: false, managed: false, - type_index: 32, + type_index: TypeIndex(32), offset: PdbInternalSectionOffset { offset: 74992, section: 2 @@ -1559,7 +1559,7 @@ mod tests { len: 6, dbg_start_offset: 5, dbg_end_offset: 5, - type_index: 4103, + type_index: TypeIndex(4103), offset: PdbInternalSectionOffset { offset: 21824, section: 1 @@ -1602,7 +1602,7 @@ mod tests { len: 18, dbg_start_offset: 4, dbg_end_offset: 9, - type_index: 4224, + type_index: TypeIndex(4224), offset: PdbInternalSectionOffset { offset: 22468, section: 1 @@ -1731,7 +1731,7 @@ mod tests { assert_eq!( symbol.parse().expect("parse"), SymbolData::Local(LocalSymbol { - type_index: 5057, + type_index: TypeIndex(5057), flags: LocalVariableFlags { isparam: true, addrtaken: false, @@ -1760,7 +1760,9 @@ mod tests { assert_eq!(symbol.raw_kind(), 0x114c); assert_eq!( symbol.parse().expect("parse"), - SymbolData::BuildInfo(BuildInfoSymbol { id: 0x115F }) + 
SymbolData::BuildInfo(BuildInfoSymbol { + id: IdIndex(0x115F) + }) ); } @@ -1780,7 +1782,7 @@ mod tests { SymbolData::InlineSite(InlineSiteSymbol { parent: SymbolIndex(0x0190), end: SymbolIndex(0x01d0), - inlinee: 4473, + inlinee: IdIndex(4473), invocations: None, annotations: BinaryAnnotations::new(&[12, 6, 3, 0]), }) diff --git a/src/tpi/data.rs b/src/tpi/data.rs index 67458ce..ae489c9 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -85,7 +85,7 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::Member(MemberType { attributes: FieldAttributes(buf.parse_u16()?), - field_type: buf.parse_u32()? as TypeIndex, + field_type: buf.parse()?, offset: parse_unsigned(&mut buf)? as u16, name: parse_string(leaf, &mut buf)?, })), @@ -105,26 +105,26 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::MemberFunction(MemberFunctionType { - return_type: buf.parse_u32()? as TypeIndex, - class_type: buf.parse_u32()? as TypeIndex, + return_type: buf.parse()?, + class_type: buf.parse()?, this_pointer_type: parse_optional_type_index(&mut buf)?, attributes: FunctionAttributes(buf.parse_u16()?), parameter_count: buf.parse_u16()?, - argument_list: buf.parse_u32()? as TypeIndex, + argument_list: buf.parse()?, this_adjustment: buf.parse_u32()?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2650-L2655 LF_METHOD | LF_METHOD_ST => Ok(TypeData::OverloadedMethod(OverloadedMethodType { count: buf.parse_u16()?, - method_list: buf.parse_u32()? as TypeIndex, + method_list: buf.parse()?, name: parse_string(leaf, &mut buf)?, })), @@ -133,7 +133,7 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result(mut buf: &mut ParseBuffer<'t>) -> Result unreachable!(), }, attributes: FieldAttributes(buf.parse_u16()?), - base_class: buf.parse_u32()? as TypeIndex, + base_class: buf.parse()?, offset: parse_unsigned(&mut buf)? 
as u32, })), @@ -162,7 +162,7 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result(mut buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::StaticMember(StaticMemberType { attributes: FieldAttributes(buf.parse_u16()?), - field_type: buf.parse_u32()? as TypeIndex, + field_type: buf.parse()?, name: parse_string(leaf, &mut buf)?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1469-L1506 LF_POINTER => Ok(TypeData::Pointer(PointerType { - underlying_type: buf.parse_u32()? as TypeIndex, + underlying_type: buf.parse()?, attributes: PointerAttributes(buf.parse_u32()?), })), @@ -185,12 +185,12 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result { - let type_index = buf.parse_u32()? as TypeIndex; + let type_index = buf.parse()?; // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1090-L1095 let flags = buf.parse_u16()?; @@ -207,8 +207,8 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::Enumeration(EnumerationType { count: buf.parse_u16()?, properties: TypeProperties(buf.parse_u16()?), - underlying_type: buf.parse_u32()? as TypeIndex, - fields: buf.parse_u32()? as TypeIndex, + underlying_type: buf.parse()?, + fields: buf.parse()?, name: parse_string(leaf, &mut buf)?, })), @@ -221,8 +221,8 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result { - let element_type = buf.parse_u32()? as TypeIndex; - let indexing_type = buf.parse_u32()? as TypeIndex; + let element_type = buf.parse()?; + let indexing_type = buf.parse()?; let stride: Option = if leaf == LF_STRIDED_ARRAY { Some(buf.parse_u32()?) } else { @@ -270,14 +270,14 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::Union(UnionType { count: buf.parse_u16()?, properties: TypeProperties(buf.parse_u16()?), - fields: buf.parse_u32()? 
as TypeIndex, + fields: buf.parse()?, size: parse_unsigned(&mut buf)? as u32, name: parse_string(leaf, &mut buf)?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2164-L2170 LF_BITFIELD => Ok(TypeData::Bitfield(BitfieldType { - underlying_type: buf.parse_u32()? as TypeIndex, + underlying_type: buf.parse()?, length: buf.parse_u8()?, position: buf.parse_u8()?, })), @@ -298,8 +298,8 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::VirtualBaseClass(VirtualBaseClassType { direct: leaf == LF_VBCLASS, attributes: FieldAttributes(buf.parse_u16()?), - base_class: buf.parse_u32()? as TypeIndex, - base_pointer: buf.parse_u32()? as TypeIndex, + base_class: buf.parse()?, + base_pointer: buf.parse()?, base_pointer_offset: parse_unsigned(&mut buf)? as u32, virtual_base_offset: parse_unsigned(&mut buf)? as u32, })), @@ -320,7 +320,7 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result { // other type @@ -343,7 +343,7 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result = Vec::with_capacity(count as usize); for _ in 0..count { - arglist.push(buf.parse_u32()? as TypeIndex); + arglist.push(buf.parse()?); } Ok(TypeData::ArgumentList(ArgumentList { arguments: arglist })) } @@ -358,7 +358,7 @@ pub(crate) fn parse_type_data<'t>(mut buf: &mut ParseBuffer<'t>) -> Result(mut buf: &mut ParseBuffer<'t>) -> Result(buf: &mut ParseBuffer<'t>) -> Result> { - let index = buf.parse_u32()? 
as TypeIndex; - if index == 0 || index == u32::from(u16::max_value()) { + let index = buf.parse()?; + if index == TypeIndex(0) || index == TypeIndex(0xffff) { Ok(None) } else { Ok(Some(index)) diff --git a/src/tpi/header.rs b/src/tpi/header.rs index 97c8d74..9d303ff 100644 --- a/src/tpi/header.rs +++ b/src/tpi/header.rs @@ -37,26 +37,26 @@ impl Header { assert!(buf.pos() == 0); let header = Header { - version: buf.parse_u32()?, - header_size: buf.parse_u32()?, - minimum_type_index: buf.parse_u32()?, - maximum_type_index: buf.parse_u32()?, - gprec_size: buf.parse_u32()?, - tpi_hash_stream: buf.parse_u16()?, - tpi_hash_pad_stream: buf.parse_u16()?, - hash_key_size: buf.parse_u32()?, - hash_bucket_size: buf.parse_u32()?, + version: buf.parse()?, + header_size: buf.parse()?, + minimum_type_index: buf.parse()?, + maximum_type_index: buf.parse()?, + gprec_size: buf.parse()?, + tpi_hash_stream: buf.parse()?, + tpi_hash_pad_stream: buf.parse()?, + hash_key_size: buf.parse()?, + hash_bucket_size: buf.parse()?, hash_values: Slice { - offset: buf.parse_i32()?, - size: buf.parse_u32()?, + offset: buf.parse()?, + size: buf.parse()?, }, ti_off: Slice { - offset: buf.parse_i32()?, - size: buf.parse_u32()?, + offset: buf.parse()?, + size: buf.parse()?, }, hash_adj: Slice { - offset: buf.parse_i32()?, - size: buf.parse_u32()?, + offset: buf.parse()?, + size: buf.parse()?, }, }; @@ -77,7 +77,7 @@ impl Header { buf.take((header.header_size - bytes_read) as usize)?; // do some final validations - if header.minimum_type_index < 4096 { + if header.minimum_type_index < TypeIndex(4096) { return Err(Error::InvalidTypeInformationHeader( "minimum type index is < 4096", )); diff --git a/src/tpi/mod.rs b/src/tpi/mod.rs index 8be270a..3ce2246 100644 --- a/src/tpi/mod.rs +++ b/src/tpi/mod.rs @@ -145,7 +145,7 @@ impl<'s> TypeInformation<'s> { /// Note that primitive types are not stored in the PDB file, so the number of distinct types /// reachable via this `TypeInformation` will be higher than 
`len()`. pub fn len(&self) -> usize { - (self.header.maximum_type_index - self.header.minimum_type_index) as usize + (self.header.maximum_type_index.0 - self.header.minimum_type_index.0) as usize } /// Returns whether this `TypeInformation` contains any types. @@ -220,7 +220,7 @@ impl<'t> Type<'t> { /// library /// * `Error::UnexpectedEof` if the type record is malformed pub fn parse(&self) -> Result> { - if self.0 < 0x1000 { + if self.0 < TypeIndex(0x1000) { // Primitive type type_data_for_primitive(self.0) } else { @@ -289,7 +289,7 @@ pub struct TypeFinder<'t> { impl<'t> TypeFinder<'t> { fn new(type_info: &'t TypeInformation<'_>, shift: u8) -> Self { - let count = type_info.header.maximum_type_index - type_info.header.minimum_type_index; + let count = type_info.header.maximum_type_index.0 - type_info.header.minimum_type_index.0; let shifted_count = (count >> shift) as usize; let mut positions = Vec::with_capacity(shifted_count); @@ -312,7 +312,7 @@ impl<'t> TypeFinder<'t> { /// `shift` refers to the size of these bit shifts. #[inline] fn resolve(&self, type_index: TypeIndex) -> (usize, usize) { - let raw = type_index - self.minimum_type_index; + let raw = type_index.0 - self.minimum_type_index.0; ( (raw >> self.shift) as usize, (raw & ((1 << self.shift) - 1)) as usize, @@ -331,7 +331,7 @@ impl<'t> TypeFinder<'t> { /// #[inline] pub fn max_indexed_type(&self) -> TypeIndex { - (self.positions.len() << self.shift) as TypeIndex + self.minimum_type_index - 1 + TypeIndex((self.positions.len() << self.shift) as u32 + self.minimum_type_index.0 - 1) } /// Update this `TypeFinder` based on the current position of a `TypeIter`. 
@@ -421,7 +421,7 @@ impl<'t> FallibleIterator for TypeIter<'t> { let type_buf = self.buf.take(length)?; let my_type_index = self.type_index; - self.type_index += 1; + self.type_index.0 += 1; // Done Ok(Some(Type(my_type_index, type_buf))) diff --git a/src/tpi/primitive.rs b/src/tpi/primitive.rs index 39e5a70..5d488f7 100644 --- a/src/tpi/primitive.rs +++ b/src/tpi/primitive.rs @@ -164,10 +164,10 @@ pub fn type_data_for_primitive(index: TypeIndex) -> Result> { // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L326-L750 // primitives live under 0x1000, and we should never reach here for non-primitive indexes - assert!(index < 0x1000); + assert!(index < TypeIndex(0x1000)); // indirection is stored in these bits - let indirection = match index & 0xf00 { + let indirection = match index.0 & 0xf00 { 0x000 => Indirection::None, 0x100 => Indirection::Pointer16, 0x200 => Indirection::FarPointer1616, @@ -182,7 +182,7 @@ pub fn type_data_for_primitive(index: TypeIndex) -> Result> { // primitive types are stored in the lowest octet // this groups "short" and "16-bit integer" together, but... right? 
*scratches head* - let kind = match index & 0xff { + let kind = match index.0 & 0xff { 0x03 => PrimitiveKind::Void, 0x08 => PrimitiveKind::HRESULT, diff --git a/tests/type_information.rs b/tests/type_information.rs index cfe2acd..5b527dd 100644 --- a/tests/type_information.rs +++ b/tests/type_information.rs @@ -25,10 +25,10 @@ fn iteration() { let len = type_information.len(); let mut count: usize = 0; - let mut last_index: pdb::TypeIndex = 4095; + let mut last_index = pdb::TypeIndex(4095); let mut iter = type_information.iter(); while let Some(typ) = iter.next().expect("next type") { - assert_eq!(typ.type_index(), last_index + 1); + assert_eq!(typ.type_index().0, last_index.0 + 1); last_index = typ.type_index(); count += 1; } @@ -43,12 +43,12 @@ fn type_finder() { let mut type_finder = type_information.type_finder(); let mut map: HashMap> = HashMap::new(); - assert_eq!(type_finder.max_indexed_type() >> 3, 4096 >> 3); + assert_eq!(type_finder.max_indexed_type().0 >> 3, 4096 >> 3); // iterate over all the types let mut iter = type_information.iter(); while let Some(typ) = iter.next().expect("next type") { - assert_eq!(type_finder.max_indexed_type() >> 3, typ.type_index() >> 3); + assert_eq!(type_finder.max_indexed_type().0 >> 3, typ.type_index().0 >> 3); // update the type finder type_finder.update(&iter); From 6d50bbafbdc6f5d582241331d669f7982ae83182 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 30 Aug 2019 13:18:19 +0200 Subject: [PATCH 24/31] Make SymbolIndex externally optional --- src/common.rs | 49 +---------------------------------------- src/modi/c13.rs | 2 +- src/symbol/mod.rs | 56 ++++++++++++++++++----------------------------- 3 files changed, 23 insertions(+), 84 deletions(-) diff --git a/src/common.rs b/src/common.rs index 0c18d9a..a2e55d6 100644 --- a/src/common.rs +++ b/src/common.rs @@ -258,26 +258,6 @@ macro_rules! impl_hex_fmt { }; } -/// Same as `impl_hex_fmt`, but prints `None` for none values. -macro_rules! 
impl_hex_fmt_opt { - ($type:ty, $none:literal) => { - impl fmt::Display for $type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { - $none => f.write_str("None"), - val => write!(f, "{:#x}", val), - } - } - } - - impl fmt::Debug for $type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, concat!(stringify!($type), "({})"), self) - } - } - }; -} - /// Implements bidirectional conversion traits for the newtype. macro_rules! impl_convert { ($type:ty, $inner:ty) => { @@ -658,9 +638,8 @@ impl_pread!(FileIndex); #[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct SymbolIndex(pub u32); -impl_opt!(SymbolIndex, 0); impl_convert!(SymbolIndex, u32); -impl_hex_fmt_opt!(SymbolIndex, 0); +impl_hex_fmt!(SymbolIndex); impl_pread!(SymbolIndex); /// Provides little-endian access to a &[u8]. @@ -1171,24 +1150,12 @@ mod tests { assert_eq!(format!("{}", val), "0x42"); } - #[test] - fn test_format_newtype_none() { - let val = SymbolIndex::none(); - assert_eq!(format!("{}", val), "None"); - } - #[test] fn test_debug_newtype() { let val = SymbolIndex(0x42); assert_eq!(format!("{:?}", val), "SymbolIndex(0x42)"); } - #[test] - fn test_debug_newtype_none() { - let val = SymbolIndex::none(); - assert_eq!(format!("{:?}", val), "SymbolIndex(None)"); - } - #[test] fn test_pread() { let mut buf = ParseBuffer::from(&[0x42, 0, 0, 0][..]); @@ -1196,19 +1163,5 @@ mod tests { assert_eq!(val, SymbolIndex(0x42)); assert!(buf.is_empty()); } - - #[test] - fn test_is_some() { - let val = SymbolIndex(0x42); - assert!(val.is_some()); - assert!(!val.is_none()); - } - - #[test] - fn test_is_none() { - let val = SymbolIndex::none(); - assert!(val.is_none()); - assert!(!val.is_some()); - } } } diff --git a/src/modi/c13.rs b/src/modi/c13.rs index f6fd453..2a5fb02 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -991,7 +991,7 @@ mod tests { // S_INLINESITE: Parent: 00000190, End: 000001EC, Inlinee: 0x1180 // BinaryAnnotations: 
CodeLengthAndCodeOffset 2 3f CodeLengthAndCodeOffset 3 9 let inline_site = InlineSiteSymbol { - parent: SymbolIndex(0x190), + parent: Some(SymbolIndex(0x190)), end: SymbolIndex(0x1ec), inlinee: IdIndex(0x1180), invocations: None, diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index ddb0127..1855b4d 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -125,6 +125,13 @@ fn parse_optional_name<'t>( } } +fn parse_optional_index(buf: &mut ParseBuffer<'_>) -> Result> { + Ok(match buf.parse()? { + SymbolIndex(0) => None, + index => Some(index), + }) +} + // data types are defined at: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L3038 // constants defined at: @@ -705,11 +712,11 @@ pub struct ProcedureSymbol<'t> { /// Indicates Deferred Procedure Calls (DPC). pub dpc: bool, /// The parent scope that this procedure is nested in. - pub parent: SymbolIndex, + pub parent: Option, /// The end symbol of this procedure. pub end: SymbolIndex, /// The next procedure symbol. - pub next: SymbolIndex, + pub next: Option, /// The length of the code block covered by this procedure. pub len: u32, /// Start offset of the procedure's body code, which marks the end of the prologue. @@ -749,9 +756,9 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { let symbol = ProcedureSymbol { global, dpc, - parent: buf.parse()?, + parent: parse_optional_index(&mut buf)?, end: buf.parse()?, - next: buf.parse()?, + next: parse_optional_index(&mut buf)?, len: buf.parse()?, dbg_start_offset: buf.parse()?, dbg_end_offset: buf.parse()?, @@ -775,7 +782,7 @@ pub struct InlineSiteSymbol<'t> { /// This might either be a [`ProcedureSymbol`] or another `InlineSiteSymbol`. /// /// [`ProcedureSymbol`]: struct.ProcedureSymbol.html - pub parent: SymbolIndex, + pub parent: Option, /// The end symbol of this callsite. pub end: SymbolIndex, /// Identifier of the type describing the inline function. 
@@ -794,7 +801,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for InlineSiteSymbol<'t> { let mut buf = ParseBuffer::from(this); let symbol = InlineSiteSymbol { - parent: buf.parse()?, + parent: parse_optional_index(&mut buf)?, end: buf.parse()?, inlinee: buf.parse()?, invocations: match kind { @@ -1250,12 +1257,7 @@ impl<'t> SymbolIter<'t> { /// /// This can be used to jump to the sibiling or parent of a symbol record. pub fn seek(&mut self, index: SymbolIndex) { - if index.is_some() { - self.buf.seek(index.0 as usize); - } else { - // Seek to the end of the iterator. - self.buf.seek(self.buf.pos() + self.buf.len()); - } + self.buf.seek(index.0 as usize); } /// Skip to the symbol referred to by `index`, returning the symbol. @@ -1553,9 +1555,9 @@ mod tests { SymbolData::Procedure(ProcedureSymbol { global: true, dpc: false, - parent: SymbolIndex(0), + parent: None, end: SymbolIndex(560), - next: SymbolIndex(0), + next: None, len: 6, dbg_start_offset: 5, dbg_end_offset: 5, @@ -1596,9 +1598,9 @@ mod tests { SymbolData::Procedure(ProcedureSymbol { global: false, dpc: false, - parent: SymbolIndex(0), + parent: None, end: SymbolIndex(412), - next: SymbolIndex(0), + next: None, len: 18, dbg_start_offset: 4, dbg_end_offset: 9, @@ -1780,7 +1782,7 @@ mod tests { assert_eq!( symbol.parse().expect("parse"), SymbolData::InlineSite(InlineSiteSymbol { - parent: SymbolIndex(0x0190), + parent: Some(SymbolIndex(0x0190)), end: SymbolIndex(0x01d0), inlinee: IdIndex(4473), invocations: None, @@ -1836,7 +1838,7 @@ mod tests { } #[test] - fn test_seek_some() { + fn test_seek() { let mut symbols = create_iter(); symbols.seek(SymbolIndex(0x8)); @@ -1850,16 +1852,7 @@ mod tests { } #[test] - fn test_seek_none() { - let mut symbols = create_iter(); - symbols.seek(SymbolIndex::none()); - - let symbol = symbols.next().expect("get symbol"); - assert_eq!(symbol, None); - } - - #[test] - fn test_skip_to_some() { + fn test_skip_to() { let mut symbols = create_iter(); let symbol = 
symbols.skip_to(SymbolIndex(0x8)).expect("get symbol"); @@ -1870,12 +1863,5 @@ mod tests { assert_eq!(symbol, Some(expected)); } - - #[test] - fn test_skip_to_none() { - let mut symbols = create_iter(); - let symbol = symbols.skip_to(SymbolIndex::none()).expect("get symbol"); - assert_eq!(symbol, None); - } } } From 2d8448af7e1cdc5816b71ab1602f2ddbefc0ff78 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 30 Aug 2019 13:38:03 +0200 Subject: [PATCH 25/31] Remove an obsolete comment on SymbolIndex --- src/common.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/common.rs b/src/common.rs index a2e55d6..211a393 100644 --- a/src/common.rs +++ b/src/common.rs @@ -629,10 +629,6 @@ impl_pread!(FileIndex); /// To retrieve the symbol referenced by this index, use [`ModuleInfo::symbols_at`]. When iterating, /// use [`SymbolIter::seek`] to jump between symbols. /// -/// The numeric value of this index corresponds to the binary offset of the symbol in its symbol -/// stream. The index might also indicate the absence of a symbol (numeric value `0`). This is -/// indicated by `is_none` returning `false`. Seeking to this symbol will return an empty iterator. 
-/// /// [`ModuleInfo::symbols_at`]: struct.ModuleInfo.html#method.symbols_at /// [`SymbolIter::seek`]: struct.SymbolIter.html#method.seek #[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] From d43490b89e2d1d4f8d2e0742f5044b1f1da6c14b Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Sat, 31 Aug 2019 00:40:55 +0200 Subject: [PATCH 26/31] Fix formatting --- tests/type_information.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/type_information.rs b/tests/type_information.rs index 5b527dd..55442ca 100644 --- a/tests/type_information.rs +++ b/tests/type_information.rs @@ -48,7 +48,10 @@ fn type_finder() { // iterate over all the types let mut iter = type_information.iter(); while let Some(typ) = iter.next().expect("next type") { - assert_eq!(type_finder.max_indexed_type().0 >> 3, typ.type_index().0 >> 3); + assert_eq!( + type_finder.max_indexed_type().0 >> 3, + typ.type_index().0 >> 3 + ); // update the type finder type_finder.update(&iter); From ca70b59439052b55ffb73ea76b36eb047ae8b9d1 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 3 Sep 2019 01:37:14 +0200 Subject: [PATCH 27/31] Provide more direct access to inlinees --- src/modi/c13.rs | 53 +++++++++++++++++++++++++++----------- src/modi/mod.rs | 67 +++++++++++++++++++++++++++---------------------- 2 files changed, 75 insertions(+), 45 deletions(-) diff --git a/src/modi/c13.rs b/src/modi/c13.rs index 2a5fb02..d71b8e6 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -192,11 +192,6 @@ impl<'a> DebugInlineeLinesSubsection<'a> { buf: ParseBuffer::from(self.data), } } - - /// Retrieve the inlinee source line for the given inlinee. 
- fn find(&self, inlinee: IdIndex) -> Result>> { - self.lines().find(|line| line.inlinee == inlinee) - } } #[derive(Clone, Copy, Debug, Default, Pread)] @@ -795,6 +790,41 @@ impl<'a> FallibleIterator for C13InlineeLineIterator<'a> { } } +#[derive(Clone, Debug, Default)] +pub struct C13Inlinee<'a>(InlineeSourceLine<'a>); + +impl<'a> C13Inlinee<'a> { + pub(crate) fn index(&self) -> IdIndex { + self.0.inlinee + } + + pub(crate) fn lines( + &self, + parent_offset: PdbInternalSectionOffset, + inline_site: &InlineSiteSymbol<'a>, + ) -> C13InlineeLineIterator<'a> { + C13InlineeLineIterator::new(parent_offset, inline_site, self.0) + } +} + +#[derive(Clone, Debug, Default)] +pub struct C13InlineeIterator<'a> { + inlinee_lines: DebugInlineeLinesIterator<'a>, +} + +impl<'a> FallibleIterator for C13InlineeIterator<'a> { + type Item = C13Inlinee<'a>; + type Error = Error; + + fn next(&mut self) -> Result> { + match self.inlinee_lines.next() { + Ok(Some(inlinee_line)) => Ok(Some(C13Inlinee(inlinee_line))), + Ok(None) => Ok(None), + Err(error) => Err(error), + } + } +} + #[derive(Clone, Debug, Default)] pub struct C13FileIterator<'a> { checksums: DebugFileChecksumsIterator<'a>, @@ -876,16 +906,9 @@ impl<'a> C13LineProgram<'a> { } } - pub(crate) fn inlinee_lines( - &self, - parent_offset: PdbInternalSectionOffset, - inline_site: &InlineSiteSymbol<'a>, - ) -> C13InlineeLineIterator<'a> { - match self.inlinee_lines.find(inline_site.inlinee) { - Ok(Some(inlinee_line)) => { - C13InlineeLineIterator::new(parent_offset, inline_site, inlinee_line) - } - _ => C13InlineeLineIterator::default(), + pub(crate) fn inlinees(&self) -> C13InlineeIterator<'a> { + C13InlineeIterator { + inlinee_lines: self.inlinee_lines.lines(), } } diff --git a/src/modi/mod.rs b/src/modi/mod.rs index c9d414e..de27478 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -202,22 +202,10 @@ impl<'a> LineProgram<'a> { } } - /// Returns an iterator over line records for an inline site. 
- /// - /// Note that line records are not guaranteed to be ordered by source code offset. If a - /// monotonic order by `PdbInternalSectionOffset` or `Rva` is required, the lines have to be - /// sorted manually. - pub fn inlinee_lines( - &self, - parent_offset: PdbInternalSectionOffset, - inline_site: &InlineSiteSymbol<'a>, - ) -> InlineeLineIterator<'a> { + /// Returns an iterator over all inlinees in this module. + pub fn inlinees(&self) -> InlineeIterator<'a> { match self.inner { - LineProgramInner::C13(ref inner) => InlineeLineIterator { - inner: InlineeLineIteratorInner::C13( - inner.inlinee_lines(parent_offset, inline_site), - ), - }, + LineProgramInner::C13(ref inner) => InlineeIterator(inner.inlinees()), } } @@ -259,33 +247,52 @@ impl<'a> FallibleIterator for LineIterator<'a> { } } -#[derive(Clone, Debug)] -enum InlineeLineIteratorInner<'a> { - C13(c13::C13InlineeLineIterator<'a>), +/// An inlined function that can evaluate to line information. +pub struct Inlinee<'a>(c13::C13Inlinee<'a>); + +impl<'a> Inlinee<'a> { + /// The index of this inlinee in the `IdInformation` stream (IPI). + pub fn index(&self) -> IdIndex { + self.0.index() + } + + /// Returns an iterator over line records for an inline site. + /// + /// Note that line records are not guaranteed to be ordered by source code offset. If a + /// monotonic order by `PdbInternalSectionOffset` or `Rva` is required, the lines have to be + /// sorted manually. + pub fn lines( + &self, + parent_offset: PdbInternalSectionOffset, + inline_site: &InlineSiteSymbol<'a>, + ) -> InlineeLineIterator<'a> { + InlineeLineIterator(self.0.lines(parent_offset, inline_site)) + } } /// An iterator over line information records in a module. 
-#[derive(Clone, Debug)] -pub struct InlineeLineIterator<'a> { - inner: InlineeLineIteratorInner<'a>, -} +#[derive(Clone, Debug, Default)] +pub struct InlineeIterator<'a>(c13::C13InlineeIterator<'a>); -impl Default for InlineeLineIterator<'_> { - fn default() -> Self { - InlineeLineIterator { - inner: InlineeLineIteratorInner::C13(Default::default()), - } +impl<'a> FallibleIterator for InlineeIterator<'a> { + type Item = Inlinee<'a>; + type Error = Error; + + fn next(&mut self) -> Result> { + self.0.next().map(|opt| opt.map(Inlinee)) } } +/// An iterator over line information records in a module. +#[derive(Clone, Debug, Default)] +pub struct InlineeLineIterator<'a>(c13::C13InlineeLineIterator<'a>); + impl<'a> FallibleIterator for InlineeLineIterator<'a> { type Item = LineInfo; type Error = Error; fn next(&mut self) -> Result> { - match self.inner { - InlineeLineIteratorInner::C13(ref mut inner) => inner.next(), - } + self.0.next() } } From aaddc454d206ddb9587f31b3950034c1c23692bc Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 3 Sep 2019 11:47:37 +0200 Subject: [PATCH 28/31] Add missing debug impl for Inlinees --- src/modi/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/modi/mod.rs b/src/modi/mod.rs index de27478..2042fd9 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -248,6 +248,7 @@ impl<'a> FallibleIterator for LineIterator<'a> { } /// An inlined function that can evaluate to line information. 
+#[derive(Clone, Debug)] pub struct Inlinee<'a>(c13::C13Inlinee<'a>); impl<'a> Inlinee<'a> { From 90499369f7aba325f276d94c47e853aa9bec9f6c Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Tue, 3 Sep 2019 17:59:56 +0200 Subject: [PATCH 29/31] Move inlinees to ModuleInfo --- src/modi/c13.rs | 48 +++++++++++++++++++++++++----------------------- src/modi/mod.rs | 28 ++++++++++++++++++---------- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/src/modi/c13.rs b/src/modi/c13.rs index d71b8e6..089eb22 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -812,6 +812,23 @@ pub struct C13InlineeIterator<'a> { inlinee_lines: DebugInlineeLinesIterator<'a>, } +impl<'a> C13InlineeIterator<'a> { + pub(crate) fn parse(data: &'a [u8]) -> Result { + let inlinee_data = DebugSubsectionIterator::new(data) + .find(|sec| sec.kind == DebugSubsectionKind::InlineeLines)? + .map(|sec| sec.data); + + let inlinee_lines = match inlinee_data { + Some(d) => DebugInlineeLinesSubsection::parse(d)?, + None => DebugInlineeLinesSubsection::default(), + }; + + Ok(Self { + inlinee_lines: inlinee_lines.lines(), + }) + } +} + impl<'a> FallibleIterator for C13InlineeIterator<'a> { type Item = C13Inlinee<'a>; type Error = Error; @@ -849,31 +866,22 @@ impl<'a> FallibleIterator for C13FileIterator<'a> { pub struct C13LineProgram<'a> { data: &'a [u8], file_checksums: DebugFileChecksumsSubsection<'a>, - inlinee_lines: DebugInlineeLinesSubsection<'a>, } impl<'a> C13LineProgram<'a> { pub(crate) fn parse(data: &'a [u8]) -> Result { - let mut file_checksums = DebugFileChecksumsSubsection::default(); - let mut inlinee_lines = DebugInlineeLinesSubsection::default(); - - let mut subsections = DebugSubsectionIterator::new(data); - while let Some(sec) = subsections.next()? 
{ - match sec.kind { - DebugSubsectionKind::FileChecksums => { - file_checksums = DebugFileChecksumsSubsection::parse(sec.data)?; - } - DebugSubsectionKind::InlineeLines => { - inlinee_lines = DebugInlineeLinesSubsection::parse(sec.data)?; - } - _ => {} - } - } + let checksums_data = DebugSubsectionIterator::new(data) + .find(|sec| sec.kind == DebugSubsectionKind::FileChecksums)? + .map(|sec| sec.data); + + let file_checksums = match checksums_data { + Some(d) => DebugFileChecksumsSubsection::parse(d)?, + None => DebugFileChecksumsSubsection::default(), + }; Ok(C13LineProgram { data, file_checksums, - inlinee_lines, }) } @@ -906,12 +914,6 @@ impl<'a> C13LineProgram<'a> { } } - pub(crate) fn inlinees(&self) -> C13InlineeIterator<'a> { - C13InlineeIterator { - inlinee_lines: self.inlinee_lines.lines(), - } - } - pub(crate) fn files(&self) -> C13FileIterator<'a> { C13FileIterator { checksums: self.file_checksums.entries().unwrap_or_default(), diff --git a/src/modi/mod.rs b/src/modi/mod.rs index 2042fd9..df78082 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -48,6 +48,11 @@ impl<'s> ModuleInfo<'s> { }) } + fn lines_data(&self, size: usize) -> &[u8] { + let start = self.symbols_size as usize; + &self.stream[start..start + size] + } + /// Get an iterator over the all symbols in this module. pub fn symbols(&self) -> Result> { let mut buf = self.stream.parse_buffer(); @@ -65,17 +70,27 @@ impl<'s> ModuleInfo<'s> { /// Returns a line program that gives access to file and line information in this module. pub fn line_program(&self) -> Result> { - let start = self.symbols_size as usize; let inner = match self.lines_size { LinesSize::C11(_size) => return Err(Error::UnimplementedFeature("C11 line programs")), LinesSize::C13(size) => { - let data = &self.stream[start..start + size]; - LineProgramInner::C13(c13::C13LineProgram::parse(data)?) + LineProgramInner::C13(c13::C13LineProgram::parse(self.lines_data(size))?) 
} }; Ok(LineProgram { inner }) } + + /// Returns an iterator over all inlinees in this module. + /// + /// Inlinees are not guaranteed to be sorted. When requiring random access by `ItemId`, collect + /// them into a mapping structure rather than reiterating multiple times. + pub fn inlinees(&self) -> Result> { + Ok(InlineeIterator(match self.lines_size { + // C11 does not contain inlinee information. + LinesSize::C11(_size) => Default::default(), + LinesSize::C13(size) => c13::C13InlineeIterator::parse(self.lines_data(size))?, + })) + } } /// Checksum of a source file's contents. @@ -202,13 +217,6 @@ impl<'a> LineProgram<'a> { } } - /// Returns an iterator over all inlinees in this module. - pub fn inlinees(&self) -> InlineeIterator<'a> { - match self.inner { - LineProgramInner::C13(ref inner) => InlineeIterator(inner.inlinees()), - } - } - /// Looks up file information for the specified file. pub fn get_file_info(&self, offset: FileIndex) -> Result> { match self.inner { From 84c2d43a1ad9b462a0f02d8e4cf5d78f9270b5f5 Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 4 Sep 2019 19:15:59 +0200 Subject: [PATCH 30/31] Remove invalid annotation operator and add comments --- src/symbol/annotations.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/symbol/annotations.rs b/src/symbol/annotations.rs index 81bd0c5..6a28ad0 100644 --- a/src/symbol/annotations.rs +++ b/src/symbol/annotations.rs @@ -1,5 +1,3 @@ -use std::result; - use crate::common::*; use crate::FallibleIterator; @@ -39,13 +37,11 @@ enum BinaryAnnotationOpcode { ChangeCodeLengthAndCodeOffset = 12, /// param : end column number ChangeColumnEnd = 13, - /// A non valid value - Invalid, } -impl From for BinaryAnnotationOpcode { - fn from(value: u32) -> Self { - match value { +impl BinaryAnnotationOpcode { + fn parse(value: u32) -> Result { + Ok(match value { 0 => BinaryAnnotationOpcode::Eof, 1 => BinaryAnnotationOpcode::CodeOffset, 2 => 
BinaryAnnotationOpcode::ChangeCodeOffsetBase, @@ -60,8 +56,8 @@ impl From for BinaryAnnotationOpcode { 11 => BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset, 12 => BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset, 13 => BinaryAnnotationOpcode::ChangeColumnEnd, - _ => BinaryAnnotationOpcode::Invalid, - } + _ => return Err(Error::UnknownBinaryAnnotation(value)), + }) } } @@ -105,6 +101,10 @@ pub struct BinaryAnnotationsIter<'t> { } impl<'t> BinaryAnnotationsIter<'t> { + /// Parse a compact version of an unsigned integer. + /// + /// This implements `CVUncompressData`, which can decode numbers no larger than 0x1FFFFFFF. It + /// seems that values compressed this way are only used for binary annotations at this point. fn uncompress_next(&mut self) -> Result { let b1 = u32::from(self.buffer.parse::()?); if (b1 & 0x80) == 0x00 { @@ -142,13 +142,13 @@ impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { type Item = BinaryAnnotation; type Error = Error; - fn next(&mut self) -> result::Result, Self::Error> { + fn next(&mut self) -> Result> { if self.buffer.is_empty() { return Ok(None); } let op = self.uncompress_next()?; - let annotation = match BinaryAnnotationOpcode::from(op) { + let annotation = match BinaryAnnotationOpcode::parse(op)? { BinaryAnnotationOpcode::Eof => { // This makes the end of the stream self.buffer = ParseBuffer::default(); @@ -200,9 +200,6 @@ impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { BinaryAnnotationOpcode::ChangeColumnEnd => { BinaryAnnotation::ChangeColumnEnd(self.uncompress_next()?) 
} - BinaryAnnotationOpcode::Invalid => { - return Err(Error::UnknownBinaryAnnotation(op)); - } }; Ok(Some(annotation)) From 48cf06df5e2d671a2eab02e72cf79098f3f9129b Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Wed, 4 Sep 2019 19:16:25 +0200 Subject: [PATCH 31/31] Make Register a newtype --- src/common.rs | 7 +++++++ src/symbol/mod.rs | 5 +---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/common.rs b/src/common.rs index 3d13e2b..894a32d 100644 --- a/src/common.rs +++ b/src/common.rs @@ -638,6 +638,13 @@ impl_convert!(SymbolIndex, u32); impl_hex_fmt!(SymbolIndex); impl_pread!(SymbolIndex); +/// A register referred to by its number. +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Register(pub u16); + +impl_convert!(Register, u16); +impl_pread!(Register); + /// Provides little-endian access to a &[u8]. #[derive(Debug, Clone)] pub(crate) struct ParseBuffer<'b>(&'b [u8], usize); diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 1855b4d..ab7f65b 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -23,9 +23,6 @@ pub use self::annotations::*; /// The raw type discriminator for `Symbols`. pub type SymbolKind = u16; -/// A register referred to by its number. -pub type Register = u16; - /// Represents a symbol from the symbol table. /// /// A `Symbol` is represented internally as a `&[u8]`, and in general the bytes inside are not @@ -1351,7 +1348,7 @@ mod tests { symbol.parse().expect("parse"), SymbolData::RegisterVariable(RegisterVariableSymbol { type_index: TypeIndex(8824), - register: 18, + register: Register(18), name: "this".into(), }) );