Merge pull request #216 from philipc/symbol-table

read: start exposing lower level parsing API
gimli-rs · May 6, 2020 · 0028da9 · 0028da9
2 parents 59a2cb6 + 625e8e2
commit 0028da9
Show file tree

Hide file tree

Showing 18 changed files with 1,105 additions and 341 deletions.
diff --git a/src/lib.rs b/src/lib.rs
@@ -27,12 +27,11 @@ pub use common::*;
 
 #[macro_use]
 pub mod endian;
+pub use endian::*;
 
 #[macro_use]
-mod pod;
-// This isn't really intended for users yet, but other traits required it.
-#[doc(hidden)]
-pub use pod::Pod;
+pub mod pod;
+pub use pod::*;
 
 #[cfg(feature = "read_core")]
 pub mod read;

diff --git a/src/pod.rs b/src/pod.rs
@@ -74,16 +74,23 @@ impl<'data> fmt::Debug for Bytes<'data> {
 }
 
 impl<'data> Bytes<'data> {
+    /// Return the length of the byte slice.
     #[inline]
     pub fn len(&self) -> usize {
         self.0.len()
     }
 
+    /// Return true if the byte slice is empty.
     #[inline]
     pub fn is_empty(&self) -> bool {
         self.0.is_empty()
     }
 
+    /// Skip over the given number of bytes at the start of the byte slice.
+    ///
+    /// Modifies the byte slice to start after the bytes.
+    ///
+    /// Returns an error if there are too few bytes.
     #[inline]
     pub fn skip(&mut self, offset: usize) -> Result<()> {
         match self.0.get(offset..) {
@@ -98,6 +105,11 @@ impl<'data> Bytes<'data> {
         }
     }
 
+    /// Return a reference to the given number of bytes at the start of the byte slice.
+    ///
+    /// Modifies the byte slice to start after the bytes.
+    ///
+    /// Returns an error if there are too few bytes.
     #[inline]
     pub fn read_bytes(&mut self, count: usize) -> Result<Bytes<'data>> {
         match (self.0.get(..count), self.0.get(count..)) {
@@ -112,12 +124,20 @@ impl<'data> Bytes<'data> {
         }
     }
 
+    /// Return a reference to the given number of bytes at the given offset of the byte slice.
+    ///
+    /// Returns an error if the offset is invalid or there are too few bytes.
     #[inline]
     pub fn read_bytes_at(mut self, offset: usize, count: usize) -> Result<Bytes<'data>> {
         self.skip(offset)?;
         self.read_bytes(count)
     }
 
+    /// Return a reference to a `Pod` struct at the start of the byte slice.
+    ///
+    /// Modifies the byte slice to start after the bytes.
+    ///
+    /// Returns an error if there are too few bytes or the slice is incorrectly aligned.
     #[inline]
     pub fn read<T: Pod>(&mut self) -> Result<&'data T> {
         match from_bytes(self.0) {
@@ -132,12 +152,20 @@ impl<'data> Bytes<'data> {
         }
     }
 
+    /// Return a reference to a `Pod` struct at the given offset of the byte slice.
+    ///
+    /// Returns an error if there are too few bytes or the offset is incorrectly aligned.
     #[inline]
     pub fn read_at<T: Pod>(mut self, offset: usize) -> Result<&'data T> {
         self.skip(offset)?;
         self.read()
     }
 
+    /// Return a reference to a slice of `Pod` structs at the start of the byte slice.
+    ///
+    /// Modifies the byte slice to start after the bytes.
+    ///
+    /// Returns an error if there are too few bytes or the slice is incorrectly aligned.
     #[inline]
     pub fn read_slice<T: Pod>(&mut self, count: usize) -> Result<&'data [T]> {
         match slice_from_bytes(self.0, count) {
@@ -152,6 +180,9 @@ impl<'data> Bytes<'data> {
         }
     }
 
+    /// Return a reference to a slice of `Pod` structs at the given offset of the byte slice.
+    ///
+    /// Returns an error if there are too few bytes or the offset is incorrectly aligned.
     #[inline]
     pub fn read_slice_at<T: Pod>(mut self, offset: usize, count: usize) -> Result<&'data [T]> {
         self.skip(offset)?;

diff --git a/src/read/coff/file.rs b/src/read/coff/file.rs
@@ -12,14 +12,14 @@ use crate::read::{
 
 use super::{
     parse_symbol, CoffSection, CoffSectionIterator, CoffSegment, CoffSegmentIterator,
-    CoffSymbolIterator, SymbolTable,
+    CoffSymbolIterator, SectionTable, SymbolTable,
 };
 
 /// A COFF object file.
 #[derive(Debug)]
 pub struct CoffFile<'data> {
     pub(super) header: &'data pe::ImageFileHeader,
-    pub(super) sections: &'data [pe::ImageSectionHeader],
+    pub(super) sections: SectionTable<'data>,
     // TODO: ImageSymbolExBytes
     pub(super) symbols: SymbolTable<'data>,
     pub(super) data: Bytes<'data>,
@@ -29,21 +29,10 @@ impl<'data> CoffFile<'data> {
     /// Parse the raw COFF file data.
     pub fn parse(data: &'data [u8]) -> Result<Self> {
         let data = Bytes(data);
-        let mut tail = data;
-        let header = tail
-            .read::<pe::ImageFileHeader>()
-            .read_error("Invalid COFF file header size or alignment")?;
-
-        // Skip over the optional header and get the section headers.
-        tail.skip(header.size_of_optional_header.get(LE) as usize)
-            .read_error("Invalid COFF optional header size")?;
-        let sections = tail
-            .read_slice(header.number_of_sections.get(LE) as usize)
-            .read_error("Invalid COFF section headers")?;
-
-        let symbols = SymbolTable::parse(header, data)?;
+        let (header, tail) = pe::ImageFileHeader::parse(data)?;
+        let sections = header.sections(tail)?;
+        let symbols = header.symbols(data)?;
 
-        // TODO: maybe validate that the machine is known?
         Ok(CoffFile {
             header,
             sections,
@@ -96,10 +85,7 @@ where
     }
 
     fn section_by_index(&'file self, index: SectionIndex) -> Result<CoffSection<'data, 'file>> {
-        let section = self
-            .sections
-            .get(index.0)
-            .read_error("Invalid COFF section index")?;
+        let section = self.sections.section(index.0)?;
         Ok(CoffSection {
             file: self,
             index,
@@ -133,7 +119,7 @@ where
         CoffSymbolIterator {
             symbols: &self.symbols,
             // Hack: don't return any.
-            index: self.symbols.symbols.len(),
+            index: self.symbols.len(),
         }
     }
 
@@ -163,3 +149,38 @@ where
         }
     }
 }
+
+impl pe::ImageFileHeader {
+    /// Read the COFF file header.
+    ///
+    /// The given data must be for the entire file.  Returns the data following the optional
+    /// header, which will contain the section headers.
+    pub fn parse<'data>(mut data: Bytes<'data>) -> read::Result<(&'data Self, Bytes<'data>)> {
+        let header = data
+            .read::<pe::ImageFileHeader>()
+            .read_error("Invalid COFF file header size or alignment")?;
+
+        // Skip over the optional header.
+        data.skip(header.size_of_optional_header.get(LE) as usize)
+            .read_error("Invalid COFF optional header size")?;
+
+        // TODO: maybe validate that the machine is known?
+        Ok((header, data))
+    }
+
+    /// Read the section table.
+    ///
+    /// `tail` must be the data following the optional header.
+    #[inline]
+    fn sections<'data>(&self, tail: Bytes<'data>) -> read::Result<SectionTable<'data>> {
+        SectionTable::parse(self, tail)
+    }
+
+    /// Read the symbol table and string table.
+    ///
+    /// `data` must be the entire file data.
+    #[inline]
+    fn symbols<'data>(&self, data: Bytes<'data>) -> read::Result<SymbolTable<'data>> {
+        SymbolTable::parse(self, data)
+    }
+}
diff --git a/src/read/coff/section.rs b/src/read/coff/section.rs
@@ -13,6 +13,70 @@ use crate::read::{
 
 use super::{CoffFile, CoffRelocationIterator};
 
+/// The table of section headers in a COFF or PE file.
+#[derive(Debug, Default, Clone, Copy)]
+pub struct SectionTable<'data> {
+    sections: &'data [pe::ImageSectionHeader],
+}
+
+impl<'data> SectionTable<'data> {
+    /// Parse the section table.
+    ///
+    /// `data` must be the data following the optional header.
+    pub fn parse(header: &pe::ImageFileHeader, mut data: Bytes<'data>) -> Result<Self> {
+        let sections = data
+            .read_slice(header.number_of_sections.get(LE) as usize)
+            .read_error("Invalid COFF/PE section headers")?;
+        Ok(SectionTable { sections })
+    }
+
+    /// Iterate over the section headers.
+    ///
+    /// Warning: section indices start at 1.
+    #[inline]
+    pub fn iter(&self) -> slice::Iter<'data, pe::ImageSectionHeader> {
+        self.sections.iter()
+    }
+
+    /// Return true if the section table is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.sections.is_empty()
+    }
+
+    /// The number of section headers.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.sections.len()
+    }
+
+    /// Return the section header at the given index.
+    ///
+    /// The index is 1-based.
+    pub fn section(&self, index: usize) -> read::Result<&'data pe::ImageSectionHeader> {
+        self.sections
+            .get(index.wrapping_sub(1))
+            .read_error("Invalid COFF/PE section index")
+    }
+
+    /// Return the section header with the given name.
+    ///
+    /// The returned index is 1-based.
+    ///
+    /// Ignores sections with invalid names.
+    pub fn section_by_name(
+        &self,
+        strings: StringTable<'data>,
+        name: &[u8],
+    ) -> Option<(usize, &'data pe::ImageSectionHeader)> {
+        self.sections
+            .iter()
+            .enumerate()
+            .find(|(_, section)| section.name(strings) == Ok(name))
+            .map(|(index, section)| (index + 1, section))
+    }
+}
+
 /// An iterator over the loadable sections of a `CoffFile`.
 #[derive(Debug)]
 pub struct CoffSegmentIterator<'data, 'file>
@@ -91,7 +155,7 @@ impl<'data, 'file> ObjectSegment<'data> for CoffSegment<'data, 'file> {
 
     #[inline]
     fn name(&self) -> Result<Option<&str>> {
-        let name = self.section.name(self.file.symbols.strings)?;
+        let name = self.section.name(self.file.symbols.strings())?;
         Ok(Some(
             str::from_utf8(name)
                 .ok()
@@ -194,7 +258,7 @@ impl<'data, 'file> ObjectSection<'data> for CoffSection<'data, 'file> {
 
     #[inline]
     fn name(&self) -> Result<&str> {
-        let name = self.section.name(self.file.symbols.strings)?;
+        let name = self.section.name(self.file.symbols.strings())?;
         str::from_utf8(name)
             .ok()
             .read_error("Non UTF-8 COFF section name")

diff --git a/src/read/coff/symbol.rs b/src/read/coff/symbol.rs
@@ -11,13 +11,17 @@ use crate::read::{
     SymbolSection,
 };
 
+/// A table of symbol entries in a COFF or PE file.
+///
+/// Also includes the string table used for the symbol names.
 #[derive(Debug)]
-pub(crate) struct SymbolTable<'data> {
-    pub symbols: &'data [pe::ImageSymbolBytes],
-    pub strings: StringTable<'data>,
+pub struct SymbolTable<'data> {
+    symbols: &'data [pe::ImageSymbolBytes],
+    strings: StringTable<'data>,
 }
 
 impl<'data> SymbolTable<'data> {
+    /// Read the symbol table.
     pub fn parse(header: &pe::ImageFileHeader, mut data: Bytes<'data>) -> Result<Self> {
         // The symbol table may not be present.
         let symbol_offset = header.pointer_to_symbol_table.get(LE) as usize;
@@ -44,16 +48,62 @@ impl<'data> SymbolTable<'data> {
 
         Ok(SymbolTable {
             symbols,
-            strings: StringTable { data: strings },
+            strings: StringTable::new(strings),
         })
     }
 
+    /// Return the string table used for the symbol names.
+    #[inline]
+    pub fn strings(&self) -> StringTable<'data> {
+        self.strings
+    }
+
+    /// Return true if the symbol table is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.symbols.is_empty()
+    }
+
+    /// The number of symbols.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.symbols.len()
+    }
+
+    /// Return the symbol table entry at the given index.
+    #[inline]
+    pub fn symbol(&self, index: usize) -> Option<&'data pe::ImageSymbol> {
+        self.get::<pe::ImageSymbol>(index)
+    }
+
+    /// Return the symbol table entry or auxiliary record at the given index.
     pub fn get<T: Pod>(&self, index: usize) -> Option<&'data T> {
         let bytes = self.symbols.get(index)?;
         Bytes(&bytes.0[..]).read().ok()
     }
 }
 
+impl pe::ImageSymbol {
+    /// Parse a COFF symbol name.
+    ///
+    /// `strings` must be the string table used for symbol names.
+    pub fn name<'data>(&'data self, strings: StringTable<'data>) -> Result<&'data [u8]> {
+        if self.name[0] == 0 {
+            // If the name starts with 0 then the last 4 bytes are a string table offset.
+            let offset = u32::from_le_bytes(self.name[4..8].try_into().unwrap());
+            strings
+                .get(offset)
+                .read_error("Invalid COFF symbol name offset")
+        } else {
+            // The name is inline and padded with nulls.
+            Ok(match self.name.iter().position(|&x| x == 0) {
+                Some(end) => &self.name[..end],
+                None => &self.name[..],
+            })
+        }
+    }
+}
+
 /// An iterator over the symbols of a `CoffFile`.
 pub struct CoffSymbolIterator<'data, 'file>
 where
@@ -104,16 +154,8 @@ pub(crate) fn parse_symbol<'data>(
         } else {
             None
         }
-    } else if symbol.name[0] == 0 {
-        // If the name starts with 0 then the last 4 bytes are a string table offset.
-        let offset = u32::from_le_bytes(symbol.name[4..8].try_into().unwrap());
-        symbols.strings.get(offset).ok()
     } else {
-        // The name is inline and padded with nulls.
-        Some(match symbol.name.iter().position(|&x| x == 0) {
-            Some(end) => &symbol.name[..end],
-            None => &symbol.name[..],
-        })
+        symbol.name(symbols.strings()).ok()
     };
     let name = name.and_then(|s| str::from_utf8(s).ok());