Skip to content

Commit

Permalink
Merge pull request #216 from philipc/symbol-table
Browse files Browse the repository at this point in the history
read: start exposing lower level parsing API
  • Loading branch information
philipc committed May 6, 2020
2 parents 59a2cb6 + 625e8e2 commit 0028da9
Show file tree
Hide file tree
Showing 18 changed files with 1,105 additions and 341 deletions.
7 changes: 3 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@ pub use common::*;

#[macro_use]
pub mod endian;
pub use endian::*;

#[macro_use]
mod pod;
// This isn't really intended for users yet, but other traits required it.
#[doc(hidden)]
pub use pod::Pod;
pub mod pod;
pub use pod::*;

#[cfg(feature = "read_core")]
pub mod read;
Expand Down
31 changes: 31 additions & 0 deletions src/pod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,23 @@ impl<'data> fmt::Debug for Bytes<'data> {
}

impl<'data> Bytes<'data> {
/// Return the length of the byte slice.
#[inline]
pub fn len(&self) -> usize {
self.0.len()
}

/// Return true if the byte slice is empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}

/// Skip over the given number of bytes at the start of the byte slice.
///
/// Modifies the byte slice to start after the bytes.
///
/// Returns an error if there are too few bytes.
#[inline]
pub fn skip(&mut self, offset: usize) -> Result<()> {
match self.0.get(offset..) {
Expand All @@ -98,6 +105,11 @@ impl<'data> Bytes<'data> {
}
}

/// Return a reference to the given number of bytes at the start of the byte slice.
///
/// Modifies the byte slice to start after the bytes.
///
/// Returns an error if there are too few bytes.
#[inline]
pub fn read_bytes(&mut self, count: usize) -> Result<Bytes<'data>> {
match (self.0.get(..count), self.0.get(count..)) {
Expand All @@ -112,12 +124,20 @@ impl<'data> Bytes<'data> {
}
}

/// Return a reference to the given number of bytes at the given offset of the byte slice.
///
/// Returns an error if the offset is invalid or there are too few bytes.
#[inline]
pub fn read_bytes_at(mut self, offset: usize, count: usize) -> Result<Bytes<'data>> {
self.skip(offset)?;
self.read_bytes(count)
}

/// Return a reference to a `Pod` struct at the start of the byte slice.
///
/// Modifies the byte slice to start after the bytes.
///
/// Returns an error if there are too few bytes or the slice is incorrectly aligned.
#[inline]
pub fn read<T: Pod>(&mut self) -> Result<&'data T> {
match from_bytes(self.0) {
Expand All @@ -132,12 +152,20 @@ impl<'data> Bytes<'data> {
}
}

/// Return a reference to a `Pod` struct at the given offset of the byte slice.
///
/// Returns an error if there are too few bytes or the offset is incorrectly aligned.
#[inline]
pub fn read_at<T: Pod>(mut self, offset: usize) -> Result<&'data T> {
self.skip(offset)?;
self.read()
}

/// Return a reference to a slice of `Pod` structs at the start of the byte slice.
///
/// Modifies the byte slice to start after the bytes.
///
/// Returns an error if there are too few bytes or the slice is incorrectly aligned.
#[inline]
pub fn read_slice<T: Pod>(&mut self, count: usize) -> Result<&'data [T]> {
match slice_from_bytes(self.0, count) {
Expand All @@ -152,6 +180,9 @@ impl<'data> Bytes<'data> {
}
}

/// Return a reference to a slice of `Pod` structs at the given offset of the byte slice.
///
/// Returns an error if there are too few bytes or the offset is incorrectly aligned.
#[inline]
pub fn read_slice_at<T: Pod>(mut self, offset: usize, count: usize) -> Result<&'data [T]> {
self.skip(offset)?;
Expand Down
63 changes: 42 additions & 21 deletions src/read/coff/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ use crate::read::{

use super::{
parse_symbol, CoffSection, CoffSectionIterator, CoffSegment, CoffSegmentIterator,
CoffSymbolIterator, SymbolTable,
CoffSymbolIterator, SectionTable, SymbolTable,
};

/// A COFF object file.
#[derive(Debug)]
pub struct CoffFile<'data> {
pub(super) header: &'data pe::ImageFileHeader,
pub(super) sections: &'data [pe::ImageSectionHeader],
pub(super) sections: SectionTable<'data>,
// TODO: ImageSymbolExBytes
pub(super) symbols: SymbolTable<'data>,
pub(super) data: Bytes<'data>,
Expand All @@ -29,21 +29,10 @@ impl<'data> CoffFile<'data> {
/// Parse the raw COFF file data.
pub fn parse(data: &'data [u8]) -> Result<Self> {
let data = Bytes(data);
let mut tail = data;
let header = tail
.read::<pe::ImageFileHeader>()
.read_error("Invalid COFF file header size or alignment")?;

// Skip over the optional header and get the section headers.
tail.skip(header.size_of_optional_header.get(LE) as usize)
.read_error("Invalid COFF optional header size")?;
let sections = tail
.read_slice(header.number_of_sections.get(LE) as usize)
.read_error("Invalid COFF section headers")?;

let symbols = SymbolTable::parse(header, data)?;
let (header, tail) = pe::ImageFileHeader::parse(data)?;
let sections = header.sections(tail)?;
let symbols = header.symbols(data)?;

// TODO: maybe validate that the machine is known?
Ok(CoffFile {
header,
sections,
Expand Down Expand Up @@ -96,10 +85,7 @@ where
}

fn section_by_index(&'file self, index: SectionIndex) -> Result<CoffSection<'data, 'file>> {
let section = self
.sections
.get(index.0)
.read_error("Invalid COFF section index")?;
let section = self.sections.section(index.0)?;
Ok(CoffSection {
file: self,
index,
Expand Down Expand Up @@ -133,7 +119,7 @@ where
CoffSymbolIterator {
symbols: &self.symbols,
// Hack: don't return any.
index: self.symbols.symbols.len(),
index: self.symbols.len(),
}
}

Expand Down Expand Up @@ -163,3 +149,38 @@ where
}
}
}

impl pe::ImageFileHeader {
/// Read the COFF file header.
///
/// Reads an `ImageFileHeader` from the start of `data`, then skips over the optional
/// header using the `size_of_optional_header` field of that header.
///
/// The given data must be for the entire file. Returns the header together with the data
/// following the optional header, which will contain the section headers.
///
/// Returns an error if the header cannot be read from `data` (invalid size or alignment),
/// or if `data` has too few bytes to skip over the optional header.
pub fn parse<'data>(mut data: Bytes<'data>) -> read::Result<(&'data Self, Bytes<'data>)> {
let header = data
.read::<pe::ImageFileHeader>()
.read_error("Invalid COFF file header size or alignment")?;

// Skip over the optional header.
data.skip(header.size_of_optional_header.get(LE) as usize)
.read_error("Invalid COFF optional header size")?;

// TODO: maybe validate that the machine is known?
Ok((header, data))
}

/// Read the section table.
///
/// `tail` must be the data following the optional header.
///
/// This forwards to `SectionTable::parse` and returns its result unchanged.
#[inline]
fn sections<'data>(&self, tail: Bytes<'data>) -> read::Result<SectionTable<'data>> {
SectionTable::parse(self, tail)
}

/// Read the symbol table and string table.
///
/// `data` must be the entire file data.
///
/// This forwards to `SymbolTable::parse` and returns its result unchanged.
#[inline]
fn symbols<'data>(&self, data: Bytes<'data>) -> read::Result<SymbolTable<'data>> {
SymbolTable::parse(self, data)
}
}
68 changes: 66 additions & 2 deletions src/read/coff/section.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,70 @@ use crate::read::{

use super::{CoffFile, CoffRelocationIterator};

/// The table of section headers in a COFF or PE file.
#[derive(Debug, Default, Clone, Copy)]
pub struct SectionTable<'data> {
    sections: &'data [pe::ImageSectionHeader],
}

impl<'data> SectionTable<'data> {
    /// Parse the section table.
    ///
    /// `data` must be the data following the optional header. The number of
    /// headers to read is taken from `header.number_of_sections`.
    ///
    /// Returns an error if the section headers cannot be read from `data`.
    pub fn parse(header: &pe::ImageFileHeader, mut data: Bytes<'data>) -> Result<Self> {
        let count = header.number_of_sections.get(LE) as usize;
        let sections = data
            .read_slice(count)
            .read_error("Invalid COFF/PE section headers")?;
        Ok(Self { sections })
    }

    /// Iterate over the section headers.
    ///
    /// Warning: section indices start at 1.
    #[inline]
    pub fn iter(&self) -> slice::Iter<'data, pe::ImageSectionHeader> {
        self.sections.iter()
    }

    /// Return true if the section table is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.sections.is_empty()
    }

    /// The number of section headers.
    #[inline]
    pub fn len(&self) -> usize {
        self.sections.len()
    }

    /// Return the section header at the given index.
    ///
    /// The index is 1-based.
    ///
    /// Returns an error if no section header exists at that index.
    pub fn section(&self, index: usize) -> read::Result<&'data pe::ImageSectionHeader> {
        // Convert to a 0-based slot. An index of 0 wraps around to `usize::MAX`,
        // which is never a valid slot and so produces the error below.
        let slot = index.wrapping_sub(1);
        self.sections
            .get(slot)
            .read_error("Invalid COFF/PE section index")
    }

    /// Return the section header with the given name.
    ///
    /// The returned index is 1-based.
    ///
    /// Ignores sections with invalid names.
    pub fn section_by_name(
        &self,
        strings: StringTable<'data>,
        name: &[u8],
    ) -> Option<(usize, &'data pe::ImageSectionHeader)> {
        for (position, section) in self.sections.iter().enumerate() {
            // A section whose name fails to parse never compares equal, so it is skipped.
            if section.name(strings) == Ok(name) {
                return Some((position + 1, section));
            }
        }
        None
    }
}

/// An iterator over the loadable sections of a `CoffFile`.
#[derive(Debug)]
pub struct CoffSegmentIterator<'data, 'file>
Expand Down Expand Up @@ -91,7 +155,7 @@ impl<'data, 'file> ObjectSegment<'data> for CoffSegment<'data, 'file> {

#[inline]
fn name(&self) -> Result<Option<&str>> {
let name = self.section.name(self.file.symbols.strings)?;
let name = self.section.name(self.file.symbols.strings())?;
Ok(Some(
str::from_utf8(name)
.ok()
Expand Down Expand Up @@ -194,7 +258,7 @@ impl<'data, 'file> ObjectSection<'data> for CoffSection<'data, 'file> {

#[inline]
fn name(&self) -> Result<&str> {
let name = self.section.name(self.file.symbols.strings)?;
let name = self.section.name(self.file.symbols.strings())?;
str::from_utf8(name)
.ok()
.read_error("Non UTF-8 COFF section name")
Expand Down
68 changes: 55 additions & 13 deletions src/read/coff/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ use crate::read::{
SymbolSection,
};

/// A table of symbol entries in a COFF or PE file.
///
/// Also includes the string table used for the symbol names.
#[derive(Debug)]
pub(crate) struct SymbolTable<'data> {
pub symbols: &'data [pe::ImageSymbolBytes],
pub strings: StringTable<'data>,
pub struct SymbolTable<'data> {
symbols: &'data [pe::ImageSymbolBytes],
strings: StringTable<'data>,
}

impl<'data> SymbolTable<'data> {
/// Read the symbol table.
pub fn parse(header: &pe::ImageFileHeader, mut data: Bytes<'data>) -> Result<Self> {
// The symbol table may not be present.
let symbol_offset = header.pointer_to_symbol_table.get(LE) as usize;
Expand All @@ -44,16 +48,62 @@ impl<'data> SymbolTable<'data> {

Ok(SymbolTable {
symbols,
strings: StringTable { data: strings },
strings: StringTable::new(strings),
})
}

/// Return the string table used for the symbol names.
#[inline]
pub fn strings(&self) -> StringTable<'data> {
self.strings
}

/// Return true if the symbol table is empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.symbols.is_empty()
}

/// The number of symbols.
#[inline]
pub fn len(&self) -> usize {
self.symbols.len()
}

/// Return the symbol table entry at the given index.
///
/// Returns `None` under the same conditions as `get`.
#[inline]
pub fn symbol(&self, index: usize) -> Option<&'data pe::ImageSymbol> {
self.get::<pe::ImageSymbol>(index)
}

/// Return the symbol table entry or auxiliary record at the given index.
///
/// The bytes of the entry at `index` are reinterpreted as a `T`.
///
/// Returns `None` if `index` is out of range, or if a `T` cannot be read from
/// the bytes of that entry.
pub fn get<T: Pod>(&self, index: usize) -> Option<&'data T> {
let bytes = self.symbols.get(index)?;
Bytes(&bytes.0[..]).read().ok()
}
}

impl pe::ImageSymbol {
    /// Parse a COFF symbol name.
    ///
    /// `strings` must be the string table used for symbol names.
    ///
    /// Returns an error if the name is stored as a string table offset and that
    /// offset cannot be resolved in `strings`.
    pub fn name<'data>(&'data self, strings: StringTable<'data>) -> Result<&'data [u8]> {
        if self.name[0] != 0 {
            // The name is stored inline and padded with nulls; without a null
            // terminator the whole field is the name.
            let end = self
                .name
                .iter()
                .position(|&x| x == 0)
                .unwrap_or(self.name.len());
            return Ok(&self.name[..end]);
        }

        // A leading 0 means the last 4 bytes hold a string table offset.
        let offset = u32::from_le_bytes(self.name[4..8].try_into().unwrap());
        strings
            .get(offset)
            .read_error("Invalid COFF symbol name offset")
    }
}

/// An iterator over the symbols of a `CoffFile`.
pub struct CoffSymbolIterator<'data, 'file>
where
Expand Down Expand Up @@ -104,16 +154,8 @@ pub(crate) fn parse_symbol<'data>(
} else {
None
}
} else if symbol.name[0] == 0 {
// If the name starts with 0 then the last 4 bytes are a string table offset.
let offset = u32::from_le_bytes(symbol.name[4..8].try_into().unwrap());
symbols.strings.get(offset).ok()
} else {
// The name is inline and padded with nulls.
Some(match symbol.name.iter().position(|&x| x == 0) {
Some(end) => &symbol.name[..end],
None => &symbol.name[..],
})
symbol.name(symbols.strings()).ok()
};
let name = name.and_then(|s| str::from_utf8(s).ok());

Expand Down

0 comments on commit 0028da9

Please sign in to comment.