Skip to content

Commit

Permalink
Change over to using RawSymbol to store lib info.
Browse files Browse the repository at this point in the history
  • Loading branch information
WINSDK committed Apr 14, 2024
1 parent 92a7167 commit a6a1068
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 62 deletions.
26 changes: 19 additions & 7 deletions debugvault/src/elf.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
use object::{Endian, Object, ObjectSection, ObjectSymbol, ObjectSymbolTable, RelocationKind, RelocationTarget};
use object::read::elf::{ElfFile, FileHeader};
use crate::{AddressMap, Addressed, RawSymbol};
use object::elf::{R_X86_64_COPY, R_X86_64_GLOB_DAT, R_X86_64_JUMP_SLOT};
use crate::{AddressMap, Addressed};
use object::read::elf::{ElfFile, FileHeader};
use object::{
Endian, Object, ObjectSection, ObjectSymbol, ObjectSymbolTable, RelocationKind,
RelocationTarget,
};

pub struct ElfDebugInfo<'data, Elf: FileHeader> {
/// Parsed ELF header.
obj: &'data ElfFile<'data, Elf>,
/// Any parsed but not yet relocated symbols.
pub syms: AddressMap<&'data str>,
pub syms: AddressMap<RawSymbol<'data>>,
}

impl<'data, Elf: FileHeader> ElfDebugInfo<'data, Elf> {
pub fn parse(obj: &'data ElfFile<'data, Elf>) -> Result<Self, object::Error> {
let mut this = Self { obj, syms: AddressMap::default() };
let mut this = Self {
obj,
syms: AddressMap::default(),
};
this.parse_symbols();
this.parse_imports();
Ok(this)
Expand Down Expand Up @@ -73,7 +79,10 @@ impl<'data, Elf: FileHeader> ElfDebugInfo<'data, Elf> {
};

// TODO: find modules
self.syms.push(Addressed { addr, item: name });
self.syms.push(Addressed {
addr,
item: RawSymbol { name, module: None },
});
}
}
}
Expand All @@ -83,7 +92,10 @@ impl<'data, Elf: FileHeader> ElfDebugInfo<'data, Elf> {
self.syms.extend(crate::parse_symbol_table(self.obj));
self.syms.push(Addressed {
addr: self.obj.entry() as usize,
item: "entry",
item: RawSymbol {
name: "entry",
module: None,
},
});
}
}
25 changes: 14 additions & 11 deletions debugvault/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ pub struct FileAttr {
pub column_end: usize,
}

/// A symbol as collected from an object file or debug info, before it is demangled
/// into a `Symbol`.
struct RawSymbol<'data> {
/// Symbol name, borrowed for the `'data` lifetime.
name: &'data str,
/// Name of the module (e.g. dynamic library) the symbol is attributed to, if one was found.
module: Option<&'data str>,
}

pub struct Symbol {
name: TokenStream,
name_as_str: ArcStr,
Expand Down Expand Up @@ -66,15 +71,15 @@ fn is_name_an_intrinsic(name: &str) -> bool {
}

impl Symbol {
pub fn new(name: TokenStream) -> Self {
pub fn new(name: TokenStream, module: Option<&str>) -> Self {
let is_intrinsics = is_name_an_intrinsic(name.inner());
let name_as_str = String::from_iter(name.tokens().iter().map(|t| &t.text[..]));
let name_as_str = ArcStr::new(&name_as_str);

Self {
name_as_str,
name,
module: None,
module: module.map(|x| x.to_string()),
is_import: false,
is_intrinsics,
}
Expand All @@ -85,11 +90,6 @@ impl Symbol {
self
}

fn with_module(mut self, module: String) -> Self {
self.module = Some(module);
self
}

#[inline]
pub fn name(&self) -> &[Token] {
self.name.tokens()
Expand Down Expand Up @@ -146,13 +146,16 @@ pub struct Index {

fn parse_symbol_table<'data, O: Object<'data, 'data>>(
obj: &'data O,
) -> AddressMap<&'data str> {
) -> AddressMap<RawSymbol<'data>> {
let mut syms = AddressMap::default();
for sym in obj.symbols() {
match sym.name() {
Ok(name) => syms.push(Addressed {
addr: sym.address() as usize,
item: name,
item: RawSymbol {
name,
module: None
},
}),
Err(err) => {
log::complex!(
Expand Down Expand Up @@ -232,7 +235,7 @@ impl Index {

log::PROGRESS.set("Parsing symbols.", syms.len());
parallel_compute(syms.mapping, &mut this.symbols, |sym| {
let demangled = Symbol::new(demangler::parse(sym.item));
let demangled = Symbol::new(demangler::parse(sym.item.name), sym.item.module);
log::PROGRESS.step();
Addressed {
addr: sym.addr,
Expand Down Expand Up @@ -323,7 +326,7 @@ impl Index {
pub fn insert_func(&mut self, addr: usize, name: &str) {
self.symbols.push(Addressed {
addr,
item: Arc::new(Symbol::new(TokenStream::simple(name))),
item: Arc::new(Symbol::new(TokenStream::simple(name), None)),
})
}

Expand Down
48 changes: 26 additions & 22 deletions debugvault/src/macho.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::dwarf::{self, Dwarf};
use crate::{AddressMap, Addressed};
use crate::{AddressMap, Addressed, RawSymbol};
use object::macho::{self, DyldInfoCommand, DysymtabCommand, LinkeditDataCommand};
use object::read::macho::{MachHeader, MachOFile, SymbolTable};
use object::{Endianness, Object, ObjectSegment, ReadRef};
Expand Down Expand Up @@ -115,9 +115,9 @@ pub struct MachoDebugInfo<'data, Mach: MachHeader> {
/// Where the first segment starts.
base_addr: u64,
/// Dynamic libraries found when parsing load commands.
dylibs: Vec<&'data [u8]>,
dylibs: Vec<&'data str>,
/// Any parsed but not yet relocated symbols.
pub syms: AddressMap<&'data str>,
pub syms: AddressMap<RawSymbol<'data>>,
// ---- Required load commands ----
chained_fixups: Option<&'data LinkeditDataCommand<Mach::Endian>>,
symtab: Option<SymbolTable<'data, Mach>>,
Expand Down Expand Up @@ -161,7 +161,7 @@ impl<'data, Mach: MachHeader<Endian = Endianness>> MachoDebugInfo<'data, Mach> {

let twolevel = header.flags(endian) & macho::MH_TWOLEVEL != 0;
if twolevel {
self.dylibs.push(&[][..]);
self.dylibs.push("");
}

let mut load_cmds_iter = header.load_commands(endian, self.obj.data(), 0)?;
Expand All @@ -176,7 +176,9 @@ impl<'data, Mach: MachHeader<Endian = Endianness>> MachoDebugInfo<'data, Mach> {
self.dylid_info = Some(dylib_info);
}
if let Some(dylib) = lcmd.dylib()? {
self.dylibs.push(lcmd.string(endian, dylib.dylib.name)?);
let dylib = lcmd.string(endian, dylib.dylib.name)?;
let dylib = std::str::from_utf8(dylib).unwrap_or("");
self.dylibs.push(dylib);
}
if lcmd.cmd() == macho::LC_DYLD_CHAINED_FIXUPS {
self.chained_fixups = Some(lcmd.data()?);
Expand Down Expand Up @@ -249,15 +251,18 @@ impl<'data, Mach: MachHeader<Endian = Endianness>> MachoDebugInfo<'data, Mach> {
let entrypoint = self.obj.entry() + self.base_addr;
self.syms.push(Addressed {
addr: entrypoint as usize,
item: "entry",
item: RawSymbol {
name: "entry",
module: None,
},
});
}
}

#[allow(dead_code)]
fn parse_dynamic_table<'data>(
_bytes: &'data [u8],
_symbols: &mut AddressMap<&'data str>,
_symbols: &mut AddressMap<RawSymbol<'data>>,
) -> Result<(), object::Error> {
log::complex!(
w "[macho::parse_dynamic_table] ",
Expand Down Expand Up @@ -422,8 +427,8 @@ fn parse_page_starts_table_starts(page_starts: u64, page_count: u64, data: &[u8]

fn parse_chained_fixups<'data, Mach: MachHeader<Endian = Endianness>>(
base_addr: u64,
syms: &mut AddressMap<&'data str>,
dylibs: &[&'data [u8]],
syms: &mut AddressMap<RawSymbol<'data>>,
dylibs: &[&'data str],
chained_fixups: &LinkeditDataCommand<Mach::Endian>,
data: &'data [u8],
endian: Endianness,
Expand Down Expand Up @@ -616,22 +621,21 @@ fn parse_chained_fixups<'data, Mach: MachHeader<Endian = Endianness>>(
let target_addr = base_addr + chain_entry_addr;

if !entry.name.is_empty() {
// TODO: add module
//if let Some(lib) = dylibs.get(entry.lib_ordinal as usize) {
// // Strip path prefix.
// let module = String::from_utf8_lossy(lib);
// let module = module
// .rsplit_once('/')
// .map(|x| x.1)
// .unwrap_or(&module)
// .to_string();

// symbol = symbol.with_module(module);
//}
let module = dylibs.get(entry.lib_ordinal as usize).map(|lib| {
// Strip path prefix.
lib
.rsplit_once('/')
.map(|x| x.1)
.filter(|x| !x.is_empty())
.unwrap_or(lib)
});

syms.push(Addressed {
addr: target_addr as usize,
item: entry.name,
item: RawSymbol {
name: entry.name,
module,
}
});
} else {
log::complex!(
Expand Down
43 changes: 32 additions & 11 deletions debugvault/src/pdb.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::intern::InternMap;
use crate::{AddressMap, Addressed, FileAttr};
use crate::{AddressMap, Addressed, FileAttr, RawSymbol};
use crossbeam_queue::SegQueue;
use object::Object;
use pdb::{FallibleIterator, SymbolData};
use std::borrow::Cow;
use std::path::Path;
use std::pin::Pin;
use std::sync::Arc;
Expand All @@ -12,8 +13,10 @@ pub struct PDB<'data> {
pub file_attrs: AddressMap<FileAttr>,
/// Container for holding syms.
global_syms: pdb::SymbolTable<'data>,
/// Container for holding libraries.
dbi: pdb::DebugInformation<'data>,
/// Mapping from addresses starting at the header base to functions.
pub syms: AddressMap<&'data str>,
pub syms: AddressMap<RawSymbol<'data>>,
}

impl<'data> PDB<'data> {
Expand All @@ -37,8 +40,9 @@ fn parse_pdb<'data>(

let mut this = Box::pin(PDB {
file_attrs: AddressMap::default(),
syms: AddressMap::default(),
global_syms: pdb.global_symbols()?,
dbi: pdb.debug_information()?,
syms: AddressMap::default(),
});

// Mapping from offsets to RVAs.
Expand All @@ -48,8 +52,8 @@ fn parse_pdb<'data>(
let string_table = pdb.string_table()?;

// PDB modules.
let dbi = pdb.debug_information()?;
let mut modules = dbi.modules()?;
// SAFETY: this is fine because dbi is pinned as part of the PDB.
let mut modules: pdb::ModuleIter<'data> = unsafe { std::mem::transmute(this.dbi.modules()?) };

// Create a concurrent interner for caching file paths.
let path_cache = InternMap::new();
Expand All @@ -58,12 +62,27 @@ fn parse_pdb<'data>(
let module_info_queue = SegQueue::new();
let mut id = 0;
while let Some(module) = modules.next()? {
let mut module_name = module.module_name();

// This is sort of insane, but this is only fine when module_name is valid UTF-8.
// When module_name isn't valid UTF-8 it will be allocated, so check whether it's Cow::Owned.
if let Cow::Owned(_) = module_name {
module_name = Cow::Borrowed("");
}

let module_name = module_name
.strip_prefix("Import:")
.and_then(|x| x.strip_suffix(".dll"));

// Explained in earlier comment.
let module_name: Option<&'data str> = unsafe { std::mem::transmute(module_name) };

let module_info = match pdb.module_info(&module)? {
Some(info) => info,
None => continue,
};

module_info_queue.push((id, module_info));
module_info_queue.push((id, module_name, module_info));
id += 1;
}

Expand All @@ -79,9 +98,10 @@ fn parse_pdb<'data>(
let mut syms = AddressMap::default();
let mut file_attrs = AddressMap::default();

while let Some((module_id, module_info)) = module_info_queue.pop() {
while let Some((id, module_name, module_info)) = module_info_queue.pop() {
parse_pdb_module(
module_id,
id,
module_name,
base_addr,
&path_cache,
module_info,
Expand Down Expand Up @@ -136,7 +156,7 @@ fn parse_pdb<'data>(

this.syms.push(Addressed {
addr: base_addr + addr,
item: name,
item: RawSymbol { name, module: None },
});
}
Ok(_) => {
Expand All @@ -156,13 +176,14 @@ fn parse_pdb<'data>(
#[allow(clippy::too_many_arguments)]
fn parse_pdb_module<'data>(
module_id: u64,
module_name: Option<&'data str>,
base_addr: usize,
path_cache: &InternMap<u64, Path>,
module_info: pdb::ModuleInfo,
address_map: &pdb::AddressMap,
string_table: &pdb::StringTable<'data>,
file_attrs: &mut AddressMap<FileAttr>,
syms: &mut AddressMap<&'data str>,
syms: &mut AddressMap<RawSymbol<'data>>,
) -> Result<(), pdb::Error> {
let program = module_info.line_program()?;
let mut symbols = module_info.symbols()?;
Expand All @@ -182,7 +203,7 @@ fn parse_pdb_module<'data>(

syms.push(Addressed {
addr: base_addr + addr,
item: name,
item: RawSymbol { name, module: module_name },
});
}
Ok(SymbolData::Procedure(proc)) => {
Expand Down
Loading

0 comments on commit a6a1068

Please sign in to comment.