diff --git a/Cargo.toml b/Cargo.toml index 49b2a23df078..2e57e4e2f95c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ cranelift-codegen = "0.28.0" cranelift-native = "0.28.0" cranelift-entity = "0.28.0" cranelift-wasm = "0.28.0" +wasmtime-debug = { path = "lib/debug" } wasmtime-environ = { path = "lib/environ" } wasmtime-runtime = { path = "lib/runtime" } wasmtime-jit = { path = "lib/jit" } diff --git a/fuzz/fuzz_targets/compile.rs b/fuzz/fuzz_targets/compile.rs index c912b89db5ce..c0483f7c5053 100644 --- a/fuzz/fuzz_targets/compile.rs +++ b/fuzz/fuzz_targets/compile.rs @@ -27,7 +27,7 @@ fuzz_target!(|data: &[u8]| { let mut compiler = Compiler::new(isa); let mut resolver = NullResolver {}; let global_exports = Rc::new(RefCell::new(HashMap::new())); - let _compiled = match CompiledModule::new(&mut compiler, data, &mut resolver, global_exports) { + let _compiled = match CompiledModule::new(&mut compiler, data, &mut resolver, global_exports, false) { Ok(x) => x, Err(_) => return, }; diff --git a/lib/debug/.gitignore b/lib/debug/.gitignore new file mode 100644 index 000000000000..4308d822046d --- /dev/null +++ b/lib/debug/.gitignore @@ -0,0 +1,3 @@ +target/ +**/*.rs.bk +Cargo.lock diff --git a/lib/debug/Cargo.toml b/lib/debug/Cargo.toml new file mode 100644 index 000000000000..2ecaa13aeeb7 --- /dev/null +++ b/lib/debug/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "wasmtime-debug" +version = "0.1.0" +authors = ["The Wasmtime Project Developers"] +description = "Debug utils for WebAssembly code in Cranelift" +repository = "https://github.com/CraneStation/wasmtime" +documentation = "https://docs.rs/wasmtime-debug/" +categories = ["wasm"] +keywords = ["webassembly", "wasm", "debuginfo"] +license = "Apache-2.0 WITH LLVM-exception" +readme = "README.md" +edition = "2018" + +[dependencies] +gimli = "0.17.0" +wasmparser = { version = "0.28.0" } +cranelift-codegen = "0.28.0" +cranelift-entity = "0.28.0" +cranelift-wasm = "0.28.0" +faerie = "0.7.0"
+wasmtime-environ = { path = "../environ", default-features = false } +target-lexicon = { version = "0.2.0", default-features = false } +failure = { version = "0.1.3", default-features = false } +failure_derive = { version = "0.1.3", default-features = false } + +[features] +default = ["std"] +std = ["cranelift-codegen/std", "cranelift-wasm/std"] +core = ["cranelift-codegen/core", "cranelift-wasm/core"] + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "CraneStation/wasmtime" } diff --git a/lib/debug/LICENSE b/lib/debug/LICENSE new file mode 100644 index 000000000000..f9d81955f4bc --- /dev/null +++ b/lib/debug/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/lib/debug/README.md b/lib/debug/README.md new file mode 100644 index 000000000000..d102c7411429 --- /dev/null +++ b/lib/debug/README.md @@ -0,0 +1,4 @@ +This is the `wasmtime-debug` crate, which provides functionality to +read, transform, and write DWARF sections.
+ +[`wasmtime-debug`]: https://crates.io/crates/wasmtime-debug diff --git a/lib/debug/src/address_transform.rs b/lib/debug/src/address_transform.rs new file mode 100644 index 000000000000..8f19ecddf1b1 --- /dev/null +++ b/lib/debug/src/address_transform.rs @@ -0,0 +1,139 @@ +use crate::read_debuginfo::WasmFileInfo; +use cranelift_entity::{EntityRef, PrimaryMap}; +use cranelift_wasm::DefinedFuncIndex; +use gimli::write; +use std::collections::BTreeMap; +use std::ops::Bound::{Included, Unbounded}; +use std::vec::Vec; +use wasmtime_environ::AddressTransforms; + +pub type GeneratedAddress = usize; +pub type WasmAddress = u64; +pub type SymbolIndex = usize; + +#[derive(Debug)] +pub struct AddressMap { + pub generated: GeneratedAddress, + pub wasm: WasmAddress, +} + +#[derive(Debug)] +pub struct FunctionMap { + pub offset: GeneratedAddress, + pub len: GeneratedAddress, + pub addresses: Box<[AddressMap]>, +} + +#[derive(Debug)] +pub struct AddressTransform { + lookup: BTreeMap, + map: PrimaryMap, + func_ranges: Vec<(usize, usize)>, +} + +impl AddressTransform { + pub fn new(at: &AddressTransforms, wasm_file: &WasmFileInfo) -> Self { + let code_section_offset = wasm_file.code_section_offset; + let function_offsets = &wasm_file.function_offsets_and_sizes; + let mut lookup = BTreeMap::new(); + let mut map = PrimaryMap::new(); + let mut func_ranges = Vec::new(); + for (i, ft) in at { + let index = i.index(); + let (fn_offset, fn_size) = function_offsets[index]; + assert!(code_section_offset <= fn_offset); + let fn_offset: WasmAddress = fn_offset - code_section_offset; + let fn_size = fn_size as WasmAddress; + func_ranges.push((ft.body_offset, ft.body_offset + ft.body_len)); + lookup.insert( + fn_offset as WasmAddress, + (index, ft.body_offset, ft.body_offset), + ); + let mut fn_map = Vec::new(); + for t in &ft.locations { + if t.srcloc.is_default() { + // TODO extend some range if possible + continue; + } + // src_offset is a wasm bytecode offset in the code section + let
src_offset = t.srcloc.bits() as WasmAddress + fn_offset; + assert!(fn_offset <= src_offset && src_offset <= fn_offset + fn_size); + lookup.insert( + src_offset, + (index, t.code_offset, t.code_offset + t.code_len), + ); + fn_map.push(AddressMap { + generated: t.code_offset, + wasm: src_offset, + }); + } + let last_addr = ft.body_offset + ft.body_len; + lookup.insert(fn_offset + fn_size, (index, last_addr, last_addr)); + fn_map.sort_by(|a, b| a.generated.cmp(&b.generated)); + map.push(FunctionMap { + offset: ft.body_offset, + len: ft.body_len, + addresses: fn_map.into_boxed_slice(), + }); + } + AddressTransform { + lookup, + map, + func_ranges, + } + } + + pub fn translate(&self, addr: u64) -> Option { + if addr == 0 { + // It's normally 0 for debug info without the linked code. + return None; + } + let mut search = self.lookup.range((Unbounded, Included(addr))); + if let Some((_, value)) = search.next_back() { + return Some(write::Address::Relative { + symbol: value.0, + addend: value.1 as i64, + }); + } + // Address was not found: function was not compiled? + None + } + + pub fn diff(&self, addr1: u64, addr2: u64) -> Option { + let t1 = self.translate(addr1); + let t2 = self.translate(addr2); + if t1.is_none() || t2.is_none() { + return None; + } + if let ( + Some(write::Address::Relative { + symbol: s1, + addend: a, + }), + Some(write::Address::Relative { + symbol: s2, + addend: b, + }), + ) = (t1, t2) + { + if s1 != s2 { + panic!("different symbol"); + } + Some((b - a) as u64) + } else { + unreachable!(); + } + } + + pub fn delta(&self, addr1: u64, u: u64) -> Option { + self.diff(addr1, addr1.checked_add(u)?) // None when addr1 + u overflows + } + + pub fn map(&self) -> &PrimaryMap { + &self.map + } + + pub fn func_range(&self, index: usize) -> (usize, usize) { + self.func_ranges[index] + } +} diff --git a/lib/debug/src/lib.rs b/lib/debug/src/lib.rs new file mode 100644 index 000000000000..ccbe9df10b3e --- /dev/null +++ b/lib/debug/src/lib.rs @@ -0,0 +1,181 @@ +//! Debug utils for WebAssembly using Cranelift.
+use cranelift_codegen::isa::TargetFrontendConfig; +use faerie::{Artifact, Decl}; +use failure::Error; +use target_lexicon::{BinaryFormat, Triple}; + +pub use crate::read_debuginfo::{read_debuginfo, DebugInfoData}; +pub use crate::transform::transform_dwarf; +pub use crate::write_debuginfo::{emit_dwarf, ResolvedSymbol, SymbolResolver}; + +use wasmtime_environ::AddressTransforms; + +mod address_transform; +mod read_debuginfo; +mod transform; +mod write_debuginfo; + +#[macro_use] +extern crate failure_derive; + +struct FunctionRelocResolver {} +impl SymbolResolver for FunctionRelocResolver { + fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol { + let name = format!("_wasm_function_{}", symbol); + ResolvedSymbol::Reloc { name, addend } + } +} + +pub fn emit_debugsections( + obj: &mut Artifact, + target_config: &TargetFrontendConfig, + debuginfo_data: &DebugInfoData, + at: &AddressTransforms, +) -> Result<(), Error> { + let dwarf = transform_dwarf(target_config, debuginfo_data, at)?; + let resolver = FunctionRelocResolver {}; + emit_dwarf(obj, dwarf, &resolver); + Ok(()) +} + +struct ImageRelocResolver<'a> { + func_offsets: &'a Vec, +} + +impl<'a> SymbolResolver for ImageRelocResolver<'a> { + fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol { + let func_start = self.func_offsets[symbol]; + ResolvedSymbol::PhysicalAddress(func_start.wrapping_add(addend as u64)) + } +} + +pub fn emit_debugsections_image( + triple: Triple, + target_config: &TargetFrontendConfig, + debuginfo_data: &DebugInfoData, + at: &AddressTransforms, + funcs: &Vec<(*const u8, usize)>, +) -> Result, Error> { + let ref func_offsets = funcs + .iter() + .map(|(ptr, _)| *ptr as u64) + .collect::>(); + let mut obj = Artifact::new(triple, String::from("module")); + let dwarf = transform_dwarf(target_config, debuginfo_data, at)?; + let resolver = ImageRelocResolver { func_offsets }; + + // Assuming all functions in the same code block, looking min/max of its range.
+ assert!(!funcs.is_empty()); + let mut segment_body: (usize, usize) = (!0, 0); + for (body_ptr, body_len) in funcs.iter() { + segment_body.0 = ::std::cmp::min(segment_body.0, *body_ptr as usize); + segment_body.1 = ::std::cmp::max(segment_body.1, *body_ptr as usize + body_len); + } + let segment_body = (segment_body.0 as *const u8, segment_body.1 - segment_body.0); + + let body = unsafe { ::std::slice::from_raw_parts(segment_body.0, segment_body.1) }; + obj.declare_with("all", Decl::Function { global: false }, body.to_vec())?; + + emit_dwarf(&mut obj, dwarf, &resolver); + + // LLDB is too "magical" about mach-o, generating elf + let mut bytes = obj.emit_as(BinaryFormat::Elf)?; + // elf is still missing details... + convert_faerie_elf_to_loadable_file(&mut bytes, segment_body.0); + + // let mut file = ::std::fs::File::create(::std::path::Path::new("test.o")).expect("file"); + // ::std::io::Write::write(&mut file, &bytes).expect("write"); + + Ok(bytes) +} + +fn convert_faerie_elf_to_loadable_file(bytes: &mut Vec, code_ptr: *const u8) { + use std::ffi::CStr; + use std::os::raw::c_char; + + assert!( + bytes[0x4] == 2 && bytes[0x5] == 1, + "bits and endianess in .ELF" + ); + let e_phoff = unsafe { *(bytes.as_ptr().offset(0x20) as *const u64) }; + let e_phnum = unsafe { *(bytes.as_ptr().offset(0x38) as *const u16) }; + assert!( + e_phoff == 0 && e_phnum == 0, + "program header table is empty" + ); + let e_phentsize = unsafe { *(bytes.as_ptr().offset(0x36) as *const u16) }; + assert!(e_phentsize == 0x38, "size of ph"); + let e_shentsize = unsafe { *(bytes.as_ptr().offset(0x3A) as *const u16) }; + assert!(e_shentsize == 0x40, "size of sh"); + + let e_shoff = unsafe { *(bytes.as_ptr().offset(0x28) as *const u64) }; + let e_shnum = unsafe { *(bytes.as_ptr().offset(0x3C) as *const u16) }; + let mut shstrtab_off = 0; + let mut segment = None; + for i in 0..e_shnum { + let off = e_shoff as isize + i as isize * e_shentsize as isize; + let sh_type = unsafe {
*(bytes.as_ptr().offset(off + 0x4) as *const u32) }; + if sh_type == /* SHT_STRTAB */ 3 { + shstrtab_off = unsafe { *(bytes.as_ptr().offset(off + 0x18) as *const u64) }; + } + if sh_type != /* SHT_PROGBITS */ 1 { + continue; + } + // It is a SHT_PROGBITS, but we need to check sh_name to ensure it is our function + let sh_name = unsafe { + let sh_name_off = *(bytes.as_ptr().offset(off) as *const u32); + CStr::from_ptr( + bytes + .as_ptr() + .offset((shstrtab_off + sh_name_off as u64) as isize) + as *const c_char, + ) + .to_str() + .expect("name") + }; + if sh_name != ".text.all" { + continue; + } + + assert!(segment.is_none()); + // Functions were added by emit_debugsections_image as .text.all. + // Patch vaddr, and save file location and its size. + unsafe { + *(bytes.as_mut_ptr().offset(off + 0x10) as *mut u64) = code_ptr as u64; + }; + let sh_offset = unsafe { *(bytes.as_ptr().offset(off + 0x18) as *const u64) }; + let sh_size = unsafe { *(bytes.as_ptr().offset(off + 0x20) as *const u64) }; + segment = Some((sh_offset, code_ptr, sh_size)); + // Fix name too: cut it to just ".text" + unsafe { + let sh_name_off = *(bytes.as_ptr().offset(off) as *const u32); + bytes[(shstrtab_off + sh_name_off as u64) as usize + ".text".len()] = 0; + } + } + + // LLDB wants segment with virtual address set, placing them at the end of ELF. + let ph_off = bytes.len(); + if let Some((sh_offset, v_offset, sh_size)) = segment { + let mut segment = Vec::with_capacity(0x38); + segment.resize(0x38, 0); + unsafe { + *(segment.as_mut_ptr() as *mut u32) = /* PT_LOAD */ 0x1; + *(segment.as_mut_ptr().offset(0x8) as *mut u64) = sh_offset; + *(segment.as_mut_ptr().offset(0x10) as *mut u64) = v_offset as u64; + *(segment.as_mut_ptr().offset(0x18) as *mut u64) = v_offset as u64; + *(segment.as_mut_ptr().offset(0x20) as *mut u64) = sh_size; + *(segment.as_mut_ptr().offset(0x28) as *mut u64) = sh_size; + } + bytes.extend_from_slice(&segment); + } else { + unreachable!(); + } + + // It is somewhat loadable ELF file at this moment.
+ // Update e_type, e_phoff and e_phnum. + unsafe { + *(bytes.as_mut_ptr().offset(0x10) as *mut u16) = /* ET_DYN */ 3; + *(bytes.as_mut_ptr().offset(0x20) as *mut u64) = ph_off as u64; + *(bytes.as_mut_ptr().offset(0x38) as *mut u16) = 1 as u16; + } +} diff --git a/lib/debug/src/read_debuginfo.rs b/lib/debug/src/read_debuginfo.rs new file mode 100644 index 000000000000..70e88fb36f3b --- /dev/null +++ b/lib/debug/src/read_debuginfo.rs @@ -0,0 +1,134 @@ +use std::collections::HashMap; +use wasmparser::{ModuleReader, SectionCode}; + +use gimli; + +use gimli::{ + DebugAbbrev, DebugAddr, DebugInfo, DebugLine, DebugLineStr, DebugLoc, DebugLocLists, + DebugRanges, DebugRngLists, DebugStr, DebugStrOffsets, DebugTypes, EndianSlice, LittleEndian, + LocationLists, RangeLists, +}; + +trait Reader: gimli::Reader {} + +impl<'input> Reader for gimli::EndianSlice<'input, LittleEndian> {} + +pub type Dwarf<'input> = gimli::Dwarf>; + +#[derive(Debug)] +pub struct WasmFileInfo { + pub code_section_offset: u64, + pub function_offsets_and_sizes: Box<[(u64, u32)]>, +} + +#[derive(Debug)] +pub struct DebugInfoData<'a> { + pub dwarf: Dwarf<'a>, + pub wasm_file: WasmFileInfo, +} + +fn convert_sections<'a>(sections: HashMap<&str, &'a [u8]>) -> Dwarf<'a> { + let endian = LittleEndian; + let debug_str = DebugStr::new(sections[".debug_str"], endian); + let debug_abbrev = DebugAbbrev::new(sections[".debug_abbrev"], endian); + let debug_info = DebugInfo::new(sections[".debug_info"], endian); + let debug_line = DebugLine::new(sections[".debug_line"], endian); + + if sections.contains_key(".debug_addr") { + panic!("Unexpected .debug_addr"); + } + + let debug_addr = DebugAddr::from(EndianSlice::new(&[], endian)); + + if sections.contains_key(".debug_line_str") { + panic!("Unexpected .debug_line_str"); + } + + let debug_line_str = DebugLineStr::from(EndianSlice::new(&[], endian)); + let debug_str_sup = DebugStr::from(EndianSlice::new(&[], endian)); + + if sections.contains_key(".debug_rnglists") { +
panic!("Unexpected .debug_rnglists"); + } + + let debug_ranges = match sections.get(".debug_ranges") { + Some(section) => DebugRanges::new(section, endian), + None => DebugRanges::new(&[], endian), + }; + let debug_rnglists = DebugRngLists::new(&[], endian); + let ranges = RangeLists::new(debug_ranges, debug_rnglists); + + if sections.contains_key(".debug_loclists") { + panic!("Unexpected .debug_loclists"); + } + + let debug_loc = match sections.get(".debug_loc") { + Some(section) => DebugLoc::new(section, endian), + None => DebugLoc::new(&[], endian), + }; + let debug_loclists = DebugLocLists::new(&[], endian); + let locations = LocationLists::new(debug_loc, debug_loclists); + + if sections.contains_key(".debug_str_offsets") { + panic!("Unexpected .debug_str_offsets"); + } + + let debug_str_offsets = DebugStrOffsets::from(EndianSlice::new(&[], endian)); + + if sections.contains_key(".debug_types") { + panic!("Unexpected .debug_types"); + } + + let debug_types = DebugTypes::from(EndianSlice::new(&[], endian)); + + Dwarf { + debug_abbrev, + debug_addr, + debug_info, + debug_line, + debug_line_str, + debug_str, + debug_str_offsets, + debug_str_sup, + debug_types, + locations, + ranges, + } +} + +pub fn read_debuginfo(data: &[u8]) -> DebugInfoData { + let mut reader = ModuleReader::new(data).expect("reader"); + let mut sections = HashMap::new(); + let mut code_section_offset = 0; + let mut function_offsets_and_sizes = Vec::new(); + while !reader.eof() { + let section = reader.read().expect("section"); + if let SectionCode::Custom { name, ..
} = section.code { + if name.starts_with(".debug_") { + let mut reader = section.get_binary_reader(); + let len = reader.bytes_remaining(); + sections.insert(name, reader.read_bytes(len).expect("bytes")); + } + } + if let SectionCode::Code = section.code { + code_section_offset = section.range().start as u64; + // Record the offset and size of every function body. TODO remove me later + let mut reader = section.get_code_section_reader().expect("code reader"); + for _ in 0..reader.get_count() { + let body = reader.read().expect("function body read"); + let range = body.range(); + let fn_body_size = range.end - range.start; + let fn_body_offset = range.start; + function_offsets_and_sizes.push((fn_body_offset as u64, fn_body_size as u32)); + } + } + } + let function_offsets_and_sizes = function_offsets_and_sizes.into_boxed_slice(); + DebugInfoData { + dwarf: convert_sections(sections), + wasm_file: WasmFileInfo { + code_section_offset, + function_offsets_and_sizes, + }, + } +} diff --git a/lib/debug/src/transform.rs b/lib/debug/src/transform.rs new file mode 100644 index 000000000000..114dac3b1d65 --- /dev/null +++ b/lib/debug/src/transform.rs @@ -0,0 +1,638 @@ +use crate::address_transform::AddressTransform; +pub use crate::read_debuginfo::DebugInfoData; +use cranelift_codegen::isa::TargetFrontendConfig; +use cranelift_entity::EntityRef; +use failure::Error; +use std::collections::{BTreeMap, HashMap}; +use std::ops::Bound::{Included, Unbounded}; + +use gimli; + +use gimli::{ + AttributeValue, CompilationUnitHeader, DebugAbbrev, DebugAddr, DebugAddrBase, DebugLine, + DebugLineOffset, DebugStr, DebuggingInformationEntry, LineEncoding, LocationLists, RangeLists, + UnitOffset, +}; + +use gimli::write; + +trait Reader: gimli::Reader {} + +impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where Endian: gimli::Endianity {} + +#[derive(Fail, Debug)] +#[fail(display = "Debug info transform error: {}", _0)] +pub struct TransformError(&'static str); + +pub struct TransformedDwarf { + pub encoding:
gimli::Encoding, + pub strings: write::StringTable, + pub units: write::UnitTable, + pub line_strings: write::LineStringTable, + pub range_lists: write::RangeListTable, +} + +struct DebugInputContext<'a, R> +where + R: Reader, +{ + debug_abbrev: &'a DebugAbbrev, + debug_str: &'a DebugStr, + debug_line: &'a DebugLine, + debug_addr: &'a DebugAddr, + debug_addr_base: DebugAddrBase, + rnglists: &'a RangeLists, + loclists: &'a LocationLists, +} + +type PendingDieRef = (write::UnitEntryId, gimli::DwAt, UnitOffset); + +enum FileAttributeContext<'a> { + Root(Option), + Children(&'a Vec), +} + +fn clone_die_attributes<'a, R>( + entry: &DebuggingInformationEntry, + context: &DebugInputContext, + addr_tr: &'a AddressTransform, + unit_encoding: &gimli::Encoding, + current_scope: &mut write::DebuggingInformationEntry, + current_scope_id: write::UnitEntryId, + subprogram_range: Option<(write::Address, u64)>, + out_strings: &mut write::StringTable, + die_ref_map: &HashMap, + pending_die_refs: &mut Vec, + file_context: FileAttributeContext<'a>, +) -> Result<(), Error> +where + R: Reader, +{ + let _tag = &entry.tag(); + let mut attrs = entry.attrs(); + let mut low_pc = None; + while let Some(attr) = attrs.next()?
{ + let attr_value = match attr.value() { + AttributeValue::Addr(_) + if attr.name() == gimli::DW_AT_low_pc && subprogram_range.is_some() => + { + write::AttributeValue::Address(subprogram_range.unwrap().0) + } + AttributeValue::Udata(_) + if attr.name() == gimli::DW_AT_high_pc && subprogram_range.is_some() => + { + write::AttributeValue::Udata(subprogram_range.unwrap().1) + } + AttributeValue::Addr(u) => { + let addr = addr_tr.translate(u).unwrap_or(write::Address::Absolute(0)); + if attr.name() == gimli::DW_AT_low_pc { + low_pc = Some((u, addr)); + } + write::AttributeValue::Address(addr) + } + AttributeValue::Udata(u) => { + if attr.name() != gimli::DW_AT_high_pc || low_pc.is_none() { + write::AttributeValue::Udata(u) + } else { + let u = addr_tr.delta(low_pc.unwrap().0, u).unwrap_or(0); + write::AttributeValue::Udata(u) + } + } + AttributeValue::Data1(d) => write::AttributeValue::Data1(d), + AttributeValue::Data2(d) => write::AttributeValue::Data2(d), + AttributeValue::Data4(d) => write::AttributeValue::Data4(d), + AttributeValue::Sdata(d) => write::AttributeValue::Sdata(d), + AttributeValue::Flag(f) => write::AttributeValue::Flag(f), + AttributeValue::DebugLineRef(line_program_offset) => { + if let FileAttributeContext::Root(o) = file_context { + if o != Some(line_program_offset) { + return Err(TransformError("invalid debug_line offset").into()); + } + write::AttributeValue::LineProgramRef + } else { + return Err(TransformError("unexpected debug_line index attribute").into()); + } + } + AttributeValue::FileIndex(i) => match file_context { + // Index 0 means "no file"; 1..=len select from the file map; the rest is an error. + FileAttributeContext::Children(ids) if i <= ids.len() as u64 => { + write::AttributeValue::FileIndex((i as usize).checked_sub(1).map(|n| ids[n])) + } + _ => return Err(TransformError("unexpected file index attribute").into()), + }, + AttributeValue::DebugStrRef(str_offset) => { + let s = context.debug_str.get_str(str_offset)?.to_slice()?.to_vec(); + write::AttributeValue::StringRef(out_strings.add(s)) + } + AttributeValue::RangeListsRef(r) => { +
let low_pc = 0; + let mut ranges = context.rnglists.ranges( + r, + *unit_encoding, + low_pc, + &context.debug_addr, + context.debug_addr_base, + )?; + let mut _result = Vec::new(); + while let Some(range) = ranges.next()? { + assert!(range.begin <= range.end); + _result.push((range.begin as i64, range.end as i64)); + } + // FIXME _result contains invalid code offsets; translate_address + continue; // ignore attribute + } + AttributeValue::LocationListsRef(r) => { + let low_pc = 0; + let mut locs = context.loclists.locations( + r, + *unit_encoding, + low_pc, + &context.debug_addr, + context.debug_addr_base, + )?; + let mut _result = Vec::new(); + while let Some(loc) = locs.next()? { + _result.push((loc.range.begin as i64, loc.range.end as i64, loc.data.0)); + } + // FIXME _result contains invalid expressions and code offsets + continue; // ignore attribute + } + AttributeValue::Exprloc(ref _expr) => { + // FIXME _expr contains invalid expression + continue; // ignore attribute + } + AttributeValue::Encoding(e) => write::AttributeValue::Encoding(e), + AttributeValue::DecimalSign(e) => write::AttributeValue::DecimalSign(e), + AttributeValue::Endianity(e) => write::AttributeValue::Endianity(e), + AttributeValue::Accessibility(e) => write::AttributeValue::Accessibility(e), + AttributeValue::Visibility(e) => write::AttributeValue::Visibility(e), + AttributeValue::Virtuality(e) => write::AttributeValue::Virtuality(e), + AttributeValue::Language(e) => write::AttributeValue::Language(e), + AttributeValue::AddressClass(e) => write::AttributeValue::AddressClass(e), + AttributeValue::IdentifierCase(e) => write::AttributeValue::IdentifierCase(e), + AttributeValue::CallingConvention(e) => write::AttributeValue::CallingConvention(e), + AttributeValue::Inline(e) => write::AttributeValue::Inline(e), + AttributeValue::Ordering(e) => write::AttributeValue::Ordering(e), + AttributeValue::UnitRef(ref offset) => { + if let Some(unit_id) = die_ref_map.get(offset) { +
write::AttributeValue::ThisUnitEntryRef(*unit_id) + } else { + pending_die_refs.push((current_scope_id, attr.name(), *offset)); + continue; + } + } + // AttributeValue::DebugInfoRef(_) => { + // continue; + // } + _ => return Err(TransformError("unsupported attribute value").into()), + }; + current_scope.set(attr.name(), attr_value); + } + Ok(()) +} + +fn clone_attr_string( + attr_value: &AttributeValue, + form: gimli::DwForm, + debug_str: &DebugStr, + out_strings: &mut write::StringTable, +) -> Result +where + R: Reader, +{ + let content = match attr_value { + AttributeValue::DebugStrRef(str_offset) => { + debug_str.get_str(*str_offset)?.to_slice()?.to_vec() + } + AttributeValue::String(b) => b.to_slice()?.to_vec(), + _ => return Err(TransformError("Unexpected attribute value").into()), + }; + Ok(match form { + gimli::DW_FORM_strp => { + let id = out_strings.add(content); + write::LineString::StringRef(id) + } + gimli::DW_FORM_string => write::LineString::String(content), + _ => return Err(TransformError("DW_FORM_line_strp or other not supported").into()), + }) +} + +#[derive(Debug)] +enum SavedLineProgramRow { + Normal { + address: u64, + op_index: u64, + file_index: u64, + line: u64, + column: u64, + discriminator: u64, + is_stmt: bool, + basic_block: bool, + prologue_end: bool, + epilogue_begin: bool, + isa: u64, + }, + EndOfSequence(u64), +} + +#[derive(Debug, Eq, PartialEq)] +enum ReadLineProgramState { + SequenceEnded, + ReadSequence, + IgnoreSequence, +} + +fn clone_line_program( + unit: &CompilationUnitHeader, + root: &DebuggingInformationEntry, + addr_tr: &AddressTransform, + out_encoding: &gimli::Encoding, + debug_str: &DebugStr, + debug_line: &DebugLine, + out_strings: &mut write::StringTable, +) -> Result<(write::LineProgram, DebugLineOffset, Vec), Error> +where + R: Reader, +{ + let offset = match root.attr_value(gimli::DW_AT_stmt_list)?
{ + Some(gimli::AttributeValue::DebugLineRef(offset)) => offset, + _ => { + return Err(TransformError("Debug line offset is not found").into()); + } + }; + let comp_dir = root.attr_value(gimli::DW_AT_comp_dir)?; + let comp_name = root.attr_value(gimli::DW_AT_name)?; + let out_comp_dir = clone_attr_string( + comp_dir.as_ref().expect("comp_dir"), + gimli::DW_FORM_strp, + debug_str, + out_strings, + )?; + let out_comp_name = clone_attr_string( + comp_name.as_ref().expect("comp_name"), + gimli::DW_FORM_strp, + debug_str, + out_strings, + )?; + + let program = debug_line.program( + offset, + unit.address_size(), + comp_dir.and_then(|val| val.string_value(&debug_str)), + comp_name.and_then(|val| val.string_value(&debug_str)), + ); + if let Ok(program) = program { + let header = program.header(); + assert!(header.version() <= 4, "not supported 5"); + let line_encoding = LineEncoding { + minimum_instruction_length: header.minimum_instruction_length(), + maximum_operations_per_instruction: header.maximum_operations_per_instruction(), + default_is_stmt: header.default_is_stmt(), + line_base: header.line_base(), + line_range: header.line_range(), + }; + let mut out_program = write::LineProgram::new( + *out_encoding, + line_encoding, + out_comp_dir, + out_comp_name, + None, + ); + let mut dirs = Vec::new(); + dirs.push(out_program.default_directory()); + for dir_attr in header.include_directories() { + let dir_id = out_program.add_directory(clone_attr_string( + dir_attr, + gimli::DW_FORM_string, + debug_str, + out_strings, + )?); + dirs.push(dir_id); + } + let mut files = Vec::new(); + for file_entry in header.file_names() { + let dir_id = dirs[file_entry.directory_index() as usize]; + let file_id = out_program.add_file( + clone_attr_string( + &file_entry.path_name(), + gimli::DW_FORM_string, + debug_str, + out_strings, + )?, + dir_id, + None, + ); + files.push(file_id); + } + + let mut rows = program.rows(); + let mut saved_rows = BTreeMap::new(); + let mut state = 
ReadLineProgramState::SequenceEnded; + while let Some((_header, row)) = rows.next_row()? { + if state == ReadLineProgramState::IgnoreSequence { + if row.end_sequence() { + state = ReadLineProgramState::SequenceEnded; + } + continue; + } + let saved_row = if row.end_sequence() { + state = ReadLineProgramState::SequenceEnded; + SavedLineProgramRow::EndOfSequence(row.address()) + } else { + if state == ReadLineProgramState::SequenceEnded { + // Discard sequences for non-existent code. + if row.address() == 0 { + state = ReadLineProgramState::IgnoreSequence; + continue; + } + state = ReadLineProgramState::ReadSequence; + } + SavedLineProgramRow::Normal { + address: row.address(), + op_index: row.op_index(), + file_index: row.file_index(), + line: row.line().unwrap_or(0), + column: match row.column() { + gimli::ColumnType::LeftEdge => 0, + gimli::ColumnType::Column(val) => val, + }, + discriminator: row.discriminator(), + is_stmt: row.is_stmt(), + basic_block: row.basic_block(), + prologue_end: row.prologue_end(), + epilogue_begin: row.epilogue_begin(), + isa: row.isa(), + } + }; + saved_rows.insert(row.address(), saved_row); + } + + for (i, map) in addr_tr.map() { + let symbol = i.index(); + let base_addr = map.offset; + out_program.begin_sequence(Some(write::Address::Relative { symbol, addend: 0 })); + // TODO track and place function declaration line here + let mut last_address = None; + for addr_map in map.addresses.iter() { + let mut saved_row = saved_rows.get(&addr_map.wasm); + if saved_row.is_none() { + // No direct match -- repeat search with range. 
+ saved_row = saved_rows + .range((Unbounded, Included(addr_map.wasm))) + .last() + .map(|p| p.1); + } + if let Some(SavedLineProgramRow::Normal { + address, + op_index, + file_index, + line, + column, + discriminator, + is_stmt, + basic_block, + prologue_end, + epilogue_begin, + isa, + }) = saved_row + { + // Ignore duplicates + if Some(*address) != last_address { + let address_offset = if last_address.is_none() { + // Extend first entry to the function declaration + // TODO use the function declaration line instead + 0 + } else { + (addr_map.generated - base_addr) as u64 + }; + out_program.row().address_offset = address_offset; + out_program.row().op_index = *op_index; + out_program.row().file = files[(file_index - 1) as usize]; + out_program.row().line = *line; + out_program.row().column = *column; + out_program.row().discriminator = *discriminator; + out_program.row().is_statement = *is_stmt; + out_program.row().basic_block = *basic_block; + out_program.row().prologue_end = *prologue_end; + out_program.row().epilogue_begin = *epilogue_begin; + out_program.row().isa = *isa; + out_program.generate_row(); + last_address = Some(*address); + } + } + } + let end_addr = (map.offset + map.len - 1) as u64; + out_program.end_sequence(end_addr); + } + Ok((out_program, offset, files)) + } else { + Err(TransformError("Valid line program not found").into()) + } +} + +fn get_subprogram_range<'a, R>( + entry: &DebuggingInformationEntry, + addr_tr: &'a AddressTransform, +) -> Result, Error> +where + R: Reader, +{ + let low_pc = entry.attr_value(gimli::DW_AT_low_pc)?; + if let Some(AttributeValue::Addr(addr)) = low_pc { + let transformed = addr_tr.translate(addr); + if let Some(write::Address::Relative { symbol, .. 
}) = transformed { + let range = addr_tr.func_range(symbol); + let addr = write::Address::Relative { + symbol, + addend: range.0 as i64, + }; + let len = (range.1 - range.0) as u64; + return Ok(Some((addr, len))); + } + } + Ok(None) +} + +fn clone_unit<'a, R>( + unit: &CompilationUnitHeader, + context: &DebugInputContext, + addr_tr: &'a AddressTransform, + out_encoding: &gimli::Encoding, + out_units: &mut write::UnitTable, + out_strings: &mut write::StringTable, +) -> Result<(), Error> +where + R: Reader, +{ + let abbrevs = unit.abbreviations(context.debug_abbrev)?; + + let mut die_ref_map = HashMap::new(); + let mut pending_die_refs = Vec::new(); + let mut stack = Vec::new(); + + // Iterate over all of this compilation unit's entries. + let mut entries = unit.entries(&abbrevs); + let (comp_unit, file_map) = if let Some((depth_delta, entry)) = entries.next_dfs()? { + assert!(depth_delta == 0); + let (out_line_program, debug_line_offset, file_map) = clone_line_program( + unit, + entry, + addr_tr, + out_encoding, + context.debug_str, + context.debug_line, + out_strings, + )?; + + if entry.tag() == gimli::DW_TAG_compile_unit { + let unit_id = out_units.add(write::Unit::new(*out_encoding, out_line_program)); + let comp_unit = out_units.get_mut(unit_id); + + let root_id = comp_unit.root(); + die_ref_map.insert(entry.offset(), root_id); + + clone_die_attributes( + entry, + context, + addr_tr, + &unit.encoding(), + comp_unit.get_mut(root_id), + root_id, + None, + out_strings, + &die_ref_map, + &mut pending_die_refs, + FileAttributeContext::Root(Some(debug_line_offset)), + )?; + + stack.push(root_id); + (comp_unit, file_map) + } else { + return Err(TransformError("Unexpected unit header").into()); + } + } else { + return Ok(()); // empty + }; + let mut skip_at_depth = None; + while let Some((depth_delta, entry)) = entries.next_dfs()? 
{ + let depth_delta = if let Some(depth) = skip_at_depth { + let new_depth = depth + depth_delta; + if new_depth >= 0 { + skip_at_depth = Some(new_depth); + continue; + } + skip_at_depth = None; + new_depth + } else { + depth_delta + }; + let range = if entry.tag() == gimli::DW_TAG_subprogram { + let range = get_subprogram_range(entry, addr_tr)?; + if range.is_none() { + // Subprogram was not compiled: discarding all its info. + skip_at_depth = Some(0); + continue; + } + range + } else { + None + }; + + if depth_delta <= 0 { + for _ in depth_delta..1 { + stack.pop(); + } + } else { + assert!(depth_delta == 1); + } + let parent = stack.last().unwrap(); + let die_id = comp_unit.add(*parent, entry.tag()); + let current_scope = comp_unit.get_mut(die_id); + + stack.push(die_id); + die_ref_map.insert(entry.offset(), die_id); + + clone_die_attributes( + entry, + context, + addr_tr, + &unit.encoding(), + current_scope, + die_id, + range, + out_strings, + &die_ref_map, + &mut pending_die_refs, + FileAttributeContext::Children(&file_map), + )?; + } + for (die_id, attr_name, offset) in pending_die_refs { + let die = comp_unit.get_mut(die_id); + let unit_id = die_ref_map[&offset]; + die.set(attr_name, write::AttributeValue::ThisUnitEntryRef(unit_id)); + } + Ok(()) +} + +pub fn transform_dwarf( + target_config: &TargetFrontendConfig, + di: &DebugInfoData, + at: &wasmtime_environ::AddressTransforms, +) -> Result { + let context = DebugInputContext { + debug_abbrev: &di.dwarf.debug_abbrev, + debug_str: &di.dwarf.debug_str, + debug_line: &di.dwarf.debug_line, + debug_addr: &di.dwarf.debug_addr, + debug_addr_base: DebugAddrBase(0), + rnglists: &di.dwarf.ranges, + loclists: &di.dwarf.locations, + }; + + let out_encoding = gimli::Encoding { + format: gimli::Format::Dwarf32, + // TODO: this should be configurable + // macOS doesn't seem to support DWARF > 3 + version: 3, + address_size: target_config.pointer_bytes(), + }; + + let addr_tr = AddressTransform::new(at, &di.wasm_file); + + 
let mut out_strings = write::StringTable::default(); + let mut out_units = write::UnitTable::default(); + + let out_range_lists = write::RangeListTable::default(); + let out_line_strings = write::LineStringTable::default(); + + let mut iter = di.dwarf.debug_info.units(); + while let Some(ref unit) = iter.next().unwrap_or(None) { + clone_unit( + unit, + &context, + &addr_tr, + &out_encoding, + &mut out_units, + &mut out_strings, + )?; + } + + // let unit_range_list = write::RangeList(Vec::new()); + // let unit_range_list_id = out_range_lists.add(unit_range_list.clone()); + // let unit = dwarf.units.get_mut(self.unit_id); + // let root = unit.root(); + // let root = unit.get_mut(root); + // root.set( + // gimli::DW_AT_ranges, + // AttributeValue::RangeListRef(unit_range_list_id), + // ); + + //println!("{:?} \n====\n {:?}", di, at); + Ok(TransformedDwarf { + encoding: out_encoding, + strings: out_strings, + units: out_units, + line_strings: out_line_strings, + range_lists: out_range_lists, + }) +} diff --git a/lib/debug/src/write_debuginfo.rs b/lib/debug/src/write_debuginfo.rs new file mode 100644 index 000000000000..2645ee556cd9 --- /dev/null +++ b/lib/debug/src/write_debuginfo.rs @@ -0,0 +1,207 @@ +use crate::transform::TransformedDwarf; + +use gimli::write::{ + Address, DebugAbbrev, DebugInfo, DebugLine, DebugLineStr, DebugRanges, DebugRngLists, DebugStr, + EndianVec, Result, SectionId, Sections, Writer, +}; +use gimli::RunTimeEndian; + +use faerie::artifact::Decl; +use faerie::*; + +struct DebugReloc { + offset: u32, + size: u8, + name: String, + addend: i64, +} + +macro_rules! decl_section { + ($artifact:ident . $section:ident = $name:expr) => { + $artifact + .declare_with( + SectionId::$section.name(), + Decl::DebugSection, + $name.0.writer.into_vec(), + ) + .unwrap(); + }; +} + +macro_rules! sect_relocs { + ($artifact:ident . 
$section:ident = $name:expr) => { + for reloc in $name.0.relocs { + $artifact + .link_with( + faerie::Link { + from: SectionId::$section.name(), + to: &reloc.name, + at: u64::from(reloc.offset), + }, + faerie::Reloc::Debug { + size: reloc.size, + addend: reloc.addend as i32, + }, + ) + .expect("faerie relocation error"); + } + }; +} + +pub enum ResolvedSymbol { + PhysicalAddress(u64), + Reloc { name: String, addend: i64 }, +} + +pub trait SymbolResolver { + fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol; +} + +pub fn emit_dwarf( + artifact: &mut Artifact, + mut dwarf: TransformedDwarf, + symbol_resolver: &SymbolResolver, +) { + let endian = RunTimeEndian::Little; + let debug_abbrev = DebugAbbrev::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_info = DebugInfo::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_str = DebugStr::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_line = DebugLine::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_ranges = DebugRanges::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_rnglists = DebugRngLists::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_line_str = DebugLineStr::from(WriterRelocate::new(endian, symbol_resolver)); + + let mut sections = Sections { + debug_abbrev, + debug_info, + debug_line, + debug_line_str, + debug_ranges, + debug_rnglists, + debug_str, + }; + + let debug_str_offsets = dwarf.strings.write(&mut sections.debug_str).unwrap(); + let debug_line_str_offsets = dwarf + .line_strings + .write(&mut sections.debug_line_str) + .unwrap(); + dwarf + .units + .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + + decl_section!(artifact.DebugAbbrev = sections.debug_abbrev); + decl_section!(artifact.DebugInfo = sections.debug_info); + decl_section!(artifact.DebugStr = sections.debug_str); + decl_section!(artifact.DebugLine = sections.debug_line); + + let debug_ranges_not_empty = 
!sections.debug_ranges.0.writer.slice().is_empty(); + if debug_ranges_not_empty { + decl_section!(artifact.DebugRanges = sections.debug_ranges); + } + + let debug_rnglists_not_empty = !sections.debug_rnglists.0.writer.slice().is_empty(); + if debug_rnglists_not_empty { + decl_section!(artifact.DebugRngLists = sections.debug_rnglists); + } + + sect_relocs!(artifact.DebugAbbrev = sections.debug_abbrev); + sect_relocs!(artifact.DebugInfo = sections.debug_info); + sect_relocs!(artifact.DebugStr = sections.debug_str); + sect_relocs!(artifact.DebugLine = sections.debug_line); + + if debug_ranges_not_empty { + sect_relocs!(artifact.DebugRanges = sections.debug_ranges); + } + + if debug_rnglists_not_empty { + sect_relocs!(artifact.DebugRngLists = sections.debug_rnglists); + } +} + +struct WriterRelocate<'a> { + relocs: Vec, + writer: EndianVec, + symbol_resolver: &'a SymbolResolver, +} + +impl<'a> WriterRelocate<'a> { + fn new(endian: RunTimeEndian, symbol_resolver: &'a SymbolResolver) -> Self { + WriterRelocate { + relocs: Vec::new(), + writer: EndianVec::new(endian), + symbol_resolver, + } + } +} + +impl<'a> Writer for WriterRelocate<'a> { + type Endian = RunTimeEndian; + + fn endian(&self) -> Self::Endian { + self.writer.endian() + } + + fn len(&self) -> usize { + self.writer.len() + } + + fn write(&mut self, bytes: &[u8]) -> Result<()> { + self.writer.write(bytes) + } + + fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> { + self.writer.write_at(offset, bytes) + } + + fn write_address(&mut self, address: Address, size: u8) -> Result<()> { + match address { + Address::Absolute(val) => self.write_word(val, size), + Address::Relative { symbol, addend } => { + match self.symbol_resolver.resolve_symbol(symbol, addend as i64) { + ResolvedSymbol::PhysicalAddress(addr) => self.write_word(addr, size), + ResolvedSymbol::Reloc { name, addend } => { + let offset = self.len() as u64; + self.relocs.push(DebugReloc { + offset: offset as u32, + size, + name, + addend, 
+ }); + self.write_word(addend as u64, size) + } + } + } + } + } + + fn write_offset(&mut self, val: usize, section: SectionId, size: u8) -> Result<()> { + let offset = self.len() as u32; + let name = section.name().to_string(); + self.relocs.push(DebugReloc { + offset, + size, + name, + addend: val as i64, + }); + self.write_word(val as u64, size) + } + + fn write_offset_at( + &mut self, + offset: usize, + val: usize, + section: SectionId, + size: u8, + ) -> Result<()> { + let name = section.name().to_string(); + self.relocs.push(DebugReloc { + offset: offset as u32, + size, + name, + addend: val as i64, + }); + self.write_word_at(offset, val as u64, size) + } +} diff --git a/lib/environ/src/compilation.rs b/lib/environ/src/compilation.rs index ca0b845222a0..d566051fef0a 100644 --- a/lib/environ/src/compilation.rs +++ b/lib/environ/src/compilation.rs @@ -66,3 +66,32 @@ pub enum CompileError { #[fail(display = "Compilation error: {}", _0)] Codegen(CodegenError), } + +/// Single address point transform. +#[derive(Debug)] +pub struct InstructionAddressTransform { + /// Original source location. + pub srcloc: ir::SourceLoc, + + /// Generated instructions offset. + pub code_offset: usize, + + /// Generated instructions length. + pub code_len: usize, +} + +/// Function and its instructions transforms. +#[derive(Debug)] +pub struct FunctionAddressTransform { + /// Instructions transforms + pub locations: Vec, + + /// Generated function body offset if applicable, otherwise 0. + pub body_offset: usize, + + /// Generated function body length. + pub body_len: usize, +} + +/// Function AddressTransforms collection. +pub type AddressTransforms = PrimaryMap; diff --git a/lib/environ/src/cranelift.rs b/lib/environ/src/cranelift.rs index 47b23cf0d594..0e3a762f3918 100644 --- a/lib/environ/src/cranelift.rs +++ b/lib/environ/src/cranelift.rs @@ -1,6 +1,9 @@ //! Support for compiling with Cranelift. 
-use crate::compilation::{Compilation, CompileError, Relocation, RelocationTarget, Relocations}; +use crate::compilation::{ + AddressTransforms, Compilation, CompileError, FunctionAddressTransform, + InstructionAddressTransform, Relocation, RelocationTarget, Relocations, +}; use crate::func_environ::{ get_func_name, get_imported_memory32_grow_name, get_imported_memory32_size_name, get_memory32_grow_name, get_memory32_size_name, FuncEnvironment, @@ -81,15 +84,41 @@ impl RelocSink { } } +fn get_address_transform( + context: &Context, + isa: &isa::TargetIsa, +) -> Vec { + let mut result = Vec::new(); + + let func = &context.func; + let mut ebbs = func.layout.ebbs().collect::>(); + ebbs.sort_by_key(|ebb| func.offsets[*ebb]); // Ensure inst offsets always increase + + let encinfo = isa.encoding_info(); + for ebb in ebbs { + for (offset, inst, size) in func.inst_offsets(ebb, &encinfo) { + let srcloc = func.srclocs[inst]; + result.push(InstructionAddressTransform { + srcloc, + code_offset: offset as usize, + code_len: size as usize, + }); + } + } + result +} + /// Compile the module using Cranelift, producing a compilation result with /// associated relocations. 
pub fn compile_module<'data, 'module>( module: &'module Module, function_body_inputs: PrimaryMap, isa: &dyn isa::TargetIsa, -) -> Result<(Compilation, Relocations), CompileError> { + generate_debug_info: bool, +) -> Result<(Compilation, Relocations, AddressTransforms), CompileError> { let mut functions = PrimaryMap::with_capacity(function_body_inputs.len()); let mut relocations = PrimaryMap::with_capacity(function_body_inputs.len()); + let mut address_transforms = PrimaryMap::with_capacity(function_body_inputs.len()); function_body_inputs .into_iter() @@ -116,15 +145,31 @@ pub fn compile_module<'data, 'module>( context .compile_and_emit(isa, &mut code_buf, &mut reloc_sink, &mut trap_sink) .map_err(CompileError::Codegen)?; - Ok((code_buf, reloc_sink.func_relocs)) + + let address_transform = if generate_debug_info { + let body_len = code_buf.len(); + let at = get_address_transform(&context, isa); + Some(FunctionAddressTransform { + locations: at, + body_offset: 0, + body_len, + }) + } else { + None + }; + + Ok((code_buf, reloc_sink.func_relocs, address_transform)) }) .collect::, CompileError>>()? .into_iter() - .for_each(|(function, relocs)| { + .for_each(|(function, relocs, address_transform)| { functions.push(function); relocations.push(relocs); + if let Some(address_transform) = address_transform { + address_transforms.push(address_transform); + } }); // TODO: Reorganize where we create the Vec for the resolved imports. 
- Ok((Compilation::new(functions), relocations)) + Ok((Compilation::new(functions), relocations, address_transforms)) } diff --git a/lib/environ/src/lib.rs b/lib/environ/src/lib.rs index 04111ce7d094..9bd8620a39b6 100644 --- a/lib/environ/src/lib.rs +++ b/lib/environ/src/lib.rs @@ -47,7 +47,8 @@ mod vmoffsets; pub mod cranelift; pub use crate::compilation::{ - Compilation, CompileError, Relocation, RelocationTarget, Relocations, + AddressTransforms, Compilation, CompileError, InstructionAddressTransform, Relocation, + RelocationTarget, Relocations, }; pub use crate::module::{ Export, MemoryPlan, MemoryStyle, Module, TableElements, TablePlan, TableStyle, @@ -57,7 +58,7 @@ pub use crate::module_environ::{ ModuleTranslation, }; pub use crate::tunables::Tunables; -pub use crate::vmoffsets::VMOffsets; +pub use crate::vmoffsets::{TargetSharedSignatureIndex, VMOffsets}; /// WebAssembly page sizes are defined to be 64KiB. pub const WASM_PAGE_SIZE: u32 = 0x10000; diff --git a/lib/environ/src/vmoffsets.rs b/lib/environ/src/vmoffsets.rs index d55ab8cc8f41..a54fb3dc9831 100644 --- a/lib/environ/src/vmoffsets.rs +++ b/lib/environ/src/vmoffsets.rs @@ -518,3 +518,19 @@ impl VMOffsets { .unwrap() } } + +/// Target specific type for shared signature index. +#[derive(Debug, Copy, Clone)] +pub struct TargetSharedSignatureIndex(u32); + +impl TargetSharedSignatureIndex { + /// Constructs `TargetSharedSignatureIndex`. + pub fn new(value: u32) -> Self { + TargetSharedSignatureIndex(value) + } + + /// Returns index value. 
+ pub fn index(&self) -> u32 { + self.0 + } +} diff --git a/lib/jit/Cargo.toml b/lib/jit/Cargo.toml index 45df39dd306d..407596270071 100644 --- a/lib/jit/Cargo.toml +++ b/lib/jit/Cargo.toml @@ -18,6 +18,7 @@ cranelift-wasm = "0.28.0" cranelift-frontend = "0.28.0" wasmtime-environ = { path = "../environ", default-features = false } wasmtime-runtime = { path = "../runtime", default-features = false } +wasmtime-debug = { path = "../debug", default-features = false } region = "2.0.0" failure = { version = "0.1.3", default-features = false } failure_derive = { version = "0.1.3", default-features = false } diff --git a/lib/jit/src/code_memory.rs b/lib/jit/src/code_memory.rs index b83ffe1510d5..641577ad9d3b 100644 --- a/lib/jit/src/code_memory.rs +++ b/lib/jit/src/code_memory.rs @@ -2,6 +2,7 @@ use core::{cmp, mem}; use region; +use std::boxed::Box; use std::string::String; use std::vec::Vec; use wasmtime_runtime::{Mmap, VMFunctionBody}; @@ -63,6 +64,25 @@ impl CodeMemory { Ok(Self::view_as_mut_vmfunc_slice(new)) } + /// Allocate one contiguous memory block large enough for multiple code blocks. See also + /// allocate_copy_of_byte_slice. + pub fn allocate_copy_of_byte_slices( + &mut self, + slices: &[&[u8]], + ) -> Result, String> { + let total_len = slices.into_iter().fold(0, |acc, slice| acc + slice.len()); + let new = self.allocate(total_len)?; + let mut tail = new; + let mut result = Vec::with_capacity(slices.len()); + for slice in slices { + let (block, next_tail) = tail.split_at_mut(slice.len()); + block.copy_from_slice(slice); + tail = next_tail; + result.push(Self::view_as_mut_vmfunc_slice(block)); + } + Ok(result.into_boxed_slice()) + } + /// Make all allocated memory executable. 
pub fn publish(&mut self) { self.mmaps diff --git a/lib/jit/src/compiler.rs b/lib/jit/src/compiler.rs index f7b94d3f110b..1970d55b20ae 100644 --- a/lib/jit/src/compiler.rs +++ b/lib/jit/src/compiler.rs @@ -14,6 +14,7 @@ use cranelift_wasm::DefinedFuncIndex; use std::boxed::Box; use std::string::String; use std::vec::Vec; +use wasmtime_debug::{emit_debugsections_image, DebugInfoData}; use wasmtime_environ::cranelift; use wasmtime_environ::{Compilation, CompileError, Module, Relocations, Tunables}; use wasmtime_runtime::{InstantiationError, SignatureRegistry, VMFunctionBody}; @@ -66,26 +67,54 @@ impl Compiler { &mut self, module: &Module, function_body_inputs: PrimaryMap, + debug_data: Option, ) -> Result< ( PrimaryMap, Relocations, + Option>, ), SetupError, > { - let (compilation, relocations) = - cranelift::compile_module(module, function_body_inputs, &*self.isa) - .map_err(SetupError::Compile)?; + let (compilation, relocations, address_transform) = cranelift::compile_module( + module, + function_body_inputs, + &*self.isa, + debug_data.is_some(), + ) + .map_err(SetupError::Compile)?; let allocated_functions = - allocate_functions(&mut self.code_memory, compilation).map_err(|message| { + allocate_functions(&mut self.code_memory, &compilation).map_err(|message| { SetupError::Instantiate(InstantiationError::Resource(format!( "failed to allocate memory for functions: {}", message ))) })?; - Ok((allocated_functions, relocations)) + let dbg = if let Some(debug_data) = debug_data { + let target_config = self.isa.frontend_config(); + let triple = self.isa.triple().clone(); + let mut funcs = Vec::new(); + for (i, allocated) in allocated_functions.into_iter() { + let ptr = (*allocated) as *const u8; + let body_len = compilation.functions[i].len(); + funcs.push((ptr, body_len)); + } + let bytes = emit_debugsections_image( + triple, + &target_config, + &debug_data, + &address_transform, + &funcs, + ) + .map_err(|e| SetupError::DebugInfo(e))?; + Some(bytes) + } else { + None + 
}; + + Ok((allocated_functions, relocations, dbg)) } /// Create a trampoline for invoking a function. @@ -219,12 +248,22 @@ fn make_trampoline( fn allocate_functions( code_memory: &mut CodeMemory, - compilation: Compilation, + compilation: &Compilation, ) -> Result, String> { + // Allocate code for all functions in one contiguous memory block. + // First, collect all function bodies into a vector to pass to + // allocate_copy_of_byte_slices. + let bodies = compilation + .functions + .values() + .map(|body| body.as_slice()) + .collect::>(); + let fat_ptrs = code_memory.allocate_copy_of_byte_slices(&bodies)?; + // Second, create a PrimaryMap from the resulting vector of pointers. let mut result = PrimaryMap::with_capacity(compilation.functions.len()); - for (_, body) in compilation.functions.into_iter() { - let fatptr: *mut [VMFunctionBody] = code_memory.allocate_copy_of_byte_slice(body)?; - result.push(fatptr); + for i in 0..fat_ptrs.len() { + let fat_ptr: *mut [VMFunctionBody] = fat_ptrs[i]; + result.push(fat_ptr); } Ok(result) } diff --git a/lib/jit/src/context.rs b/lib/jit/src/context.rs index 1b36f9bc0af6..abb5f4c28752 100644 --- a/lib/jit/src/context.rs +++ b/lib/jit/src/context.rs @@ -48,6 +48,7 @@ pub struct Context { namespace: Namespace, compiler: Box, global_exports: Rc>>>, + debug_info: bool, } impl Context { @@ -57,9 +58,20 @@ impl Context { namespace: Namespace::new(), compiler, global_exports: Rc::new(RefCell::new(HashMap::new())), + debug_info: false, } } + /// Get debug_info settings. + pub fn debug_info(&self) -> bool { + self.debug_info + } + + /// Set debug_info settings. + pub fn set_debug_info(&mut self, value: bool) { + self.debug_info = value; + } + /// Construct a new instance of `Context` with the given target. 
pub fn with_isa(isa: Box) -> Self { Self::new(Box::new(Compiler::new(isa))) @@ -88,12 +100,14 @@ impl Context { fn instantiate(&mut self, data: &[u8]) -> Result { self.validate(&data).map_err(SetupError::Validate)?; + let debug_info = self.debug_info(); instantiate( &mut *self.compiler, &data, &mut self.namespace, Rc::clone(&self.global_exports), + debug_info, ) } diff --git a/lib/jit/src/instantiate.rs b/lib/jit/src/instantiate.rs index 1dd46679a9c4..df2c09d18c71 100644 --- a/lib/jit/src/instantiate.rs +++ b/lib/jit/src/instantiate.rs @@ -11,14 +11,17 @@ use core::cell::RefCell; use cranelift_entity::{BoxedSlice, PrimaryMap}; use cranelift_wasm::{DefinedFuncIndex, SignatureIndex}; use std::boxed::Box; +use std::io::Write; use std::rc::Rc; use std::string::String; use std::vec::Vec; +use wasmtime_debug::read_debuginfo; use wasmtime_environ::{ CompileError, DataInitializer, DataInitializerLocation, Module, ModuleEnvironment, }; use wasmtime_runtime::{ - Export, Imports, InstanceHandle, InstantiationError, VMFunctionBody, VMSharedSignatureIndex, + Export, GdbJitImageRegistration, Imports, InstanceHandle, InstantiationError, VMFunctionBody, + VMSharedSignatureIndex, }; /// An error condition while setting up a wasm instance, be it validation, @@ -37,6 +40,10 @@ pub enum SetupError { /// trapped. #[fail(display = "Instantiation error: {}", _0)] Instantiate(InstantiationError), + + /// Debug information generation error occurred. 
+ #[fail(display = "Debug information error: {}", _0)] + DebugInfo(failure::Error), } /// This is similar to `CompiledModule`, but references the data initializers @@ -47,6 +54,7 @@ struct RawCompiledModule<'data> { imports: Imports, data_initializers: Box<[DataInitializer<'data>]>, signatures: BoxedSlice, + dbg_jit_registration: Option, } impl<'data> RawCompiledModule<'data> { @@ -55,6 +63,7 @@ impl<'data> RawCompiledModule<'data> { compiler: &mut Compiler, data: &'data [u8], resolver: &mut dyn Resolver, + debug_info: bool, ) -> Result { let environ = ModuleEnvironment::new(compiler.frontend_config(), compiler.tunables()); @@ -62,8 +71,17 @@ impl<'data> RawCompiledModule<'data> { .translate(data) .map_err(|error| SetupError::Compile(CompileError::Wasm(error)))?; - let (allocated_functions, relocations) = - compiler.compile(&translation.module, translation.function_body_inputs)?; + let debug_data = if debug_info { + Some(read_debuginfo(&data)) + } else { + None + }; + + let (allocated_functions, relocations, dbg_image) = compiler.compile( + &translation.module, + translation.function_body_inputs, + debug_data, + )?; let imports = link_module( &translation.module, @@ -98,12 +116,22 @@ impl<'data> RawCompiledModule<'data> { // Make all code compiled thus far executable. 
compiler.publish_compiled_code(); + let dbg_jit_registration = if let Some(img) = dbg_image { + let mut bytes = Vec::new(); + bytes.write_all(&img).expect("all written"); + let reg = GdbJitImageRegistration::register(bytes); + Some(reg) + } else { + None + }; + Ok(Self { module: translation.module, finished_functions, imports, data_initializers: translation.data_initializers.into_boxed_slice(), signatures: signatures.into_boxed_slice(), + dbg_jit_registration, }) } } @@ -116,6 +144,7 @@ pub struct CompiledModule { data_initializers: Box<[OwnedDataInitializer]>, signatures: BoxedSlice, global_exports: Rc>>>, + dbg_jit_registration: Option>, } impl CompiledModule { @@ -125,8 +154,9 @@ impl CompiledModule { data: &'data [u8], resolver: &mut dyn Resolver, global_exports: Rc>>>, + debug_info: bool, ) -> Result { - let raw = RawCompiledModule::<'data>::new(compiler, data, resolver)?; + let raw = RawCompiledModule::<'data>::new(compiler, data, resolver, debug_info)?; Ok(Self::from_parts( raw.module, @@ -139,6 +169,7 @@ impl CompiledModule { .collect::>() .into_boxed_slice(), raw.signatures.clone(), + raw.dbg_jit_registration, )) } @@ -150,6 +181,7 @@ impl CompiledModule { imports: Imports, data_initializers: Box<[OwnedDataInitializer]>, signatures: BoxedSlice, + dbg_jit_registration: Option, ) -> Self { Self { module: Rc::new(module), @@ -158,6 +190,7 @@ impl CompiledModule { imports, data_initializers, signatures, + dbg_jit_registration: dbg_jit_registration.map(|r| Rc::new(r)), } } @@ -182,6 +215,7 @@ impl CompiledModule { self.imports.clone(), &data_initializers, self.signatures.clone(), + self.dbg_jit_registration.as_ref().map(|r| Rc::clone(&r)), Box::new(()), ) } @@ -215,8 +249,9 @@ pub fn instantiate( data: &[u8], resolver: &mut dyn Resolver, global_exports: Rc>>>, + debug_info: bool, ) -> Result { - let raw = RawCompiledModule::new(compiler, data, resolver)?; + let raw = RawCompiledModule::new(compiler, data, resolver, debug_info)?; InstanceHandle::new( 
Rc::new(raw.module), @@ -225,6 +260,7 @@ pub fn instantiate( raw.imports, &*raw.data_initializers, raw.signatures, + raw.dbg_jit_registration.map(|r| Rc::new(r)), Box::new(()), ) .map_err(SetupError::Instantiate) diff --git a/lib/obj/Cargo.toml b/lib/obj/Cargo.toml index 0e073d42943c..79def3e33016 100644 --- a/lib/obj/Cargo.toml +++ b/lib/obj/Cargo.toml @@ -14,5 +14,6 @@ edition = "2018" [dependencies] cranelift-codegen = "0.28.0" cranelift-entity = "0.28.0" +cranelift-wasm = "0.28.0" wasmtime-environ = { path = "../environ" } faerie = "0.7.1" diff --git a/lib/obj/src/context.rs b/lib/obj/src/context.rs new file mode 100644 index 000000000000..779013afe7af --- /dev/null +++ b/lib/obj/src/context.rs @@ -0,0 +1,90 @@ +use cranelift_codegen::isa::TargetFrontendConfig; +use cranelift_entity::EntityRef; +use cranelift_wasm::GlobalInit; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::ptr; +use wasmtime_environ::{Module, TargetSharedSignatureIndex, VMOffsets}; + +pub struct TableRelocation { + pub index: usize, + pub offset: usize, +} + +pub fn layout_vmcontext( + module: &Module, + target_config: &TargetFrontendConfig, +) -> (Box<[u8]>, Box<[TableRelocation]>) { + let ofs = VMOffsets::new(target_config.pointer_bytes(), &module); + let out_len = ofs.size_of_vmctx() as usize; + let mut out = Vec::with_capacity(out_len); + out.resize(out_len, 0); + + // Assign unique indices to unique signatures.
+ let mut signature_registry = HashMap::new(); + let mut signature_registry_len = signature_registry.len(); + for (index, sig) in module.signatures.iter() { + let offset = ofs.vmctx_vmshared_signature_id(index) as usize; + let target_index = match signature_registry.entry(sig) { + Entry::Occupied(o) => *o.get(), + Entry::Vacant(v) => { + assert!(signature_registry_len <= ::std::u32::MAX as usize); + let id = TargetSharedSignatureIndex::new(signature_registry_len as u32); + signature_registry_len += 1; + *v.insert(id) + } + }; + unsafe { + let to = out.as_mut_ptr().add(offset) as *mut TargetSharedSignatureIndex; + ptr::write(to, target_index); + } + } + + let num_tables_imports = module.imported_tables.len(); + let mut table_relocs = Vec::with_capacity(module.table_plans.len() - num_tables_imports); + for (index, table) in module.table_plans.iter().skip(num_tables_imports) { + let def_index = module.defined_table_index(index).unwrap(); + let offset = ofs.vmctx_vmtable_definition(def_index) as usize; + let current_elements = table.table.minimum as usize; + unsafe { + assert_eq!( + ::std::mem::size_of::() as u8, + ofs.size_of_vmtable_definition_current_elements(), + "vmtable_definition_current_elements expected to be u32" + ); + let to = out + .as_mut_ptr() + .add(offset) + .add(ofs.vmtable_definition_current_elements() as usize); + ptr::write(to as *mut u32, current_elements as u32); + } + table_relocs.push(TableRelocation { + index: def_index.index(), + offset, + }); + } + + let num_globals_imports = module.imported_globals.len(); + for (index, global) in module.globals.iter().skip(num_globals_imports) { + let def_index = module.defined_global_index(index).unwrap(); + let offset = ofs.vmctx_vmglobal_definition(def_index) as usize; + let to = unsafe { out.as_mut_ptr().add(offset) }; + match global.initializer { + GlobalInit::I32Const(x) => unsafe { + ptr::write(to as *mut i32, x); + }, + GlobalInit::I64Const(x) => unsafe { + ptr::write(to as *mut i64, x); + }, + 
GlobalInit::F32Const(x) => unsafe { + ptr::write(to as *mut u32, x); + }, + GlobalInit::F64Const(x) => unsafe { + ptr::write(to as *mut u64, x); + }, + _ => panic!("unsupported global type"), + } + } + + (out.into_boxed_slice(), table_relocs.into_boxed_slice()) +} diff --git a/lib/obj/src/data_segment.rs b/lib/obj/src/data_segment.rs new file mode 100644 index 000000000000..fdf4397f139a --- /dev/null +++ b/lib/obj/src/data_segment.rs @@ -0,0 +1,32 @@ +use faerie::{Artifact, Decl}; +use wasmtime_environ::DataInitializer; + +/// Declares data segment symbol +pub fn declare_data_segment( + obj: &mut Artifact, + _data_initaliazer: &DataInitializer, + index: usize, +) -> Result<(), String> { + let name = format!("_memory_{}", index); + obj.declare( + name, + Decl::Data { + writable: false, + global: false, + }, + ) + .map_err(|err| format!("{}", err))?; + Ok(()) +} + +/// Emit segment data and initialization location +pub fn emit_data_segment( + obj: &mut Artifact, + data_initaliazer: &DataInitializer, + index: usize, +) -> Result<(), String> { + let name = format!("_memory_{}", index); + obj.define(name, Vec::from(data_initaliazer.data)) + .map_err(|err| format!("{}", err))?; + Ok(()) +} diff --git a/lib/obj/src/emit_module.rs b/lib/obj/src/emit_module.rs deleted file mode 100644 index 1c0d32994cc9..000000000000 --- a/lib/obj/src/emit_module.rs +++ /dev/null @@ -1,37 +0,0 @@ -use cranelift_codegen::settings; -use cranelift_codegen::settings::Configurable; -use cranelift_entity::EntityRef; -use faerie::Artifact; -use wasmtime_environ::{Compilation, Module, Relocations}; - -/// Emits a module that has been emitted with the `wasmtime-environ` environment -/// implementation to a native object file. 
-pub fn emit_module( - obj: &mut Artifact, - module: &Module, - compilation: &Compilation, - relocations: &Relocations, -) -> Result<(), String> { - debug_assert!( - module.start_func.is_none() - || module.start_func.unwrap().index() >= module.imported_funcs.len(), - "imported start functions not supported yet" - ); - - let mut shared_builder = settings::builder(); - shared_builder - .enable("enable_verifier") - .expect("Missing enable_verifier setting"); - - for (i, function_relocs) in relocations.iter() { - assert!(function_relocs.is_empty(), "relocations not supported yet"); - let body = &compilation.functions[i]; - let func_index = module.func_index(i); - let string_name = format!("wasm_function[{}]", func_index.index()); - - obj.define(string_name, body.clone()) - .map_err(|err| format!("{}", err))?; - } - - Ok(()) -} diff --git a/lib/obj/src/function.rs b/lib/obj/src/function.rs new file mode 100644 index 000000000000..a680986c6e24 --- /dev/null +++ b/lib/obj/src/function.rs @@ -0,0 +1,70 @@ +use cranelift_codegen::settings; +use cranelift_codegen::settings::Configurable; +use cranelift_entity::EntityRef; +use faerie::{Artifact, Decl, Link}; +use wasmtime_environ::{Compilation, Module, RelocationTarget, Relocations}; + +/// Defines module functions +pub fn declare_functions( + obj: &mut Artifact, + module: &Module, + relocations: &Relocations, +) -> Result<(), String> { + for (i, _function_relocs) in relocations.iter().rev() { + let func_index = module.func_index(i); + let string_name = format!("_wasm_function_{}", func_index.index()); + obj.declare(string_name, Decl::Function { global: true }) + .map_err(|err| format!("{}", err))?; + } + Ok(()) +} + +/// Emits module functions +pub fn emit_functions( + obj: &mut Artifact, + module: &Module, + compilation: &Compilation, + relocations: &Relocations, +) -> Result<(), String> { + debug_assert!( + module.start_func.is_none() + || module.start_func.unwrap().index() >= module.imported_funcs.len(), + "imported start 
functions not supported yet" + ); + + let mut shared_builder = settings::builder(); + shared_builder + .enable("enable_verifier") + .expect("Missing enable_verifier setting"); + + for (i, _function_relocs) in relocations.iter() { + let body = &compilation.functions[i]; + let func_index = module.func_index(i); + let string_name = format!("_wasm_function_{}", func_index.index()); + + obj.define(string_name, body.clone()) + .map_err(|err| format!("{}", err))?; + } + + for (i, function_relocs) in relocations.iter() { + let func_index = module.func_index(i); + let string_name = format!("_wasm_function_{}", func_index.index()); + for r in function_relocs { + debug_assert_eq!(r.addend, 0); + match r.reloc_target { + RelocationTarget::UserFunc(target_index) => { + let target_name = format!("_wasm_function_{}", target_index.index()); + obj.link(Link { + from: &string_name, + to: &target_name, + at: r.offset as u64, + }) + .map_err(|err| format!("{}", err))?; + } + _ => panic!("relocations target not supported yet"), + }; + } + } + + Ok(()) +} diff --git a/lib/obj/src/lib.rs b/lib/obj/src/lib.rs index c8041793b625..f4d9dc2d00f5 100644 --- a/lib/obj/src/lib.rs +++ b/lib/obj/src/lib.rs @@ -26,9 +26,13 @@ ) )] -mod emit_module; +mod context; +mod data_segment; +mod function; +mod module; +mod table; -pub use crate::emit_module::emit_module; +pub use crate::module::emit_module; /// Version number of this crate. 
pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/lib/obj/src/module.rs b/lib/obj/src/module.rs new file mode 100644 index 000000000000..c9d1c6631dbf --- /dev/null +++ b/lib/obj/src/module.rs @@ -0,0 +1,69 @@ +use crate::context::layout_vmcontext; +use crate::data_segment::{declare_data_segment, emit_data_segment}; +use crate::function::{declare_functions, emit_functions}; +use crate::table::{declare_table, emit_table}; +use cranelift_codegen::isa::TargetFrontendConfig; +use faerie::{Artifact, Decl, Link}; +use wasmtime_environ::{Compilation, DataInitializer, Module, Relocations}; + +fn emit_vmcontext_init( + obj: &mut Artifact, + module: &Module, + target_config: &TargetFrontendConfig, +) -> Result<(), String> { + let (data, table_relocs) = layout_vmcontext(module, target_config); + obj.declare_with( + "_vmcontext_init", + Decl::Data { + writable: false, + global: true, + }, + data.to_vec(), + ) + .map_err(|err| format!("{}", err))?; + for reloc in table_relocs.iter() { + let target_name = format!("_table_{}", reloc.index); + obj.link(Link { + from: "_vmcontext_init", + to: &target_name, + at: reloc.offset as u64, + }) + .map_err(|err| format!("{}", err))?; + } + Ok(()) +} + +/// Emits a module that has been emitted with the `wasmtime-environ` environment +/// implementation to a native object file. 
+pub fn emit_module( + obj: &mut Artifact, + module: &Module, + compilation: &Compilation, + relocations: &Relocations, + data_initializers: &[DataInitializer], + target_config: &TargetFrontendConfig, +) -> Result<(), String> { + declare_functions(obj, module, relocations)?; + + for i in 0..data_initializers.len() { + declare_data_segment(obj, &data_initializers[i], i)?; + } + + for i in 0..module.table_plans.len() { + declare_table(obj, i)?; + } + + emit_functions(obj, module, compilation, relocations)?; + + for i in 0..data_initializers.len() { + emit_data_segment(obj, &data_initializers[i], i)?; + } + + for i in 0..module.table_plans.len() { + emit_table(obj, i)?; + } + + emit_vmcontext_init(obj, module, target_config)?; + + Ok(()) +} diff --git a/lib/obj/src/table.rs b/lib/obj/src/table.rs new file mode 100644 index 000000000000..bfe0666577fe --- /dev/null +++ b/lib/obj/src/table.rs @@ -0,0 +1,24 @@ +use faerie::{Artifact, Decl}; + +/// Declares table symbol +pub fn declare_table(obj: &mut Artifact, index: usize) -> Result<(), String> { + let name = format!("_table_{}", index); + obj.declare( + name, + Decl::Data { + writable: false, + global: false, + }, + ) + .map_err(|err| format!("{}", err))?; + Ok(()) +} + +/// Defines the table symbol (currently with empty contents) +pub fn emit_table(obj: &mut Artifact, index: usize) -> Result<(), String> { + let name = format!("_table_{}", index); + // FIXME: We need to initialize table using function symbols + obj.define(name, Vec::new()) + .map_err(|err| format!("{}", err))?; + Ok(()) +} diff --git a/lib/runtime/src/instance.rs b/lib/runtime/src/instance.rs index c3a25f70a05b..1632ee91beea 100644 --- a/lib/runtime/src/instance.rs +++ b/lib/runtime/src/instance.rs @@ -4,6 +4,7 @@ use crate::export::Export; use crate::imports::Imports; +use crate::jit_int::GdbJitImageRegistration; use crate::memory::LinearMemory; use crate::mmap::Mmap; use crate::signalhandlers::{wasmtime_init_eager, wasmtime_init_finish}; @@ -215,6 +216,9
@@ pub(crate) struct Instance { /// Hosts can store arbitrary per-instance information here. host_state: Box, + /// Optional image of JIT'ed code for debugger registration. + dbg_jit_registration: Option>, + /// Additional context used by compiled wasm code. This field is last, and /// represents a dynamically-sized array that extends beyond the nominal /// end of the struct (similar to a flexible array member). @@ -623,6 +627,7 @@ impl InstanceHandle { imports: Imports, data_initializers: &[DataInitializer<'_>], vmshared_signatures: BoxedSlice, + dbg_jit_registration: Option>, host_state: Box, ) -> Result { let mut tables = create_tables(&module); @@ -664,6 +669,7 @@ impl InstanceHandle { memories, tables, finished_functions, + dbg_jit_registration, host_state, vmctx: VMContext {}, }; diff --git a/lib/runtime/src/jit_int.rs b/lib/runtime/src/jit_int.rs new file mode 100644 index 000000000000..24c6bc8fdb29 --- /dev/null +++ b/lib/runtime/src/jit_int.rs @@ -0,0 +1,113 @@ +//! The GDB's JIT compilation interface. The low level module that exposes +//! the __jit_debug_register_code() and __jit_debug_descriptor to register +//! or unregister generated object images with debuggers. 
+ +use std::boxed::Box; +use std::ptr; + +#[repr(C)] +struct JITCodeEntry { + next_entry: *mut JITCodeEntry, + prev_entry: *mut JITCodeEntry, + symfile_addr: *const u8, + symfile_size: u64, +} + +const JIT_NOACTION: u32 = 0; +const JIT_REGISTER_FN: u32 = 1; +const JIT_UNREGISTER_FN: u32 = 2; + +#[repr(C)] +struct JITDescriptor { + version: u32, + action_flag: u32, + relevant_entry: *mut JITCodeEntry, + first_entry: *mut JITCodeEntry, +} + +#[no_mangle] +#[used] +static mut __jit_debug_descriptor: JITDescriptor = JITDescriptor { + version: 1, + action_flag: JIT_NOACTION, + relevant_entry: ptr::null_mut(), + first_entry: ptr::null_mut(), +}; + +#[no_mangle] +#[inline(never)] +extern "C" fn __jit_debug_register_code() {} + +/// Registration for JIT image +pub struct GdbJitImageRegistration { + entry: *mut JITCodeEntry, + file: Vec, +} + +impl GdbJitImageRegistration { + /// Registers JIT image using __jit_debug_register_code + pub fn register(file: Vec) -> GdbJitImageRegistration { + GdbJitImageRegistration { + entry: unsafe { register_gdb_jit_image(&file) }, + file, + } + } + + /// JIT image used in registration + pub fn file(&self) -> &[u8] { + &self.file + } +} + +impl Drop for GdbJitImageRegistration { + fn drop(&mut self) { + unsafe { + unregister_gdb_jit_image(self.entry); + } + } +} + +unsafe fn register_gdb_jit_image(file: &[u8]) -> *mut JITCodeEntry { + // Create a code entry for the file, which gives the start and size of the symbol file. + let entry = Box::into_raw(Box::new(JITCodeEntry { + next_entry: __jit_debug_descriptor.first_entry, + prev_entry: ptr::null_mut(), + symfile_addr: file.as_ptr(), + symfile_size: file.len() as u64, + })); + // Add it to the linked list in the JIT descriptor. + if __jit_debug_descriptor.first_entry != ptr::null_mut() { + (*__jit_debug_descriptor.first_entry).prev_entry = entry; + } + __jit_debug_descriptor.first_entry = entry; + // Point the relevant_entry field of the descriptor at the entry.
+ __jit_debug_descriptor.relevant_entry = entry; + // Set action_flag to JIT_REGISTER and call __jit_debug_register_code. + __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; + __jit_debug_register_code(); + + __jit_debug_descriptor.action_flag = JIT_NOACTION; + __jit_debug_descriptor.relevant_entry = ptr::null_mut(); + entry +} + +unsafe fn unregister_gdb_jit_image(entry: *mut JITCodeEntry) { + // Remove the code entry corresponding to the code from the linked list. + if (*entry).prev_entry != ptr::null_mut() { + (*(*entry).prev_entry).next_entry = (*entry).next_entry; + } else { + __jit_debug_descriptor.first_entry = (*entry).next_entry; + } + if (*entry).next_entry != ptr::null_mut() { + (*(*entry).next_entry).prev_entry = (*entry).prev_entry; + } + // Point the relevant_entry field of the descriptor at the code entry. + __jit_debug_descriptor.relevant_entry = entry; + // Set action_flag to JIT_UNREGISTER and call __jit_debug_register_code. + __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN; + __jit_debug_register_code(); + + __jit_debug_descriptor.action_flag = JIT_NOACTION; + __jit_debug_descriptor.relevant_entry = ptr::null_mut(); + let _box = Box::from_raw(entry); +} diff --git a/lib/runtime/src/lib.rs b/lib/runtime/src/lib.rs index 624b7e436eba..43b6bd89caba 100644 --- a/lib/runtime/src/lib.rs +++ b/lib/runtime/src/lib.rs @@ -34,6 +34,7 @@ extern crate winapi; mod export; mod imports; mod instance; +mod jit_int; mod memory; mod mmap; mod sig_registry; @@ -47,6 +48,7 @@ pub mod libcalls; pub use crate::export::Export; pub use crate::imports::Imports; pub use crate::instance::{InstanceHandle, InstantiationError, LinkError}; +pub use crate::jit_int::GdbJitImageRegistration; pub use crate::mmap::Mmap; pub use crate::sig_registry::SignatureRegistry; pub use crate::signalhandlers::{wasmtime_init_eager, wasmtime_init_finish}; diff --git a/lib/runtime/src/vmcontext.rs b/lib/runtime/src/vmcontext.rs index b5f2a507e297..4642b821f64a 100644 --- 
a/lib/runtime/src/vmcontext.rs +++ b/lib/runtime/src/vmcontext.rs @@ -393,7 +393,7 @@ pub struct VMSharedSignatureIndex(u32); mod test_vmshared_signature_index { use super::VMSharedSignatureIndex; use core::mem::size_of; - use wasmtime_environ::{Module, VMOffsets}; + use wasmtime_environ::{Module, TargetSharedSignatureIndex, VMOffsets}; #[test] fn check_vmshared_signature_index() { @@ -404,6 +404,14 @@ mod test_vmshared_signature_index { usize::from(offsets.size_of_vmshared_signature_index()) ); } + + #[test] + fn check_target_shared_signature_index() { + assert_eq!( + size_of::(), + size_of::() + ); + } } impl VMSharedSignatureIndex { diff --git a/lib/wast/src/spectest.rs b/lib/wast/src/spectest.rs index df62a3f74e6a..e82eb09c13be 100644 --- a/lib/wast/src/spectest.rs +++ b/lib/wast/src/spectest.rs @@ -223,6 +223,7 @@ pub fn instantiate_spectest() -> Result { imports, &data_initializers, signatures.into_boxed_slice(), + None, Box::new(()), ) } diff --git a/src/wasm2obj.rs b/src/wasm2obj.rs index 57eaacf61223..c68ddb50b767 100644 --- a/src/wasm2obj.rs +++ b/src/wasm2obj.rs @@ -45,8 +45,10 @@ use std::io::prelude::*; use std::path::Path; use std::path::PathBuf; use std::process; +use std::str; use std::str::FromStr; use target_lexicon::Triple; +use wasmtime_debug::{emit_debugsections, read_debuginfo}; use wasmtime_environ::{cranelift, ModuleEnvironment, Tunables}; use wasmtime_obj::emit_module; @@ -57,13 +59,14 @@ The translation is dependent on the environment chosen. The default is a dummy environment that produces placeholder values. 
Usage: - wasm2obj [--target TARGET] -o + wasm2obj [--target TARGET] [-g] -o wasm2obj --help | --version Options: -v, --verbose displays the module and translated functions -h, --help print this help message --target build for the target triple; default is the host machine + -g generate debug information --version print the Cranelift version "; @@ -72,6 +75,7 @@ struct Args { arg_file: String, arg_output: String, arg_target: Option, + flag_g: bool, } fn read_wasm_file(path: PathBuf) -> Result, io::Error> { @@ -91,7 +95,12 @@ fn main() { .unwrap_or_else(|e| e.exit()); let path = Path::new(&args.arg_file); - match handle_module(path.to_path_buf(), &args.arg_target, &args.arg_output) { + match handle_module( + path.to_path_buf(), + &args.arg_target, + &args.arg_output, + args.flag_g, + ) { Ok(()) => {} Err(message) => { println!(" error: {}", message); @@ -100,7 +109,12 @@ fn main() { } } -fn handle_module(path: PathBuf, target: &Option, output: &str) -> Result<(), String> { +fn handle_module( + path: PathBuf, + target: &Option, + output: &str, + generate_debug_info: bool, +) -> Result<(), String> { let data = match read_wasm_file(path) { Ok(data) => data, Err(err) => { @@ -130,7 +144,7 @@ fn handle_module(path: PathBuf, target: &Option, output: &str) -> Result // TODO: Expose the tunables as command-line flags. let tunables = Tunables::default(); - let (module, lazy_function_body_inputs, lazy_data_initializers) = { + let (module, lazy_function_body_inputs, lazy_data_initializers, target_config) = { let environ = ModuleEnvironment::new(isa.frontend_config(), tunables); let translation = environ @@ -141,27 +155,31 @@ fn handle_module(path: PathBuf, target: &Option, output: &str) -> Result translation.module, translation.function_body_inputs, translation.data_initializers, + translation.target_config, ) }; - // FIXME: We need to initialize memory in a way that supports alternate - // memory spaces, imported base addresses, and offsets. 
- for init in lazy_data_initializers.into_iter() { - obj.define("memory", Vec::from(init.data)) - .map_err(|err| format!("{}", err))?; - } - - let (compilation, relocations) = - cranelift::compile_module(&module, lazy_function_body_inputs, &*isa) + let (compilation, relocations, address_transform) = cranelift::compile_module( + &module, + lazy_function_body_inputs, + &*isa, + generate_debug_info, + ) + .map_err(|e| e.to_string())?; + + emit_module( + &mut obj, + &module, + &compilation, + &relocations, + &lazy_data_initializers, + &target_config, + )?; + + if generate_debug_info { + let debug_data = read_debuginfo(&data); + emit_debugsections(&mut obj, &target_config, &debug_data, &address_transform) .map_err(|e| e.to_string())?; - - emit_module(&mut obj, &module, &compilation, &relocations)?; - - if !module.table_plans.is_empty() { - if module.table_plans.len() > 1 { - return Err(String::from("multiple tables not supported yet")); - } - return Err(String::from("FIXME: implement tables")); } // FIXME: Make the format a parameter. diff --git a/src/wasmtime.rs b/src/wasmtime.rs index a202585e7c10..def4557ffa45 100644 --- a/src/wasmtime.rs +++ b/src/wasmtime.rs @@ -59,13 +59,14 @@ including calling the start function if one is present. Additional functions given with --invoke are then called. Usage: - wasmtime [-od] ... - wasmtime [-od] ... --invoke= + wasmtime [-odg] ... + wasmtime [-odg] ... --invoke= wasmtime --help | --version Options: --invoke= name of function to run -o, --optimize runs optimization passes on the translated functions + -g generate debug information -d, --debug enable debug output on stderr/stdout -h, --help print this help message --version print the Cranelift version @@ -76,6 +77,7 @@ struct Args { arg_file: Vec, flag_optimize: bool, flag_debug: bool, + flag_g: bool, flag_invoke: Option, } @@ -137,6 +139,9 @@ fn main() { instantiate_spectest().expect("instantiating spectest"), ); + // Enable/disable producing of debug info. 
+ context.set_debug_info(args.flag_g); + for filename in &args.arg_file { let path = Path::new(&filename); match handle_module(&mut context, &args, path) { diff --git a/tests/instantiate.rs b/tests/instantiate.rs index 3901b82c9409..adf49456887d 100644 --- a/tests/instantiate.rs +++ b/tests/instantiate.rs @@ -42,6 +42,6 @@ fn test_environ_translate() { let mut resolver = NullResolver {}; let mut compiler = Compiler::new(isa); let global_exports = Rc::new(RefCell::new(HashMap::new())); - let instance = instantiate(&mut compiler, &data, &mut resolver, global_exports); + let instance = instantiate(&mut compiler, &data, &mut resolver, global_exports, false); assert!(instance.is_ok()); }