Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions src/wasm-binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -1490,8 +1490,11 @@ class WasmBinaryWriter {

MixedArena allocator;

// storage of source map locations until the section is placed at its final
// location (shrinking LEBs may cause changes there)
// Storage of source map locations until the section is placed at its final
// location (shrinking LEBs may cause changes there).
//
// A null DebugLocation* indicates we have no debug information for that
// location.
std::vector<std::pair<size_t, const Function::DebugLocation*>>
sourceMapLocations;
size_t sourceMapLocationsSizeAtSectionStart;
Expand Down Expand Up @@ -1522,13 +1525,26 @@ class WasmBinaryReader {
Module& wasm;
MixedArena& allocator;
const std::vector<char>& input;

std::istream* sourceMap;

struct NextDebugLocation {
uint32_t availablePos;
uint32_t previousPos;
Function::DebugLocation next;
};
NextDebugLocation nextDebugLocation;
} nextDebugLocation;

// Whether debug info is present next or not in the next debug location. A
// debug location can contain debug info (file:line:col) or it might not. We
// need to track this boolean alongside |nextDebugLocation| - that is, we
// can't just do something like std::optional<DebugLocation> or such - as we
// still need to track the values in |next|, as later positions are relative
// to them. That is, if we have line number 100, then no debug info, and then
// line number 500, then when we get to 500 we will see "+400" which is
// relative to the last existing line number (we "skip" over the place without
// debug info).
bool nextDebugLocationHasDebugInfo;

bool debugInfo = true;
bool DWARF = false;
bool skipFunctionBodies = false;
Expand Down
65 changes: 55 additions & 10 deletions src/wasm/wasm-binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1186,12 +1186,16 @@ void WasmBinaryWriter::writeSourceMapEpilog() {
*sourceMap << ",";
}
writeBase64VLQ(*sourceMap, int32_t(offset - lastOffset));
writeBase64VLQ(*sourceMap, int32_t(loc->fileIndex - lastLoc.fileIndex));
writeBase64VLQ(*sourceMap, int32_t(loc->lineNumber - lastLoc.lineNumber));
writeBase64VLQ(*sourceMap,
int32_t(loc->columnNumber - lastLoc.columnNumber));
lastLoc = *loc;
lastOffset = offset;
if (loc) {
// There is debug information for this location, so emit the next 3
// fields and update lastLoc.
writeBase64VLQ(*sourceMap, int32_t(loc->fileIndex - lastLoc.fileIndex));
writeBase64VLQ(*sourceMap, int32_t(loc->lineNumber - lastLoc.lineNumber));
writeBase64VLQ(*sourceMap,
int32_t(loc->columnNumber - lastLoc.columnNumber));
lastLoc = *loc;
}
}
*sourceMap << "\"}";
}
Expand Down Expand Up @@ -1340,7 +1344,28 @@ void WasmBinaryWriter::writeDebugLocation(Expression* curr, Function* func) {
auto& debugLocations = func->debugLocations;
auto iter = debugLocations.find(curr);
if (iter != debugLocations.end()) {
// There is debug information here, write it out.
writeDebugLocation(iter->second);
} else {
// This expression has no debug location. We need to emit an indication
// of that (so that we do not get "smeared" with debug info from anything
// before or after us).
//
// We don't need to write repeated "no debug info" indications, as a
// single one is enough to make it clear that the debug information before
// us is valid no longer. We also don't need to write one if there is
// nothing before us.
if (!sourceMapLocations.empty() &&
sourceMapLocations.back().second != nullptr) {
sourceMapLocations.emplace_back(o.size(), nullptr);

// Initialize the state of debug info to indicate there is no current
// debug info relevant. This sets |lastDebugLocation| to a dummy value,
// so that later places with debug info can see that they differ from
// it (without this, if we had some debug info, then a nullptr for none,
// and then the same debug info, we could get confused).
initializeDebugInfo();
}
}
}
// If this is an instruction in a function, and if the original wasm had
Expand Down Expand Up @@ -1614,7 +1639,8 @@ WasmBinaryReader::WasmBinaryReader(Module& wasm,
FeatureSet features,
const std::vector<char>& input)
: wasm(wasm), allocator(wasm.allocator), input(input),
sourceMap(nullptr), nextDebugLocation{0, 0, {0, 0, 0}}, debugLocation() {
sourceMap(nullptr), nextDebugLocation{0, 0, {0, 0, 0}},
nextDebugLocationHasDebugInfo(false), debugLocation() {
wasm.features = features;
}

Expand Down Expand Up @@ -2776,13 +2802,18 @@ void WasmBinaryReader::readSourceMapHeader() {
return;
}
// read first debug location
// TODO: Handle the case where the very first one has only a position but not
// debug info. In practice that does not happen, which needs
// investigation (if it does, it will assert in readBase64VLQ, so it
// would not be a silent error at least).
uint32_t position = readBase64VLQ(*sourceMap);
uint32_t fileIndex = readBase64VLQ(*sourceMap);
uint32_t lineNumber =
readBase64VLQ(*sourceMap) + 1; // adjust zero-based line number
uint32_t columnNumber = readBase64VLQ(*sourceMap);
nextDebugLocation = {
position, position, {fileIndex, lineNumber, columnNumber}};
nextDebugLocationHasDebugInfo = true;
}

void WasmBinaryReader::readNextDebugLocation() {
Expand All @@ -2803,7 +2834,11 @@ void WasmBinaryReader::readNextDebugLocation() {
debugLocation.clear();
// use debugLocation only for function expressions
if (currFunction) {
debugLocation.insert(nextDebugLocation.next);
if (nextDebugLocationHasDebugInfo) {
debugLocation.insert(nextDebugLocation.next);
} else {
debugLocation.clear();
}
}

char ch;
Expand All @@ -2818,6 +2853,17 @@ void WasmBinaryReader::readNextDebugLocation() {

int32_t positionDelta = readBase64VLQ(*sourceMap);
uint32_t position = nextDebugLocation.availablePos + positionDelta;

nextDebugLocation.previousPos = nextDebugLocation.availablePos;
nextDebugLocation.availablePos = position;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not new logic - just moved up from below (since we also want it for a 1-length entry).

Copy link
Contributor

@JesseCodeBones JesseCodeBones Aug 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kripken
Hi Alon,
does this optimization only impact the debug info read process? how about the BinaryenFunctionSetDebugLocation API? I think AssemblyScript is using this API for debug info generation, it also have the smeared issue for the shadow stack check.
for more detail: AssemblyScript/assemblyscript#2704
CC @dcodeIO @MaxGraey @HerrCai0907

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part of the code, or this PR as a whole?

The PR as a whole affects binary read and write. We now write 1-length segments to prevent smearing, and we read them properly as well.

SetDebugLocation should still be fine. That sets debug info on a single expression. This PR fixes things in binary reading and writing, which due to smearing would cause things to get debug info when they should not. See the testcase in the PR for an example of a bug that is fixed.


auto peek = sourceMap->peek();
if (peek == ',' || peek == '\"') {
// This is a 1-length entry, so the next location has no debug info.
nextDebugLocationHasDebugInfo = false;
break;
}

int32_t fileIndexDelta = readBase64VLQ(*sourceMap);
uint32_t fileIndex = nextDebugLocation.next.fileIndex + fileIndexDelta;
int32_t lineNumberDelta = readBase64VLQ(*sourceMap);
Expand All @@ -2826,9 +2872,8 @@ void WasmBinaryReader::readNextDebugLocation() {
uint32_t columnNumber =
nextDebugLocation.next.columnNumber + columnNumberDelta;

nextDebugLocation = {position,
nextDebugLocation.availablePos,
{fileIndex, lineNumber, columnNumber}};
nextDebugLocation.next = {fileIndex, lineNumber, columnNumber};
nextDebugLocationHasDebugInfo = true;
}
}

Expand Down
122 changes: 122 additions & 0 deletions test/lit/debug/source-map-stop.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.

;; RUN: wasm-opt %s -g -o %s.wasm -osm %s.wasm.map
;; RUN: wasm-opt %s.wasm -ism %s.wasm.map -S -o - | filecheck %s

;; Verify that writing to a source map and reading it back does not "smear"
;; debug info across adjacent instructions. The debug info in the output should
;; be identical to the input.

(module
;; CHECK: (func $test (param $0 i32) (result i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (call $test
;; CHECK-NEXT: ;;@ waka:100:1
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: ;;@ waka:200:2
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
(func $test (param i32) (result i32)
;; The drop&call have no debug info, and should remain so. Specifically the
;; instruction right before them in the binary (the const 1) should not
;; smear its debug info on it. And the drop is between an instruction that
;; has debug info (the const 1) and another (the i32.const 2): we should not
;; receive the debug info of either. (This is a regression test for a bug
;; that only happens in that state: removing the debug info either before or
;; after would avoid that bug.)

(drop
(call $test
;;@ waka:100:1
(i32.const 1)
)
)
;;@ waka:200:2
(i32.const 2)
)

;; CHECK: (func $same-later (param $0 i32) (result i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (call $test
;; CHECK-NEXT: ;;@ waka:100:1
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: ;;@ waka:100:1
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
(func $same-later (param i32) (result i32)
;; As the first, but now the later debug info is also 100:1. No debug info
;; should change here.

(drop
(call $test
;;@ waka:100:1
(i32.const 1)
)
)
;;@ waka:100:1
(i32.const 2)
)

;; CHECK: (func $more-before (param $0 i32) (result i32)
;; CHECK-NEXT: ;;@ waka:50:5
;; CHECK-NEXT: (nop)
;; CHECK-NEXT: ;;@ waka:50:5
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (call $test
;; CHECK-NEXT: ;;@ waka:100:1
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: ;;@ waka:200:2
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
(func $more-before (param i32) (result i32)
;; As the first, but now there is more debug info before the 100:1 (the
;; very first debug info in a function has special handling, so we test it
;; more carefully).
;;
;; The s-parser actually smears 50:5 on the drop and call after it, so the
;; output here looks incorrect. This may be a bug there, TODO

;;@ waka:50:5
(nop)
(drop
(call $test
;;@ waka:100:1
(i32.const 1)
)
)
;;@ waka:200:2
(i32.const 2)
)

;; CHECK: (func $nothing-before (param $0 i32) (result i32)
;; CHECK-NEXT: (nop)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (call $test
;; CHECK-NEXT: ;;@ waka:100:1
;; CHECK-NEXT: (i32.const 1)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: ;;@ waka:200:2
;; CHECK-NEXT: (i32.const 2)
;; CHECK-NEXT: )
(func $nothing-before (param i32) (result i32)
;; As before, but no debug info on the nop before us (so the first
;; instruction in the function no longer has a debug annotation). Nothing
;; should change in the debug info.
(nop)
(drop
(call $test
;;@ waka:100:1
(i32.const 1)
)
)
;;@ waka:200:2
(i32.const 2)
)
)