Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 109 additions & 2 deletions scripts/fuzz_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,8 +743,8 @@ def run_d8_js(js, args=[], liftoff=True):
FUZZ_SHELL_JS = in_binaryen('scripts', 'fuzz_shell.js')


def run_d8_wasm(wasm, liftoff=True):
return run_d8_js(FUZZ_SHELL_JS, [wasm], liftoff=liftoff)
def run_d8_wasm(wasm, liftoff=True, args=None):
    """Run a wasm file in d8 through the fuzz shell script.

    wasm:    path of the wasm file; it is passed to the shell script as its
             first argument.
    liftoff: forwarded unchanged to run_d8_js.
    args:    optional list of extra arguments that are passed to the shell
             script after the wasm file. Defaults to no extra arguments.

    Returns whatever run_d8_js returns.
    """
    # use a None sentinel rather than a mutable [] default, so that a single
    # default list object is never shared between calls.
    if args is None:
        args = []
    return run_d8_js(FUZZ_SHELL_JS, [wasm] + args, liftoff=liftoff)


def all_disallowed(features):
Expand Down Expand Up @@ -1391,6 +1391,111 @@ def handle(self, wasm):
compare_between_vms(output, merged_output, 'Merge')


# Matches the start of a function definition in wat text: a newline, a single
# space, and then "(func $NAME". The one capture group is NAME, that is, the
# run of non-whitespace characters after the "$". Split.handle applies this to
# the output of wasm-dis to list the functions in a module.
FUNC_NAMES_REGEX = re.compile(r'\n [(]func [$](\S+)')


# Tests wasm-split
class Split(TestCaseHandler):
frequency = 1 # TODO: adjust lower when we actually enable this

def handle(self, wasm):
# get the list of function names, some of which we will decide to split
# out
wat = run([in_bin('wasm-dis'), wasm] + FEATURE_OPTS)
all_funcs = re.findall(FUNC_NAMES_REGEX, wat)

# get the original output before splitting
output = run_d8_wasm(wasm)
output = fix_output(output)

# find the names of the exports. we need this because when we split the
# module then new exports appear to connect the two halves of the
# original module. we do not want to call all the exports on the new
# primary module, but only the original ones.
exports = []
Comment on lines +1411 to +1415
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be simpler to use the --export-prefix option to add a unique prefix to the new exports that we can filter out directly in the JS wrapper.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, then we'd need to hardcode some special prefix in the JS wrapper, but I'm not sure there is a fixed prefix we can use: we don't want to overlap with existing export names.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW Emscripten uses % as the prefix. Maybe we can just choose an arbitrary one like that and it would be good enough?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the difference is that Emscripten has full control over the export names. In the fuzzer we do want to be able to fuzz initial content from anywhere. I suppose we could sanitize that content before fuzzing it, but that seems more complicated to me.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, the current solution LGTM, then.

for line in output.splitlines():
if FUZZ_EXEC_CALL_PREFIX in line:
exports.append(get_export_from_call_line(line))

# pick which to split out, with a random rate of picking (biased towards
# 0.5).
rate = (random.random() + random.random()) / 2
split_funcs = []
for func in all_funcs:
if random.random() < rate:
split_funcs.append(func)

if not split_funcs:
# nothing to split out
return

# split the wasm into two
primary = wasm + '.primary.wasm'
secondary = wasm + '.secondary.wasm'

# we require reference types, because that allows us to create our own
# table. without that we use the existing table, and that may interact
# with user code in odd ways (it really only works with the particular
# form of table+segments that LLVM emits, and not with random fuzzer
# content).
split_feature_opts = FEATURE_OPTS + ['--enable-reference-types']

run([in_bin('wasm-split'), wasm, '--split',
'--split-funcs', ','.join(split_funcs),
'--primary-output', primary,
'--secondary-output', secondary] + split_feature_opts)

# sometimes also optimize the split modules
optimized = False

def optimize(name):
# do not optimize if it would change the ABI
if CLOSED_WORLD:
return name
# TODO: use other optimizations here, but we'd need to be careful of
# anything that can alter the ABI, and also current
# limitations of open-world optimizations (see discussion in
# https://github.com/WebAssembly/binaryen/pull/6660)
opts = ['-O3']
new_name = name + '.opt.wasm'
run([in_bin('wasm-opt'), name, '-o', new_name, '-all'] + opts + split_feature_opts)
nonlocal optimized
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat, I've never seen nonlocal before.

optimized = True
return new_name

if random.random() < 0.5:
primary = optimize(primary)
if random.random() < 0.5:
secondary = optimize(secondary)

# prepare the list of exports to call. the format is
#
# exports:A,B,C
#
exports_to_call = 'exports:' + ','.join(exports)

# get the output from the split modules, linking them using JS
# TODO run liftoff/turboshaft/etc.
linked_output = run_d8_wasm(primary, args=[secondary, exports_to_call])
linked_output = fix_output(linked_output)

# see D8.can_compare_to_self: we cannot compare optimized outputs if
# NaNs are allowed, as the optimizer can modify NaNs differently than
# the JS engine.
if not (NANS and optimized):
compare_between_vms(output, linked_output, 'Split')

def can_run_on_feature_opts(self, feature_opts):
    """Report whether this handler can run under the current settings.

    feature_opts is accepted as part of the handler interface but is not
    read here; the decision depends on LEGALIZE and on all_disallowed().
    """
    # the two split modules are linked together by JS (JS connects the
    # exports of one to the imports of the other, etc.), so the wasm has to
    # be valid for JS, which is only the case when we legalize.
    if LEGALIZE:
        # see D8.can_run
        return all_disallowed(['shared-everything'])
    return False


# Check that the text format round-trips without error.
class RoundtripText(TestCaseHandler):
frequency = 0.05
Expand All @@ -1413,6 +1518,8 @@ def handle(self, wasm):
TrapsNeverHappen(),
CtorEval(),
Merge(),
# TODO: enable when stable enough, and adjust |frequency| (see above)
# Split(),
RoundtripText()
]

Expand Down
115 changes: 80 additions & 35 deletions scripts/fuzz_shell.js
Original file line number Diff line number Diff line change
@@ -1,43 +1,54 @@
// Shell integration.
if (typeof console === 'undefined') {
console = { log: print };
}
var tempRet0;
var binary;
if (typeof process === 'object' && typeof require === 'function' /* node.js detection */) {
var args = process.argv.slice(2);
binary = require('fs').readFileSync(args[0]);
if (!binary.buffer) binary = new Uint8Array(binary);
// Shell integration: find argv and set up readBinary().
var argv;
var readBinary;
if (typeof process === 'object' && typeof require === 'function') {
// Node.js.
argv = process.argv.slice(2);
readBinary = function(name) {
var data = require('fs').readFileSync(name);
if (!data.buffer) data = new Uint8Array(data);
return data;
};
} else {
var args;
// A shell like D8.
if (typeof scriptArgs != 'undefined') {
args = scriptArgs;
argv = scriptArgs;
} else if (typeof arguments != 'undefined') {
args = arguments;
}
if (typeof readbuffer === 'function') {
binary = new Uint8Array(readbuffer(args[0]));
} else {
binary = read(args[0], 'binary');
argv = arguments;
}
readBinary = function(name) {
if (typeof readbuffer === 'function') {
return new Uint8Array(readbuffer(name));
} else {
return read(name, 'binary');
}
};
}

// We are given the binary to run as a parameter.
var binary = readBinary(argv[0]);

// Normally we call all the exports of the given wasm file. But, if we are
// passed a final parameter in the form of "exports:X,Y,Z" then we call
// specifically the exports X, Y, and Z.
// The list of export names to call, or undefined if we were not given one (in
// which case the decision of what to call is left to later code).
var exportsToCall;
if (argv[argv.length - 1].startsWith('exports:')) {
  // Strip the "exports:" prefix and split the comma-separated names. slice()
  // is used rather than the deprecated substr().
  exportsToCall = argv[argv.length - 1].slice('exports:'.length).split(',');
  // Remove the parameter we just consumed, so that the positional checks
  // below only see file names.
  argv.pop();
}

// If a second parameter is given, it is a second binary that we will link in
// with it.
var secondBinary;
if (argv[1]) {
  secondBinary = readBinary(argv[1]);
}

// Utilities.
function assert(x, y) {
if (!x) throw (y || 'assertion failed');// + new Error().stack;
}

// Deterministic randomness: detrand() returns a value in [0, 1) that is a
// multiple of 1/256. The sequence is always the same, as it depends only on a
// fixed initial hash and on how many times detrand() has been called.
var detrand = (function() {
  var state = 5381; // TODO DET_RAND_SEED;
  var counter = 0;
  return function() {
    // Mix the previous state (state * 33, in 32-bit arithmetic) with the low
    // byte of the call counter, and keep the result as an unsigned 32-bit
    // value.
    var mixed = (state << 5) + state;
    state = (mixed ^ (counter & 0xff)) >>> 0;
    counter = (counter + 1) % 256;
    // Only the low byte of the state is used for the output.
    return (state % 256) / 256;
  };
})();

// Print out a value in a way that works well for fuzzing.
function printed(x, y) {
if (typeof y !== 'undefined') {
Expand Down Expand Up @@ -124,6 +135,7 @@ function logValue(x, y) {
}

// Set up the imports.
var tempRet0;
var imports = {
'fuzzing-support': {
'log-i32': logValue,
Expand Down Expand Up @@ -151,6 +163,24 @@ if (typeof WebAssembly.Tag !== 'undefined') {
};
}

// When a second binary is going to be linked in, the primary module has
// imports of the form (import "placeholder" "0" (func .. which the secondary
// module provides, and which must be called using an indirection. Provide
// something for each such import here.
if (secondBinary) {
  imports['placeholder'] = new Proxy({}, {
    // Any property at all that is read from the placeholder module resolves
    // to a fresh stub function that throws. An indirect call through the
    // exported table would also be possible, but the secondary module is
    // linked in immediately, so the stubs are never called: they are written
    // to the table and then overwritten by the secondary module. Something
    // still has to be returned here so that the primary module links without
    // erroring.
    get: (target, prop, receiver) => {
      return () => {
        throw 'proxy stub should not be called';
      };
    }
  });
}

// Create the wasm.
var module = new WebAssembly.Module(binary);

Expand All @@ -165,17 +195,32 @@ try {
// Handle the exports.
var exports = instance.exports;

var view;
// Link in a second module, if one was provided.
if (secondBinary) {
var secondModule = new WebAssembly.Module(secondBinary);

// Recreate the view. This is important both initially and after a growth.
function refreshView() {
if (exports.memory) {
view = new Int32Array(exports.memory.buffer);
// The secondary module just needs to import the primary one: all original
// imports it might have needed were exported from there.
var secondImports = {'primary': exports};
var secondInstance;
try {
secondInstance = new WebAssembly.Instance(secondModule, secondImports);
} catch (e) {
console.log('exception thrown: failed to instantiate second module');
quit();
}
}

// Run the wasm.
for (var e in exports) {
if (!exportsToCall) {
// We were not told specific exports, so call them all.
exportsToCall = [];
for (var e in exports) {
exportsToCall.push(e);
}
}

for (var e of exportsToCall) {
if (typeof exports[e] !== 'function') {
continue;
}
Expand Down
Loading