WebAssembly · kripken · Oct 18, 2024 · Oct 16, 2024 · Oct 16, 2024 · Oct 16, 2024
diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
@@ -743,8 +743,8 @@ def run_d8_js(js, args=[], liftoff=True):
 FUZZ_SHELL_JS = in_binaryen('scripts', 'fuzz_shell.js')
 
 
-def run_d8_wasm(wasm, liftoff=True):
-    return run_d8_js(FUZZ_SHELL_JS, [wasm], liftoff=liftoff)
+def run_d8_wasm(wasm, liftoff=True, args=()):  # args: extra shell arguments, placed after |wasm|
+    return run_d8_js(FUZZ_SHELL_JS, [wasm] + list(args), liftoff=liftoff)
 
 
 def all_disallowed(features):
@@ -1391,6 +1391,111 @@ def handle(self, wasm):
         compare_between_vms(output, merged_output, 'Merge')
 
 
+FUNC_NAMES_REGEX = re.compile(r'\n [(]func [$](\S+)')  # group 1: NAME in each "\n (func $NAME" match
+
+
+# Tests wasm-split: split the module in two, link the halves back together in
+# JS, and check that the output matches that of the original module.
+class Split(TestCaseHandler):
+    frequency = 1  # TODO: adjust lower when we actually enable this
+
+    def handle(self, wasm):
+        # disassemble the module to find the names of all its functions; a
+        # random subset of them will be split out below
+        disassembly = run([in_bin('wasm-dis'), wasm] + FEATURE_OPTS)
+        all_funcs = FUNC_NAMES_REGEX.findall(disassembly)
+
+        # run the original module, giving us a baseline to compare against
+        output = fix_output(run_d8_wasm(wasm))
+
+        # collect the names of the exports that were called. splitting adds new
+        # exports to the primary module, which connect the two halves of the
+        # original module, and we must not call those: only the original ones.
+        exports = [get_export_from_call_line(line)
+                   for line in output.splitlines()
+                   if FUZZ_EXEC_CALL_PREFIX in line]
+
+        # decide what to split out: each function is picked with the same
+        # probability, which is itself random (and biased towards 0.5).
+        rate = (random.random() + random.random()) / 2
+        split_funcs = [func for func in all_funcs if random.random() < rate]
+        if not split_funcs:
+            # nothing to split out
+            return
+
+        # the names of the two modules that wasm-split will emit
+        primary = wasm + '.primary.wasm'
+        secondary = wasm + '.secondary.wasm'
+
+        # we require reference types, because that allows us to create our own
+        # table. without that we use the existing table, and that may interact
+        # with user code in odd ways (it really only works with the particular
+        # form of table+segments that LLVM emits, and not with random fuzzer
+        # content).
+        split_feature_opts = FEATURE_OPTS + ['--enable-reference-types']
+
+        # split the wasm into two
+        split_cmd = [in_bin('wasm-split'), wasm, '--split',
+                     '--split-funcs', ','.join(split_funcs),
+                     '--primary-output', primary,
+                     '--secondary-output', secondary]
+        run(split_cmd + split_feature_opts)
+
+        # each of the split modules may then be optimized (a coin flip for
+        # each). |optimized| records whether any optimization was done.
+        optimized = False
+
+        def optimize(name):
+            # optimizes the given file and returns the name of the output, or
+            # returns the given name unchanged if we must not optimize
+            nonlocal optimized
+            if CLOSED_WORLD:
+                # do not optimize if it would change the ABI
+                return name
+            # TODO: use other optimizations here, but we'd need to be careful of
+            #       anything that can alter the ABI, and also current
+            #       limitations of open-world optimizations (see discussion in
+            #       https://github.com/WebAssembly/binaryen/pull/6660)
+            optimized_name = name + '.opt.wasm'
+            opt_cmd = [in_bin('wasm-opt'), name, '-o', optimized_name, '-all', '-O3']
+            run(opt_cmd + split_feature_opts)
+            optimized = True
+            return optimized_name
+
+        if random.random() < 0.5:
+            primary = optimize(primary)
+        if random.random() < 0.5:
+            secondary = optimize(secondary)
+
+        # the list of exports to call is passed to the shell as one argument,
+        # in the form
+        #
+        #  exports:A,B,C
+        #
+        exports_to_call = 'exports:' + ','.join(exports)
+
+        # run the split modules, with JS linking them together
+        # TODO run liftoff/turboshaft/etc.
+        shell_args = [secondary, exports_to_call]
+        linked_output = fix_output(run_d8_wasm(primary, args=shell_args))
+
+        # see D8.can_compare_to_self: we cannot compare optimized outputs if
+        # NaNs are allowed, as the optimizer can modify NaNs differently than
+        # the JS engine.
+        if not NANS or not optimized:
+            compare_between_vms(output, linked_output, 'Split')
+
+    def can_run_on_feature_opts(self, feature_opts):
+        # the split modules are linked by JS (it connects the exports of one
+        # to the imports of the other, etc.), and since we run in JS the wasm
+        # must be valid for JS.
+        if LEGALIZE:
+            # see D8.can_run
+            return all_disallowed(['shared-everything'])
+
+        return False
+
+
 # Check that the text format round-trips without error.
 class RoundtripText(TestCaseHandler):
     frequency = 0.05
@@ -1413,6 +1518,8 @@ def handle(self, wasm):
     TrapsNeverHappen(),
     CtorEval(),
     Merge(),
+    # TODO: enable when stable enough, and adjust |frequency| (see above)
+    # Split(),
     RoundtripText()
 ]
 

diff --git a/scripts/fuzz_shell.js b/scripts/fuzz_shell.js
@@ -1,43 +1,54 @@
-// Shell integration.
-if (typeof console === 'undefined') {
-  console = { log: print };
-}
-var tempRet0;
-var binary;
-if (typeof process === 'object' && typeof require === 'function' /* node.js detection */) {
-  var args = process.argv.slice(2);
-  binary = require('fs').readFileSync(args[0]);
-  if (!binary.buffer) binary = new Uint8Array(binary);
+// Shell integration: find argv and set up readBinary().
+var argv;
+var readBinary;
+if (typeof process === 'object' && typeof require === 'function') {
+  // Node.js.
+  argv = process.argv.slice(2);
+  readBinary = function(name) {
+    var contents = require('fs').readFileSync(name);
+    // Wrap the result in a Uint8Array if it does not expose a |buffer|.
+    return contents.buffer ? contents : new Uint8Array(contents);
+  };
 } else {
-  var args;
+  // A shell like D8.
   if (typeof scriptArgs != 'undefined') {
-    args = scriptArgs;
+    argv = scriptArgs;
   } else if (typeof arguments != 'undefined') {
-    args = arguments;
-  }
-  if (typeof readbuffer === 'function') {
-    binary = new Uint8Array(readbuffer(args[0]));
-  } else {
-    binary = read(args[0], 'binary');
+    argv = arguments;
   }
+  readBinary = function(name) {
+    // Use readbuffer() if the shell has it, else read() in binary mode.
+    if (typeof readbuffer !== 'function') {
+      return read(name, 'binary');
+    }
+    return new Uint8Array(readbuffer(name));
+  };
+}
+
+// The first parameter is always the binary to run.
+var binary = readBinary(argv[0]);
+
+// Normally we call all the exports of the given wasm file. But, if we are
+// passed a final parameter in the form of "exports:X,Y,Z" then we call
+// specifically the exports X, Y, and Z. That parameter is removed from argv
+// once parsed, so that it is not mistaken for a second binary below.
+var exportsToCall;
+if (argv[argv.length - 1].startsWith('exports:')) {
+  exportsToCall = argv.pop().slice('exports:'.length).split(',');
+}
+
+// If a second parameter is given, it is a second binary that we will link in
+// with the first.
+var secondBinary;
+if (argv[1]) {
+  secondBinary = readBinary(argv[1]);
+}
 
 // Utilities.
 function assert(x, y) {
   if (!x) throw (y || 'assertion failed');// + new Error().stack;
 }
 
-// Deterministic randomness.
-var detrand = (function() {
-  var hash = 5381; // TODO DET_RAND_SEED;
-  var x = 0;
-  return function() {
-    hash = (((hash << 5) + hash) ^ (x & 0xff)) >>> 0;
-    x = (x + 1) % 256;
-    return (hash % 256) / 256;
-  };
-})();
-
 // Print out a value in a way that works well for fuzzing.
 function printed(x, y) {
   if (typeof y !== 'undefined') {
@@ -124,6 +135,7 @@ function logValue(x, y) {
 }
 
 // Set up the imports.
+var tempRet0;
 var imports = {
   'fuzzing-support': {
     'log-i32': logValue,
@@ -151,6 +163,24 @@ if (typeof WebAssembly.Tag !== 'undefined') {
   };
 }
 
+// When a second binary is going to be linked in, we must provide imports for
+// the placeholders. Any import like  (import "placeholder" "0" (func ..  will
+// be provided by the secondary module, and must be called using an
+// indirection.
+if (secondBinary) {
+  // Whatever property is read from the proxy, we return a function that
+  // throws. We could do an indirect call using the exported table, but as we
+  // immediately link in the secondary module, these stubs will not be called
+  // (they are written to the table, and the secondary module overwrites them).
+  // We do need to return something so the primary module links without
+  // erroring, though.
+  imports['placeholder'] = new Proxy({}, {
+    get: () => () => {
+      throw 'proxy stub should not be called';
+    }
+  });
+}
+
 // Create the wasm.
 var module = new WebAssembly.Module(binary);
 
@@ -165,17 +195,32 @@ try {
 // Handle the exports.
 var exports = instance.exports;
 
-var view;
+// Link in a second module, if one was provided.
+if (secondBinary) {
+  var secondModule = new WebAssembly.Module(secondBinary);
 
-// Recreate the view. This is important both initially and after a growth.
-function refreshView() {
-  if (exports.memory) {
-    view = new Int32Array(exports.memory.buffer);
+  // The secondary module just needs to import the primary one: all original
+  // imports it might have needed were exported from there.
+  var secondImports = {'primary': exports};
+  var secondInstance;
+  try {
+    secondInstance = new WebAssembly.Instance(secondModule, secondImports);
+  } catch (e) {
+    console.log('exception thrown: failed to instantiate second module');
+    quit();
   }
 }
 
 // Run the wasm.
-for (var e in exports) {
+if (!exportsToCall) {
+  // No explicit list was passed in, so default to every export of the
+  // module. The exports object has only own enumerable properties, so
+  // Object.keys() visits the same names, in the same order, as a for-in
+  // loop would.
+  exportsToCall = Object.keys(exports);
+}
+
+for (var e of exportsToCall) {
   if (typeof exports[e] !== 'function') {
     continue;
   }