Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
1e2baa2
start
kripken Dec 18, 2024
07606f6
work
kripken Dec 18, 2024
7d3980c
fix
kripken Dec 18, 2024
6a85f2e
fix
kripken Dec 18, 2024
89bdcac
fix
kripken Dec 18, 2024
a69d7f8
work
kripken Dec 18, 2024
374f5b1
work
kripken Dec 18, 2024
88e12a4
work
kripken Dec 18, 2024
170f7dd
work
kripken Dec 18, 2024
5669479
work
kripken Dec 18, 2024
0611b9c
work
kripken Dec 18, 2024
bd0f0e9
work
kripken Dec 18, 2024
3d9af45
work
kripken Dec 18, 2024
28cb857
fix?
kripken Dec 18, 2024
cd9f117
work
kripken Dec 18, 2024
71d8a73
work
kripken Dec 18, 2024
7b6171c
work
kripken Dec 18, 2024
c21354c
work
kripken Dec 18, 2024
c65cc27
undo
kripken Dec 18, 2024
32c30ab
test
kripken Dec 19, 2024
e8e1e24
test
kripken Dec 19, 2024
62f71c9
work
kripken Dec 19, 2024
029fae6
work
kripken Dec 19, 2024
3d03fa8
oops
kripken Dec 19, 2024
c8cc9b6
test
kripken Dec 19, 2024
da419e8
basename
kripken Dec 19, 2024
f85cb36
work
kripken Dec 19, 2024
1dc68d8
Merge remote-tracking branch 'origin/main' into initial.cluster
kripken Dec 19, 2024
07ca22c
show
kripken Dec 19, 2024
b9e0002
Merge branch 'nfc.fuzz' into initial.cluster
kripken Dec 19, 2024
168cb5f
Merge branch 'instantiate.error' into initial.cluster
kripken Dec 19, 2024
64be86c
better
kripken Dec 19, 2024
9fbfba1
Merge branch 'instantiate.error' into initial.cluster
kripken Dec 19, 2024
896f3dc
lint
kripken Dec 19, 2024
5cdf404
Merge remote-tracking branch 'origin/main' into initial.cluster
kripken Dec 20, 2024
bded873
better
kripken Dec 20, 2024
f91d80c
better
kripken Dec 20, 2024
13572f9
Merge remote-tracking branch 'origin/main' into initial.cluster
kripken Dec 20, 2024
0fe503b
fix
kripken Dec 20, 2024
e5cbaf5
Merge branch 'moar.move' into initial.cluster
kripken Dec 20, 2024
77cf4fc
Merge remote-tracking branch 'origin/main' into moar.move
kripken Dec 20, 2024
dfd9a96
Merge branch 'moar.move' into initial.cluster
kripken Dec 20, 2024
a9c6f69
remove simd, which works now, and add link for names
kripken Dec 20, 2024
9b5ab79
Merge branch 'moar.move' into initial.cluster
kripken Dec 20, 2024
23c84ec
Merge remote-tracking branch 'origin/main' into initial.cluster
kripken Dec 20, 2024
01403c6
Merge remote-tracking branch 'origin/main' into initial.cluster
kripken Jan 6, 2025
714572e
Merge remote-tracking branch 'origin/main' into initial.cluster
kripken Jan 7, 2025
c41d01d
Update test/unit/test_cluster_fuzz.py
kripken Jan 7, 2025
22628d7
lint
kripken Jan 7, 2025
fdfa966
skip if no v8
kripken Jan 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions scripts/bundle_clusterfuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
'''

import os
import subprocess
import sys
import tarfile

Expand All @@ -87,7 +88,9 @@
# Delete the argument, as importing |shared| scans it.
sys.argv.pop()

from test import fuzzing # noqa
from test import shared # noqa
from test import support # noqa
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What lints are we skipping here?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The one that imports must be on top (which we can't do because of lines 83-89, which I can't think of a nice alternative to).


# Pick where to get the builds
if build_dir:
Expand All @@ -97,6 +100,14 @@
binaryen_bin = shared.options.binaryen_bin
binaryen_lib = shared.options.binaryen_lib

# ClusterFuzz's run.py uses these features. Keep this in sync with that, so that
# we only bundle initial content that makes sense for it.
features = [
'-all',
'--disable-shared-everything',
'--disable-fp16',
]
Comment on lines +105 to +109
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be feasible to deduplicate these into a shared location?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not trivially, since run.py is not part of the normal "tree". We never run it as part of our own code, only after being packaged for ClusterFuzz. So we don't have the normal shared locations. I suppose we could have a file that we import locally and copy over for ClusterFuzz, but given this is just a few lines, that seems a bit excessive to me.


with tarfile.open(output_file, "w:gz") as tar:
# run.py
run = os.path.join(shared.options.binaryen_root, 'scripts', 'clusterfuzz', 'run.py')
Expand Down Expand Up @@ -128,6 +139,40 @@
print(f' ......... : {path}')
tar.add(path, arcname=f'lib/{name}')

# Add tests we will use as initial content under initial/. We put all the
# tests from the test suite there.
print(' .. initial content: ')
temp_wasm = 'temp.wasm'
index = 0
all_tests = shared.get_all_tests()
for i, test in enumerate(all_tests):
if not fuzzing.is_fuzzable(test):
continue
for wast, asserts in support.split_wast(test):
if not wast:
continue
support.write_wast(temp_wasm, wast)
# If the file is not valid for our features, skip it. In the same
# operation, also convert to binary if this was text (binary is more
# compact).
cmd = shared.WASM_OPT + ['-q', temp_wasm, '-o', temp_wasm] + features
if subprocess.run(cmd, stderr=subprocess.PIPE).returncode:
continue

# Looks good.
tar.add(temp_wasm, arcname=f'initial/{index}.wasm')
index += 1
print(f'\r {100 * i / len(all_tests):.2f}%', end='', flush=True)
print(f' (num: {index})')

# Write initial/num.txt which contains the number of testcases in that
# directory (saves run.py from needing to listdir each time).
Comment on lines +168 to +169
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this make a measurable performance difference?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't measure this on ClusterFuzz itself, which is where it really matters. But imagine that ClusterFuzz might start calling run.py more times with fewer testcases each (rather than once for 1,000 testcases, it could run 1,000 times with a single testcase each); then this constant overhead could matter.

num_txt = 'num.txt'
with open(num_txt, 'w') as f:
f.write(f'{index}')
tar.add(num_txt, arcname='initial/num.txt')


print('Done.')
print('To run the tests on this bundle, do:')
print()
Expand Down
2 changes: 1 addition & 1 deletion scripts/clusterfuzz/extract_wasms.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def repl(text):


# Replace the wasm files and write them out.
js = re.sub(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\);', repl, js)
js = re.sub(r'var \w+ = new Uint8Array\(\[([\d,]+)\]\)', repl, js)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why no more semicolon?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because now we can have one of the new comments between this and the semicolon. Detecting that would make this complicated for no useful reason (and adding the comment after the semicolon would be a little annoying in run.py).

(Sorry if this wasn't clear enough in the description.)


# Write out the new JS.
with open(f'{out}.js', 'w') as f:
Expand Down
37 changes: 34 additions & 3 deletions scripts/clusterfuzz/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@
# testcase.
JS_SHELL_PATH = os.path.join(ROOT_DIR, 'scripts', 'fuzz_shell.js')

# The path to the directory with initial contents.
INITIAL_CONTENT_PATH = os.path.join(ROOT_DIR, 'initial')

# The file that contains the number of initial contents
INITIAL_CONTENT_NUM_PATH = os.path.join(ROOT_DIR, 'initial', 'num.txt')

# The arguments we provide to wasm-opt to generate wasm files.
FUZZER_ARGS = [
# Generate a wasm from random data.
Expand All @@ -76,7 +82,8 @@
'--fuzz-passes',
# Enable all features but disable ones not yet ready for fuzzing. This may
# be a smaller set than fuzz_opt.py, as that enables a few experimental
# flags, while here we just fuzz with d8's --wasm-staging.
# flags, while here we just fuzz with d8's --wasm-staging. This should be
# synchronized with bundle_clusterfuzz.
'-all',
'--disable-shared-everything',
'--disable-fp16',
Expand All @@ -92,6 +99,17 @@ def get_file_name(prefix, index):
# (We also use urandom below, which uses this under the hood.)
system_random = random.SystemRandom()

# The number of initial content testcases that were bundled for us, in the
# "initial/" subdir.
with open(INITIAL_CONTENT_NUM_PATH) as f:
num_initial_contents = int(f.read())


def get_random_initial_content():
    """Return the path to a randomly-chosen bundled initial-content wasm file."""
    # The bundle ships testcases named 0.wasm .. (num_initial_contents-1).wasm
    # under INITIAL_CONTENT_PATH; pick one uniformly at random.
    chosen = system_random.randrange(num_initial_contents)
    return os.path.join(INITIAL_CONTENT_PATH, '%d.wasm' % chosen)


# In production ClusterFuzz we retry whenever we see a wasm-opt error. We are
# not looking for wasm-opt issues there, and just use it to generate testcases
# for VMs. For local testing, however, we may want to disable retrying, which
Expand All @@ -117,9 +135,19 @@ def get_wasm_contents(i, output_dir):
with open(input_data_file_path, 'wb') as file:
file.write(os.urandom(random_size))

# Generate wasm from the random data.
# Generate a command to use wasm-opt with the proper args to generate
# wasm content from the input data.
cmd = [FUZZER_BINARY_PATH] + FUZZER_ARGS
cmd += ['-o', wasm_file_path, input_data_file_path]

# Sometimes use a file from the initial content testcases.
if system_random.random() < 0.5:
initial_content = get_random_initial_content()
cmd += ['--initial-fuzz=' + initial_content]
else:
initial_content = None

# Generate wasm from the random data.
try:
subprocess.check_call(cmd)
except subprocess.CalledProcessError:
Expand Down Expand Up @@ -148,7 +176,10 @@ def get_wasm_contents(i, output_dir):

# Convert to a string, and wrap into a typed array.
wasm_contents = ','.join([str(c) for c in wasm_contents])
return f'new Uint8Array([{wasm_contents}])'
js = f'new Uint8Array([{wasm_contents}])'
if initial_content:
js = f'{js} /* using initial content {os.path.basename(initial_content)} */'
return js


# Returns the contents of a .js fuzz file, given the index of the testcase and
Expand Down
60 changes: 60 additions & 0 deletions test/unit/test_cluster_fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,15 @@ def test_file_contents(self):
seen_calls = []
seen_second_builds = []
seen_JSPIs = []
seen_initial_contents = []

# Initial contents are noted in comments like this:
#
# /* using initial content 42.wasm */
#
# Note that we may see more than one in a file, as we may have more than
# one wasm in each testcase: each wasm has a chance.
initial_content_regex = re.compile(r'[/][*] using initial content ([^ ]+) [*][/]')

for i in range(1, N + 1):
fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')
Expand All @@ -302,6 +311,8 @@ def test_file_contents(self):
assert '/* async */' in js
assert '/* await */' in js

seen_initial_contents.append(re.findall(initial_content_regex, js))

# There is always one build and one call (those are in the default
# fuzz_shell.js), and we add a couple of operations, each with equal
# probability to be a build or a call, so over the 100 testcases here we
Expand Down Expand Up @@ -346,6 +357,55 @@ def test_file_contents(self):

print()

# Flatten the data to help some of the below, from
# [['a.wasm', 'b.wasm'], ['c.wasm']]
# into
# ['a.wasm', 'b.wasm', 'c.wasm']
flat_initial_contents = [item for items in seen_initial_contents for item in items]

# Initial content appear 50% of the time for each wasm file. Each
# testcase has 1.333 wasm files on average.
print('Initial contents are distributed as ~ mean 0.68')
print(f'mean initial contents: {len(flat_initial_contents) / N}')
# Initial contents should be mostly unique (we have many, many testcases
# and we pick just 100 or so). And we must see more than one unique one.
unique_initial_contents = set(flat_initial_contents)
print(f'unique initial contents: {len(unique_initial_contents)} should be almost equal to {len(flat_initial_contents)}')
self.assertGreater(len(unique_initial_contents), 1)
# Not all testcases have initial contents.
num_initial_contents = [len(items) for items in seen_initial_contents]
self.assertEqual(min(num_initial_contents), 0)
# Some do (this is redundant given that the set of unique initial
# contents was asserted on before, so this just confirms/checks that).
self.assertGreaterEqual(max(num_initial_contents), 1)

print()

# Execute the files in V8. Almost all should execute properly (some
# small number may trap during startup, say on a segment out of bounds).
if shared.V8:
valid_executions = 0
for i in range(1, N + 1):
fuzz_file = os.path.join(temp_dir.name, f'fuzz-binaryen-{i}.js')

cmd = [shared.V8, '--wasm-staging', fuzz_file]
proc = subprocess.run(cmd, stdout=subprocess.PIPE)

# An execution is valid if we exited without error, and if we
# managed to run some code before exiting (modules with no
# exports will be considered "invalid" here, but that is very
# rare, and in a sense such modules are not useful anyway).
if proc.returncode == 0 and b'[fuzz-exec] calling ' in proc.stdout:
valid_executions += 1

print('Valid executions are distributed as ~ mean 0.99')
print(f'mean valid executions: {valid_executions / N}')
# Assert on having at least half execute properly. Given the true mean
# is 0.9, for half of 100 to fail is incredibly unlikely.
self.assertGreater(valid_executions, N / 2)

print()

# "zzz" in test name so that this runs last. If it runs first, it can be
# confusing as it appears next to the logging of which bundle we use (see
# setUpClass).
Expand Down