Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions test/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -15639,3 +15639,9 @@ def has_defined_function(file, func):
# /emsdk/emscripten/system/lib/libcxx
self.assertTrue(has_defined_function('test_4.wasm', r'std::__2::ios_base::getloc\\28\\29\\20const'))
self.assertTrue(has_defined_function('test_4.wasm', r'std::uncaught_exceptions\\28\\29'))

# Check --print-sources option
out = self.run_process([empath_split, 'test.wasm', '--print-sources'], stdout=PIPE).stdout
self.assertIn('main.cpp', out)
self.assertIn('foo.cpp', out)
self.assertIn('/emsdk/emscripten/system/lib/libc/musl/src/string/strcmp.c', out)
73 changes: 61 additions & 12 deletions tools/empath-split.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,24 @@
split as the inner path's module, and the rest of the functions will be split as
the outer path's module. Functions that do not belong to any of the specified
paths will remain in the primary module.

The paths in the paths file can be either absolute or relative, but they should
match those of 'sources' field in the source map file. Sometimes a source map's
'sources' field contains paths relative to a build directory, so source files
may be recorded as '../src/subdir/test.c', for example. In this case, if you
want to split the directory src/subdir, you should list it as ../src/subdir. You
can manually open the source map file and check the 'sources' field, but we
also provide an option to help with that. You can run
$ empath-split --print-sources test.wasm
or
$ empath-split --print-sources --sourcemap test.wasm.map
to print the list of sources in 'sources' field in the source map. Note that
emscripten's libraries' source files have /emsdk/emscripten prefix, which is a
fake deterministic prefix to produce reproducible builds across platforms.
"""

import argparse
import json
import os
import sys
import tempfile
Expand Down Expand Up @@ -59,23 +74,35 @@ def parse_args():
enabling/disabling options. Run 'wasm-split -h' for the list of options. But you
should NOT add --manifest, because this will be generated from this script.
""")
parser.add_argument('wasm', help='Path to the input wasm file')
parser.add_argument('paths_file', help='Path to the input file containing paths')
parser.add_argument('wasm', nargs='?', help='Path to the input wasm file')
parser.add_argument('paths_file', nargs='?', help='Path to the input file containing paths')
Comment on lines +77 to +78
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These became optional because when you use --print-sources these are not mandatory. (We manually check for their existence below when --print-sources is not given)

parser.add_argument('-s', '--sourcemap', help='Force source map file')
parser.add_argument('-v', '--verbose', action='store_true',
help='Print verbose info for debugging this script')
parser.add_argument('--wasm-split', help='Path to wasm-split executable')
parser.add_argument('--preserve-manifest', action='store_true',
help='Preserve generated manifest file. This sets --verbose too.')
args, forwarded_args = parser.parse_known_args()
parser.add_argument('--print-sources', action='store_true',
help='Print the list of sources in the source map to help figure out splitting boundaries. Does NOT perform the splitting.')

args, forwarded_args = parser.parse_known_args()
if args.preserve_manifest:
args.verbose = True
if not args.wasm_split:
args.wasm_split = os.path.join(building.get_binaryen_bin(), shared.exe_suffix('wasm-split'))

if '--manifest' in forwarded_args:
parser.error('manifest file will be generated by this script and should not be given')

if args.print_sources:
if not args.wasm and not args.sourcemap:
parser.error('--print-sources requires either wasm or --sourcemap')
return args, forwarded_args

if not args.wasm and not args.paths_file:
parser.error("the following arguments are required: wasm, paths_file")
if not args.paths_file:
parser.error("the following arguments are required: paths_file")
if '-o' not in forwarded_args and '--output' not in forwarded_args:
parser.error('-o (--output) is required')
return args, forwarded_args
Expand All @@ -88,22 +115,33 @@ def check_errors(args):
exit_with_error(f"'{args.paths_file}' was not found or not a file")

if args.sourcemap:
if not os.path.isfile(args.sourcemap):
exit_with_error(f"'{args.sourcemap}' was not found or not a file")
sourcemap = args.sourcemap

if args.wasm:
with webassembly.Module(args.wasm) as module:
if not args.sourcemap and not emsymbolizer.get_sourceMappingURL_section(module):
exit_with_error('sourceMappingURL section does not exist')
sourcemap = module.get_sourceMappingURL()
if not os.path.isfile(sourcemap):
exit_with_error(f"'{sourcemap}' was not found or not a file")
if not args.sourcemap:
if not emsymbolizer.get_sourceMappingURL_section(module):
exit_with_error('sourceMappingURL section does not exist')
sourcemap = module.get_sourceMappingURL()
if not module.has_name_section():
exit_with_error('Name section does not eixst')
exit_with_error('Name section does not exist')

if not os.path.isfile(sourcemap):
exit_with_error(f"'{sourcemap}' was not found or not a file")
if not os.path.isfile(args.wasm_split):
exit_with_error(f"'{args.wasm_split}' was not found or not a file")

# Check source map validity. Just perform simple checks to make sure mandatory
# fields exist.
try:
with open(sourcemap) as f:
source_map_data = json.load(f)
except json.JSONDecodeError:
exit_with_error(f'Invalid JSON format in file {args.sourcemap}')
for field in ['version', 'sources', 'mappings']:
if field not in source_map_data:
exit_with_error(f"Field '{field}' is missing in the source map")


def get_sourceMappingURL(wasm, arg_sourcemap):
if arg_sourcemap:
Expand All @@ -112,6 +150,14 @@ def get_sourceMappingURL(wasm, arg_sourcemap):
return module.get_sourceMappingURL()


def print_sources(sourcemap):
    """Print every entry of the source map's 'sources' field, one per line.

    Args:
      sourcemap: Path to a source map file in JSON format.

    Raises:
      ValueError: If the 'sources' field is missing or is not a list.
    """
    # Read the source map as UTF-8 explicitly so that non-ASCII source paths
    # are decoded the same way regardless of the platform's locale encoding.
    with open(sourcemap, encoding='utf-8') as f:
        sources = json.load(f).get('sources')
    # Validate explicitly instead of using 'assert': assertions are stripped
    # under 'python -O', and a string value would then be iterated character by
    # character and printed as if each character were a source path.
    if not isinstance(sources, list):
        raise ValueError(
            f"'sources' field in '{sourcemap}' is missing or is not a list")
    for src in sources:
        print(src)


def get_path_to_functions_map(wasm, sourcemap, paths):
def is_synthesized_func(func):
# TODO There can be more
Expand Down Expand Up @@ -202,6 +248,9 @@ def main():
check_errors(args)

sourcemap = get_sourceMappingURL(args.wasm, args.sourcemap)
if args.print_sources:
print_sources(sourcemap)
return

paths = utils.read_file(args.paths_file).splitlines()
paths = [utils.normalize_path(path.strip()) for path in paths if path.strip()]
Expand All @@ -221,7 +270,7 @@ def main():
if not path_to_funcs[path]:
diagnostics.warn(f'{path} does not match any functions')
if args.verbose:
print(path)
print(f'{path}: {len(path_to_funcs[path])} functions')
for func in path_to_funcs[path]:
print(' ' + func)
print()
Expand Down