diff --git a/test/test_other.py b/test/test_other.py index 60f11f147f737..1418ee4701d82 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -15639,3 +15639,9 @@ def has_defined_function(file, func): # /emsdk/emscripten/system/lib/libcxx self.assertTrue(has_defined_function('test_4.wasm', r'std::__2::ios_base::getloc\\28\\29\\20const')) self.assertTrue(has_defined_function('test_4.wasm', r'std::uncaught_exceptions\\28\\29')) + + # Check --print-sources option + out = self.run_process([empath_split, 'test.wasm', '--print-sources'], stdout=PIPE).stdout + self.assertIn('main.cpp', out) + self.assertIn('foo.cpp', out) + self.assertIn('/emsdk/emscripten/system/lib/libc/musl/src/string/strcmp.c', out) diff --git a/tools/empath-split.py b/tools/empath-split.py index e7e1ef57db888..3fabebb0ce883 100755 --- a/tools/empath-split.py +++ b/tools/empath-split.py @@ -28,9 +28,24 @@ split as the inner path's module, and the rest of the functions will be split as the outer path's module. Functions that do not belong to any of the specified paths will remain in the primary module. + +The paths in the paths file can be either absolute or relative, but they should +match those of 'sources' field in the source map file. Sometimes a source map's +'sources' field contains paths relative to a build directory, so source files +may be recorded as '../src/subdir/test.c', for example. In this case, if you +want to split the directory src/subdir, you should list it as ../src/subdir. You +can manually open the source map file and check 'sources' field, but we also +have an option to help with that. You can run +$ empath-split --print-sources test.wasm +or +$ empath-split --print-sources --sourcemap test.wasm.map +to print the list of sources in 'sources' field in the source map. Note that +emscripten's libraries' source files have /emsdk/emscripten prefix, which is a +fake deterministic prefix to produce reproducible builds across platforms. 
""" import argparse +import json import os import sys import tempfile @@ -59,16 +74,18 @@ def parse_args(): enabling/disabling options. Run 'wasm-split -h' for the list of options. But you should NOT add --manifest, because this will be generated from this script. """) - parser.add_argument('wasm', help='Path to the input wasm file') - parser.add_argument('paths_file', help='Path to the input file containing paths') + parser.add_argument('wasm', nargs='?', help='Path to the input wasm file') + parser.add_argument('paths_file', nargs='?', help='Path to the input file containing paths') parser.add_argument('-s', '--sourcemap', help='Force source map file') parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose info for debugging this script') parser.add_argument('--wasm-split', help='Path to wasm-split executable') parser.add_argument('--preserve-manifest', action='store_true', help='Preserve generated manifest file. This sets --verbose too.') - args, forwarded_args = parser.parse_known_args() + parser.add_argument('--print-sources', action='store_true', + help='Print the list of sources in the source map to help figure out splitting boundaries. 
Does NOT perform the splitting.') + args, forwarded_args = parser.parse_known_args() if args.preserve_manifest: args.verbose = True if not args.wasm_split: @@ -76,6 +93,16 @@ def parse_args(): if '--manifest' in forwarded_args: parser.error('manifest file will be generated by this script and should not be given') + + if args.print_sources: + if not args.wasm and not args.sourcemap: + parser.error('--print-sources requires either wasm or --sourcemap') + return args, forwarded_args + + if not args.wasm and not args.paths_file: + parser.error("the following arguments are required: wasm, paths_file") + if not args.paths_file: + parser.error("the following arguments are required: paths_file") if '-o' not in forwarded_args and '--output' not in forwarded_args: parser.error('-o (--output) is required') return args, forwarded_args @@ -88,22 +115,33 @@ def check_errors(args): exit_with_error(f"'{args.paths_file}' was not found or not a file") if args.sourcemap: - if not os.path.isfile(args.sourcemap): - exit_with_error(f"'{args.sourcemap}' was not found or not a file") + sourcemap = args.sourcemap if args.wasm: with webassembly.Module(args.wasm) as module: - if not args.sourcemap and not emsymbolizer.get_sourceMappingURL_section(module): - exit_with_error('sourceMappingURL section does not exist') - sourcemap = module.get_sourceMappingURL() - if not os.path.isfile(sourcemap): - exit_with_error(f"'{sourcemap}' was not found or not a file") + if not args.sourcemap: + if not emsymbolizer.get_sourceMappingURL_section(module): + exit_with_error('sourceMappingURL section does not exist') + sourcemap = module.get_sourceMappingURL() if not module.has_name_section(): - exit_with_error('Name section does not eixst') + exit_with_error('Name section does not exist') + if not os.path.isfile(sourcemap): + exit_with_error(f"'{sourcemap}' was not found or not a file") if not os.path.isfile(args.wasm_split): exit_with_error(f"'{args.wasm_split}' was not found or not a file") + # Check 
source map validity. Just perform simple checks to make sure mandatory + # fields exist. + try: + with open(sourcemap) as f: + source_map_data = json.load(f) + except json.JSONDecodeError: + exit_with_error(f'Invalid JSON format in file {args.sourcemap}') + for field in ['version', 'sources', 'mappings']: + if field not in source_map_data: + exit_with_error(f"Field '{field}' is missing in the source map") + def get_sourceMappingURL(wasm, arg_sourcemap): if arg_sourcemap: @@ -112,6 +150,14 @@ def get_sourceMappingURL(wasm, arg_sourcemap): return module.get_sourceMappingURL() +def print_sources(sourcemap): + with open(sourcemap) as f: + sources = json.load(f).get('sources') + assert(isinstance(sources, list)) + for src in sources: + print(src) + + def get_path_to_functions_map(wasm, sourcemap, paths): def is_synthesized_func(func): # TODO There can be more @@ -202,6 +248,9 @@ def main(): check_errors(args) sourcemap = get_sourceMappingURL(args.wasm, args.sourcemap) + if args.print_sources: + print_sources(sourcemap) + return paths = utils.read_file(args.paths_file).splitlines() paths = [utils.normalize_path(path.strip()) for path in paths if path.strip()] @@ -221,7 +270,7 @@ def main(): if not path_to_funcs[path]: diagnostics.warn(f'{path} does not match any functions') if args.verbose: - print(path) + print(f'{path}: {len(path_to_funcs[path])} functions') for func in path_to_funcs[path]: print(' ' + func) print()