From 7d92cafe00e4bc6d14a2d6d559329bf825600f38 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Wed, 24 Sep 2025 01:03:53 +0000 Subject: [PATCH 1/6] [empath-split] Add --print-sources option The 'paths' file should contain paths that match those of `sources` field in the source map, which can be hard to figure out because they can be relative to some build directory. This adds `--print-sources` option, which prints the contents of the `sources` field. You can just manually open source map files to obtain the same information, but they are usually hard to read without any newlines. You also can use a general JSON prettyprinting tool, but I think it wouldn't hurt to provide the same info handy here. --- test/test_other.py | 6 ++++ tools/empath-split.py | 65 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/test/test_other.py b/test/test_other.py index 60f11f147f737..1418ee4701d82 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -15639,3 +15639,9 @@ def has_defined_function(file, func): # /emsdk/emscripten/system/lib/libcxx self.assertTrue(has_defined_function('test_4.wasm', r'std::__2::ios_base::getloc\\28\\29\\20const')) self.assertTrue(has_defined_function('test_4.wasm', r'std::uncaught_exceptions\\28\\29')) + + # Check --print-sources option + out = self.run_process([empath_split, 'test.wasm', '--print-sources'], stdout=PIPE).stdout + self.assertIn('main.cpp', out) + self.assertIn('foo.cpp', out) + self.assertIn('/emsdk/emscripten/system/lib/libc/musl/src/string/strcmp.c', out) diff --git a/tools/empath-split.py b/tools/empath-split.py index e7e1ef57db888..6d539b33f22bc 100755 --- a/tools/empath-split.py +++ b/tools/empath-split.py @@ -28,9 +28,24 @@ split as the inner path's module, and the rest of the functions will be split as the outer path's module. Functions that do not belong to any of the specified paths will remain in the primary module. 
+ +The paths in the paths file can be either absolute or relative, but they should +match those of 'sources' field in the source map file. Sometimes a source map's +'sources' field contains paths relative to a build directory, so source files +may be recorded as '../src/subdir/test.c', for example. In this case, if you +want to split the directory src/subdir, you should list it as ../src/subdir. You +can manually open the source map file and check 'sources' field, but we also +have an option to help with that. You can run +$ empath-split --print-sources test.wasm +or +$ empath-split --print-sources --sourcemap test.wasm.map +to print the list of sources in 'sources' field in the source map. Note that +emscripten's libraries' source files have /emsdk/emscripten prefix, which is a +fake deterministic prefix to produce reproducible builds across platforms. """ import argparse +import json import os import sys import tempfile @@ -59,16 +74,18 @@ def parse_args(): enabling/disabling options. Run 'wasm-split -h' for the list of options. But you should NOT add --manifest, because this will be generated from this script. """) - parser.add_argument('wasm', help='Path to the input wasm file') - parser.add_argument('paths_file', help='Path to the input file containing paths') + parser.add_argument('wasm', nargs='?', help='Path to the input wasm file') + parser.add_argument('paths_file', nargs='?', help='Path to the input file containing paths') parser.add_argument('-s', '--sourcemap', help='Force source map file') parser.add_argument('-v', '--verbose', action='store_true', help='Print verbose info for debugging this script') parser.add_argument('--wasm-split', help='Path to wasm-split executable') parser.add_argument('--preserve-manifest', action='store_true', help='Preserve generated manifest file.
This sets --verbose too.') - args, forwarded_args = parser.parse_known_args() + parser.add_argument('--print-sources', action='store_true', + help='Print the list of sources in the source map to help figure out splitting boundaries. Does NOT perform the splitting.') + args, forwarded_args = parser.parse_known_args() if args.preserve_manifest: args.verbose = True if not args.wasm_split: @@ -76,6 +93,16 @@ def parse_args(): if '--manifest' in forwarded_args: parser.error('manifest file will be generated by this script and should not be given') + + if args.print_sources: + if not args.wasm and not args.sourcemap: + parser.error('--print-sources requires either wasm or --sourcemap') + return args, forwarded_args + + if not args.wasm and not args.paths_file: + parser.error("the following arguments are required: wasm, paths_file") + if not args.paths_file: + parser.error("the following arguments are required: paths_file") if '-o' not in forwarded_args and '--output' not in forwarded_args: parser.error('-o (--output) is required') return args, forwarded_args @@ -90,12 +117,16 @@ def check_errors(args): if args.sourcemap: if not os.path.isfile(args.sourcemap): exit_with_error(f"'{args.sourcemap}' was not found or not a file") + sourcemap = args.sourcemap if args.wasm: with webassembly.Module(args.wasm) as module: - if not args.sourcemap and not emsymbolizer.get_sourceMappingURL_section(module): - exit_with_error('sourceMappingURL section does not exist') - sourcemap = module.get_sourceMappingURL() + if args.sourcemap: + sourcemap = args.sourcemap + else: + if not emsymbolizer.get_sourceMappingURL_section(module): + exit_with_error('sourceMappingURL section does not exist') + sourcemap = module.get_sourceMappingURL() if not os.path.isfile(sourcemap): exit_with_error(f"'{sourcemap}' was not found or not a file") if not module.has_name_section(): @@ -104,6 +135,17 @@ def check_errors(args): if not os.path.isfile(args.wasm_split): exit_with_error(f"'{args.wasm_split}' was not 
found or not a file") + # Check source map validity. Just perform simple checks to make sure mandatory + # fields exist. + try: + with open(sourcemap) as f: + source_map_data = json.load(f) + except json.JSONDecodeError: + exit_with_error(f'Invalid JSON format in file {args.sourcemap}') + for field in ['version', 'sources', 'mappings']: + if field not in source_map_data: + exit_with_error(f"Field '{field}' is missing in the source map") + def get_sourceMappingURL(wasm, arg_sourcemap): if arg_sourcemap: @@ -112,6 +154,14 @@ def get_sourceMappingURL(wasm, arg_sourcemap): return module.get_sourceMappingURL() +def print_sources(sourcemap): + with open(sourcemap, 'r') as f: + sources = json.load(f).get('sources') + assert(isinstance(sources, list)) + for src in sources: + print(src) + + def get_path_to_functions_map(wasm, sourcemap, paths): def is_synthesized_func(func): # TODO There can be more @@ -202,6 +252,9 @@ def main(): check_errors(args) sourcemap = get_sourceMappingURL(args.wasm, args.sourcemap) + if args.print_sources: + print_sources(sourcemap) + return paths = utils.read_file(args.paths_file).splitlines() paths = [utils.normalize_path(path.strip()) for path in paths if path.strip()] From 201a5a96353f475ee39cddd14a9011da11869283 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 25 Sep 2025 17:16:42 +0000 Subject: [PATCH 2/6] ruff fix --- tools/empath-split.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/empath-split.py b/tools/empath-split.py index 6d539b33f22bc..420a118c2a0ae 100755 --- a/tools/empath-split.py +++ b/tools/empath-split.py @@ -155,7 +155,7 @@ def get_sourceMappingURL(wasm, arg_sourcemap): def print_sources(sourcemap): - with open(sourcemap, 'r') as f: + with open(sourcemap) as f: sources = json.load(f).get('sources') assert(isinstance(sources, list)) for src in sources: From a5dea828c715cff83461a40ecfe3814ce3f26ccc Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 25 Sep 2025 17:26:25 +0000 Subject: [PATCH 3/6] 
Remove a redundant check --- tools/empath-split.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/empath-split.py b/tools/empath-split.py index 420a118c2a0ae..578a572c64e5c 100755 --- a/tools/empath-split.py +++ b/tools/empath-split.py @@ -121,17 +121,15 @@ def check_errors(args): if args.wasm: with webassembly.Module(args.wasm) as module: - if args.sourcemap: - sourcemap = args.sourcemap - else: + if not args.sourcemap: if not emsymbolizer.get_sourceMappingURL_section(module): exit_with_error('sourceMappingURL section does not exist') sourcemap = module.get_sourceMappingURL() - if not os.path.isfile(sourcemap): - exit_with_error(f"'{sourcemap}' was not found or not a file") if not module.has_name_section(): exit_with_error('Name section does not eixst') + if not os.path.isfile(sourcemap): + exit_with_error(f"'{sourcemap}' was not found or not a file") if not os.path.isfile(args.wasm_split): exit_with_error(f"'{args.wasm_split}' was not found or not a file") From 7b9f1551dcbca3cbc14ea431c760a297ef1eb75d Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 25 Sep 2025 17:28:46 +0000 Subject: [PATCH 4/6] Remove redundant check 2 --- tools/empath-split.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/empath-split.py b/tools/empath-split.py index 578a572c64e5c..7fdef23445856 100755 --- a/tools/empath-split.py +++ b/tools/empath-split.py @@ -115,8 +115,6 @@ def check_errors(args): exit_with_error(f"'{args.paths_file}' was not found or not a file") if args.sourcemap: - if not os.path.isfile(args.sourcemap): - exit_with_error(f"'{args.sourcemap}' was not found or not a file") sourcemap = args.sourcemap if args.wasm: From 56745d6c23fe8a48e044224f8619ec2f6bb49a75 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 25 Sep 2025 17:30:12 +0000 Subject: [PATCH 5/6] typo fix --- tools/empath-split.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/empath-split.py b/tools/empath-split.py index
7fdef23445856..10195e5918c90 100755 --- a/tools/empath-split.py +++ b/tools/empath-split.py @@ -124,7 +124,7 @@ def check_errors(args): exit_with_error('sourceMappingURL section does not exist') sourcemap = module.get_sourceMappingURL() if not module.has_name_section(): - exit_with_error('Name section does not eixst') + exit_with_error('Name section does not exist') if not os.path.isfile(sourcemap): exit_with_error(f"'{sourcemap}' was not found or not a file") From 61eeb49b512ceba817ed3072eae5c69689e7ba6f Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 25 Sep 2025 17:35:32 +0000 Subject: [PATCH 6/6] Print # of functions per path --- tools/empath-split.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/empath-split.py b/tools/empath-split.py index 10195e5918c90..3fabebb0ce883 100755 --- a/tools/empath-split.py +++ b/tools/empath-split.py @@ -270,7 +270,7 @@ def main(): if not path_to_funcs[path]: diagnostics.warn(f'{path} does not match any functions') if args.verbose: - print(path) + print(f'{path}: {len(path_to_funcs[path])} functions') for func in path_to_funcs[path]: print(' ' + func) print()