Skip to content

Commit 522fd7c

Browse files
fmeumgregmagolan
authored andcommitted
fix(builtin): make linker aspect run in constant time per target
Previously, for every transitive dependency of a node binary target, the module_mappings_aspect had a runtime quadratic in the number of mappings: It traverses the mappings of all direct dependencies in a loop and called _link_mapping in its body, which again traverses the mappings. With this commit, the LinkerPackageMappingInfo provider is restructured to hold depsets of mapping entries and node_modules root paths. In this way the aspect invocations for the individual targets never traverse the (partially constructed) mappings. Instead, write_node_modules_manifest flattens the depset only after the aspect has run. If this should ever become a performance bottleneck again, the flattening could be moved into the execution phase by letting a dedicated binary consume the depset wrappen in an Args object. In an experiment with a single medium-sized package.json and a Node binary depending on all targets in the corresponding npm repository, this commit reduces the analysis wall time spent in the aspect implementation from ~30s to ~70ms.
1 parent 6d712cd commit 522fd7c

File tree

4 files changed

+134
-114
lines changed

4 files changed

+134
-114
lines changed

internal/linker/link_node_modules.bzl

Lines changed: 119 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,17 @@ def _debug(vars, *args):
2121
LinkerPackageMappingInfo = provider(
2222
doc = """Provider capturing package mappings for the linker to consume.""",
2323
fields = {
24-
"mappings": "Dictionary of mappings. Maps package names to an exec path.",
24+
"mappings": """Depset of structs with mapping info.
25+
26+
Each struct has the following fields:
27+
package_name: The name of the package.
28+
package_path: The root path of the node_modules under which this package should be linked.
29+
link_path: The exec path under which the package is available.
30+
31+
Note: The depset may contain multiple entries per (package_name, package_path) pair.
32+
Consumers should handle these duplicated appropriately. The depset uses topological order to ensure
33+
that a target's mappings come before possibly conflicting mappings of its dependencies.""",
34+
"node_modules_roots": "Depset of node_module roots.",
2535
},
2636
)
2737

@@ -40,39 +50,54 @@ def add_arg(args, arg):
4050
else:
4151
args.add(arg)
4252

43-
def _link_mapping(label, mappings, k, v):
44-
# Check that two package name mapping do not map to two different source paths
45-
k_segs = k.split(":")
46-
package_name = k_segs[0]
47-
package_path = k_segs[1] if len(k_segs) > 1 else ""
48-
link_path = v
49-
50-
for iter_key, iter_values in mappings.items():
51-
# Map key is of format "package_name:package_path"
52-
# Map values are of format [deprecated, link_path]
53-
iter_segs = iter_key.split(":")
54-
iter_package_name = iter_segs[0]
55-
iter_package_path = iter_segs[1] if len(iter_segs) > 1 else ""
56-
iter_source_path = iter_values
57-
if package_name == iter_package_name and package_path == iter_package_path:
58-
# If we're linking to the output tree be tolerant of linking to different
59-
# output trees since we can have "static" links that come from cfg="exec" binaries.
60-
# In the future when we static link directly into runfiles without the linker
61-
# we can remove this logic.
62-
link_path_segments = link_path.split("/")
63-
iter_source_path_segments = iter_source_path.split("/")
64-
bin_links = len(link_path_segments) >= 3 and len(iter_source_path_segments) >= 3 and link_path_segments[0] == "bazel-out" and iter_source_path_segments[0] == "bazel-out" and link_path_segments[2] == "bin" and iter_source_path_segments[2] == "bin"
65-
if bin_links:
66-
compare_link_path = "/".join(link_path_segments[3:]) if len(link_path_segments) > 3 else ""
67-
compare_iter_source_path = "/".join(iter_source_path_segments[3:]) if len(iter_source_path_segments) > 3 else ""
68-
else:
69-
compare_link_path = link_path
70-
compare_iter_source_path = iter_source_path
71-
if compare_link_path != compare_iter_source_path:
72-
msg = "conflicting mapping at '%s': '%s' and '%s' map to conflicting %s and %s" % (label, k, iter_key, compare_link_path, compare_iter_source_path)
73-
fail(msg)
74-
75-
return True
53+
def _detect_conflicts(module_sets, mapping):
54+
"""Verifies that the new mapping does not conflict with existing mappings in module_sets."""
55+
if mapping.package_path not in module_sets:
56+
return
57+
existing_link_path = module_sets[mapping.package_path].get(mapping.package_name)
58+
if existing_link_path == None:
59+
return
60+
61+
# If we're linking to the output tree be tolerant of linking to different
62+
# output trees since we can have "static" links that come from cfg="exec" binaries.
63+
# In the future when we static link directly into runfiles without the linker
64+
# we can remove this logic.
65+
link_path_segments = mapping.link_path.split("/")
66+
existing_link_path_segments = existing_link_path.split("/")
67+
bin_links = len(link_path_segments) >= 3 and len(existing_link_path_segments) >= 3 and link_path_segments[0] == "bazel-out" and existing_link_path_segments[0] == "bazel-out" and link_path_segments[2] == "bin" and existing_link_path_segments[2] == "bin"
68+
if bin_links:
69+
compare_link_path = "/".join(link_path_segments[3:]) if len(link_path_segments) > 3 else ""
70+
compare_existing_link_path = "/".join(existing_link_path_segments[3:]) if len(existing_link_path_segments) > 3 else ""
71+
else:
72+
compare_link_path = mapping.link_path
73+
compare_existing_link_path = existing_link_path
74+
if compare_link_path != compare_existing_link_path:
75+
msg = "conflicting mapping: '%s' (package path: %s) maps to conflicting paths '%s' and '%s'" % (mapping.package_name, mapping.package_path, compare_link_path, compare_existing_link_path)
76+
fail(msg)
77+
78+
def _flatten_to_module_set(mappings_depset):
79+
"""Convert a depset of mapping to a module sets (modules per package package_path).
80+
81+
The returned dictionary has the following structure:
82+
{
83+
"package_path": {
84+
"package_name": "link_path",
85+
...
86+
},
87+
...
88+
}
89+
"""
90+
91+
# FIXME: Flattens a depset during the analysis phase. Ideally, this would be done during the
92+
# execution phase using an Args object.
93+
flattens_mappings = mappings_depset.to_list()
94+
module_sets = {}
95+
for mapping in flattens_mappings:
96+
_detect_conflicts(module_sets, mapping)
97+
if mapping.package_path not in module_sets:
98+
module_sets[mapping.package_path] = {}
99+
module_sets[mapping.package_path][mapping.package_name] = mapping.link_path
100+
return module_sets
76101

77102
def write_node_modules_manifest(ctx, extra_data = [], mnemonic = None, link_workspace_root = False):
78103
"""Writes a manifest file read by the linker, containing info about resolving runtime dependencies
@@ -85,7 +110,6 @@ def write_node_modules_manifest(ctx, extra_data = [], mnemonic = None, link_work
85110
If source files need to be required then they can be copied to the bin_dir with copy_to_bin.
86111
"""
87112

88-
mappings = {ctx.workspace_name: ctx.bin_dir.path} if link_workspace_root else {}
89113
node_modules_roots = {}
90114

91115
# Look through data/deps attributes to find the root directories for the third-party node_modules;
@@ -100,38 +124,33 @@ def write_node_modules_manifest(ctx, extra_data = [], mnemonic = None, link_work
100124
fail("All npm dependencies at the path '%s' must come from a single workspace. Found '%s' and '%s'." % (path, other_workspace, workspace))
101125
node_modules_roots[path] = workspace
102126

103-
# Look through data/deps attributes to find first party deps to link
127+
direct_mappings = []
128+
direct_node_modules_roots = []
129+
if link_workspace_root:
130+
direct_mappings.append(struct(
131+
package_name = ctx.workspace_name,
132+
package_path = "",
133+
link_path = ctx.bin_dir.path,
134+
))
135+
direct_node_modules_roots.append("")
136+
137+
transitive_mappings = []
138+
transitive_node_modules_roots = []
104139
for dep in extra_data + getattr(ctx.attr, "data", []) + getattr(ctx.attr, "deps", []):
105140
if not LinkerPackageMappingInfo in dep:
106141
continue
142+
transitive_mappings.append(dep[LinkerPackageMappingInfo].mappings)
143+
transitive_node_modules_roots.append(dep[LinkerPackageMappingInfo].node_modules_roots)
107144

108-
for k, v in dep[LinkerPackageMappingInfo].mappings.items():
109-
map_key_split = k.split(":")
110-
package_name = map_key_split[0]
111-
package_path = map_key_split[1] if len(map_key_split) > 1 else ""
112-
if package_path not in node_modules_roots:
113-
node_modules_roots[package_path] = ""
114-
if _link_mapping(dep.label, mappings, k, v):
115-
_debug(ctx.var, "Linking %s: %s" % (k, v))
116-
mappings[k] = v
117-
118-
# Convert mappings to a module sets (modules per package package_path)
119-
# {
120-
# "package_path": {
121-
# "package_name": "link_path",
122-
# ...
123-
# },
124-
# ...
125-
# }
126-
module_sets = {}
127-
for k, v in mappings.items():
128-
map_key_split = k.split(":")
129-
package_name = map_key_split[0]
130-
package_path = map_key_split[1] if len(map_key_split) > 1 else ""
131-
link_path = v
132-
if package_path not in module_sets:
133-
module_sets[package_path] = {}
134-
module_sets[package_path][package_name] = link_path
145+
mappings = depset(direct_mappings, transitive = transitive_mappings, order = "topological")
146+
module_sets = _flatten_to_module_set(mappings)
147+
148+
# FIXME: Flattens a depset during the analysis phase. Ideally, this would be done during the
149+
# execution phase using an Args object.
150+
linker_node_modules_roots = depset(direct_node_modules_roots, transitive = transitive_node_modules_roots).to_list()
151+
for node_modules_root in linker_node_modules_roots:
152+
if node_modules_root not in node_modules_roots:
153+
node_modules_roots[node_modules_root] = ""
135154

136155
# Write the result to a file, and use the magic node option --bazel_node_modules_manifest
137156
# The launcher.sh will peel off this argument and pass it to the linker rather than the program.
@@ -148,58 +167,60 @@ def write_node_modules_manifest(ctx, extra_data = [], mnemonic = None, link_work
148167
ctx.actions.write(modules_manifest, str(content))
149168
return modules_manifest
150169

151-
def _get_module_mappings(target, ctx):
152-
"""Gathers module mappings from LinkablePackageInfo which maps "package_name:package_path" to link_path.
170+
def _get_linker_package_mapping_info(target, ctx):
171+
"""Transitively gathers module mappings and node_modules roots from LinkablePackageInfo.
153172
154173
Args:
155174
target: target
156175
ctx: ctx
157176
158177
Returns:
159-
Returns module mappings of shape:
160-
{ "package_name:package_path": link_path, ... }
178+
A LinkerPackageMappingInfo provider that contains the mappings and roots for the current
179+
target and all its transitive dependencies.
161180
"""
162-
mappings = {}
163181

164-
# Propagate transitive mappings
182+
transitive_mappings = []
183+
transitive_node_modules_roots = []
165184
for name in _MODULE_MAPPINGS_DEPS_NAMES:
166185
for dep in getattr(ctx.rule.attr, name, []):
167186
if not LinkerPackageMappingInfo in dep:
168187
continue
188+
transitive_mappings.append(dep[LinkerPackageMappingInfo].mappings)
189+
transitive_node_modules_roots.append(dep[LinkerPackageMappingInfo].node_modules_roots)
169190

170-
for k, v in dep[LinkerPackageMappingInfo].mappings.items():
171-
if _link_mapping(target.label, mappings, k, v):
172-
_debug(ctx.var, "target %s propagating module mapping %s: %s" % (dep.label, k, v))
173-
mappings[k] = v
191+
direct_mappings = []
192+
direct_node_modules_roots = []
174193

175194
# Look for LinkablePackageInfo mapping in this node
195+
# LinkablePackageInfo may be provided without a package_name so check for that case as well
176196
if not LinkablePackageInfo in target:
177-
# No mappings contributed here, short-circuit with the transitive ones we collected
178197
_debug(ctx.var, "No LinkablePackageInfo for", target.label)
179-
return mappings
180-
181-
linkable_package_info = target[LinkablePackageInfo]
182-
183-
# LinkablePackageInfo may be provided without a package_name so check for that case as well
184-
if not linkable_package_info.package_name:
185-
# No mappings contributed here, short-circuit with the transitive ones we collected
198+
elif not target[LinkablePackageInfo].package_name:
186199
_debug(ctx.var, "No package_name in LinkablePackageInfo for", target.label)
187-
return mappings
188-
189-
package_path = linkable_package_info.package_path if hasattr(linkable_package_info, "package_path") else ""
190-
map_key = "%s:%s" % (linkable_package_info.package_name, package_path)
191-
map_value = linkable_package_info.path
192-
193-
if _link_mapping(target.label, mappings, map_key, map_value):
194-
_debug(ctx.var, "target %s adding module mapping %s: %s" % (target.label, map_key, map_value))
195-
mappings[map_key] = map_value
196-
197-
# Returns mappings of shape:
198-
# {
199-
# "package_name:package_path": link_path,
200-
# ...
201-
# }
202-
return mappings
200+
else:
201+
linkable_package_info = target[LinkablePackageInfo]
202+
package_path = linkable_package_info.package_path if hasattr(linkable_package_info, "package_path") else ""
203+
direct_mappings.append(
204+
struct(
205+
package_name = linkable_package_info.package_name,
206+
package_path = package_path,
207+
link_path = linkable_package_info.path,
208+
),
209+
)
210+
_debug(ctx.var, "target %s (package path: %s) adding module mapping %s: %s" % (
211+
target.label,
212+
package_path,
213+
linkable_package_info.package_name,
214+
linkable_package_info.path,
215+
))
216+
direct_node_modules_roots.append(package_path)
217+
218+
mappings = depset(direct_mappings, transitive = transitive_mappings, order = "topological")
219+
node_modules_roots = depset(direct_node_modules_roots, transitive = transitive_node_modules_roots)
220+
return LinkerPackageMappingInfo(
221+
mappings = mappings,
222+
node_modules_roots = node_modules_roots,
223+
)
203224

204225
def _module_mappings_aspect_impl(target, ctx):
205226
# If the target explicitly provides mapping information, we will not propagate
@@ -209,7 +230,7 @@ def _module_mappings_aspect_impl(target, ctx):
209230
return []
210231

211232
return [
212-
LinkerPackageMappingInfo(mappings = _get_module_mappings(target, ctx)),
233+
_get_linker_package_mapping_info(target, ctx),
213234
]
214235

215236
module_mappings_aspect = aspect(

internal/node/node.bzl

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,10 @@ def _compute_node_modules_roots(ctx, data):
5959
# Add in roots for multi-linked npm deps
6060
for dep in data:
6161
if LinkerPackageMappingInfo in dep:
62-
for k, v in dep[LinkerPackageMappingInfo].mappings.items():
63-
map_key_split = k.split(":")
64-
package_name = map_key_split[0]
65-
package_path = map_key_split[1] if len(map_key_split) > 1 else ""
66-
if package_path not in node_modules_roots:
67-
node_modules_roots[package_path] = ""
62+
linker_node_modules_roots = dep[LinkerPackageMappingInfo].node_modules_roots.to_list()
63+
for node_modules_root in linker_node_modules_roots:
64+
if node_modules_root not in node_modules_roots:
65+
node_modules_roots[node_modules_root] = ""
6866

6967
return node_modules_roots
7068

internal/providers/node_runtime_deps_info.bzl

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,10 @@ def _compute_node_modules_roots(ctx):
6666
# Add in roots for multi-linked npm deps
6767
for dep in deps:
6868
if LinkerPackageMappingInfo in dep:
69-
for k, v in dep[LinkerPackageMappingInfo].mappings.items():
70-
map_key_split = k.split(":")
71-
package_name = map_key_split[0]
72-
package_path = map_key_split[1] if len(map_key_split) > 1 else ""
73-
if package_path not in node_modules_roots:
74-
node_modules_roots[package_path] = ""
69+
linker_node_modules_roots = dep[LinkerPackageMappingInfo].node_modules_roots.to_list()
70+
for node_modules_root in linker_node_modules_roots:
71+
if node_modules_root not in node_modules_roots:
72+
node_modules_roots[node_modules_root] = ""
7573

7674
return node_modules_roots
7775

packages/esbuild/esbuild.bzl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,13 @@ def _esbuild_impl(ctx):
3737

3838
# Collect the path alias mapping to resolve packages correctly
3939
if LinkerPackageMappingInfo in dep:
40-
for key, value in dep[LinkerPackageMappingInfo].mappings.items():
41-
# key is of format "package_name:package_path"
42-
package_name = key.split(":")[0]
43-
path_alias_mappings.update(generate_path_mapping(package_name, value.replace(ctx.bin_dir.path + "/", "")))
40+
for mapping in dep[LinkerPackageMappingInfo].mappings.to_list():
41+
path_alias_mappings.update(
42+
generate_path_mapping(
43+
mapping.package_name,
44+
mapping.link_path.replace(ctx.bin_dir.path + "/", ""),
45+
),
46+
)
4447

4548
entry_points = desugar_entry_point_names(ctx.file.entry_point, ctx.files.entry_points)
4649

0 commit comments

Comments
 (0)