This repository has been archived by the owner on Jan 18, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
main.rs
352 lines (302 loc) · 10.8 KB
/
main.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
#![feature(hash_drain_filter)]
#![feature(drain_filter)]
extern crate petgraph;
use std::collections::{HashSet, HashMap, LinkedList};
use petgraph::Graph;
use petgraph::prelude::{NodeIndex, Incoming};
use petgraph::visit::{depth_first_search, DfsEvent, Control};
use petgraph::dot::{Dot};
#[derive(Debug, PartialEq, Eq, Hash)]
enum AssetType {
JavaScript,
CSS,
HTML
}
#[derive(Debug, PartialEq, Eq, Hash)]
struct Asset<'a> {
name: &'a str,
asset_type: AssetType,
size: usize
}
#[derive(Debug)]
struct Dependency {
is_async: bool
}
#[derive(Debug, Default)]
struct Bundle {
asset_ids: Vec<NodeIndex>,
size: usize,
source_bundles: Vec<NodeIndex>
}
impl Bundle {
fn from_asset(asset_id: NodeIndex, asset: &Asset) -> Self {
Bundle {
asset_ids: vec![asset_id],
size: asset.size,
source_bundles: vec![]
}
}
}
fn main() {
let (g, entries) = build_graph();
println!("{:?}", Dot::new(&g));
let mut bundle_roots = HashMap::new();
let mut reachable_bundles = HashSet::new();
let mut bundle_graph = Graph::new();
// Step 1: Create bundles at the explicit split points in the graph.
// Create bundles for each entry.
for entry in &entries {
let bundle_id = bundle_graph.add_node(Bundle::from_asset(*entry, &g[*entry]));
bundle_roots.insert(*entry, (bundle_id, bundle_id));
}
// Traverse the asset graph and create bundles for asset type changes and async dependencies.
// This only adds the entry asset of each bundle, not the subgraph.
let mut stack = LinkedList::new();
depth_first_search(&g, entries, |event| {
match event {
DfsEvent::Discover(asset_id, _) => {
// Push to the stack when a new bundle is created.
if let Some((_, bundle_group_id)) = bundle_roots.get(&asset_id) {
stack.push_front((asset_id, *bundle_group_id));
}
}
DfsEvent::TreeEdge(u, v) => {
let asset_a = &g[u];
let asset_b = &g[v];
// Create a new bundle when the asset type changes.
if asset_a.asset_type != asset_b.asset_type {
let (_, bundle_group_id) = stack.front().unwrap();
let bundle_id = bundle_graph.add_node(Bundle::from_asset(v, &g[v]));
bundle_roots.insert(v, (bundle_id, *bundle_group_id));
// Add an edge from the bundle group entry to the new bundle.
// This indicates that the bundle is loaded together with the entry.
bundle_graph.add_edge(*bundle_group_id, bundle_id, 0);
return
}
// Create a new bundle as well as a new bundle group if the dependency is async.
let dependency = &g[g.find_edge(u, v).unwrap()];
if dependency.is_async {
let bundle_id = bundle_graph.add_node(Bundle::from_asset(v, &g[v]));
bundle_roots.insert(v, (bundle_id, bundle_id));
// Walk up the stack until we hit a different asset type
// and mark each this bundle as reachable from every parent bundle.
for (b, _) in &stack {
let a = &g[*b];
if a.asset_type != asset_b.asset_type {
break
}
reachable_bundles.insert((*b, v));
}
}
}
DfsEvent::Finish(n, _) => {
// Pop the stack when existing the asset node that created a bundle.
if let Some((s, _)) = stack.front() {
if *s == n {
stack.pop_front();
}
}
}
_ => {}
}
});
println!("roots {:?}", bundle_roots);
println!("reachable {:?}", reachable_bundles);
println!("initial bundle graph {:?}", Dot::new(&bundle_graph));
// Step 2: Determine reachability for every asset from each bundle root.
// This is later used to determine which bundles to place each asset in.
let mut reachable_nodes = HashSet::new();
for (root, _) in &bundle_roots {
depth_first_search(&g, Some(*root), |event| {
if let DfsEvent::Discover(n, _) = &event {
if n == root {
return Control::Continue
}
// Stop when we hit another bundle root.
if bundle_roots.contains_key(&n) {
return Control::<()>::Prune;
}
reachable_nodes.insert((*root, *n));
}
Control::Continue
});
}
let reachable_graph = Graph::<(), ()>::from_edges(&reachable_nodes);
println!("{:?}", Dot::new(&reachable_graph));
// Step 3: Place all assets into bundles. Each asset is placed into a single
// bundle based on the bundle entries it is reachable from. This creates a
// maximally code split bundle graph with no duplication.
// Create a mapping from entry asset ids to bundle ids.
let mut bundles: HashMap<Vec<NodeIndex>, NodeIndex> = HashMap::new();
for asset_id in g.node_indices() {
// Find bundle entries reachable from the asset.
let reachable: Vec<NodeIndex> = reachable_graph.neighbors_directed(asset_id, Incoming).collect();
// Filter out bundles when the asset is reachable in a parent bundle.
let reachable: Vec<NodeIndex> = reachable.iter().cloned().filter(|b| {
(&reachable).into_iter().all(|a| !reachable_bundles.contains(&(*a, *b)))
}).collect();
if let Some((bundle_id, _)) = bundle_roots.get(&asset_id) {
// If the asset is a bundle root, add the bundle to every other reachable bundle group.
bundles.entry(vec![asset_id]).or_insert(*bundle_id);
for a in &reachable {
if *a != asset_id {
bundle_graph.add_edge(bundle_roots[a].1, *bundle_id, 0);
}
}
} else if reachable.len() > 0 {
// If the asset is reachable from more than one entry, find or create
// a bundle for that combination of entries, and add the asset to it.
let source_bundles = reachable.iter().map(|a| bundles[&vec![*a]]).collect();
let bundle_id = bundles.entry(reachable.clone()).or_insert_with(|| {
let mut bundle = Bundle::default();
bundle.source_bundles = source_bundles;
bundle_graph.add_node(bundle)
});
let bundle = &mut bundle_graph[*bundle_id];
bundle.asset_ids.push(asset_id);
bundle.size += g[asset_id].size;
// Add the bundle to each reachable bundle group.
for a in reachable {
if a != *bundle_id {
bundle_graph.add_edge(bundle_roots[&a].1, *bundle_id, 0);
}
}
}
}
// Step 4: Remove shared bundles that are smaller than the minimum size,
// and add the assets to the original source bundles they were referenced from.
// This may result in duplication of assets in multiple bundles.
for bundle_id in bundle_graph.node_indices() {
let bundle = &bundle_graph[bundle_id];
if bundle.source_bundles.len() > 0 && bundle.size < 10 {
remove_bundle(&g, &mut bundle_graph, bundle_id);
}
}
// Step 5: Remove shared bundles from bundle groups that hit the parallel request limit.
let limit = 3;
for (_, (bundle_id, bundle_group_id)) in bundle_roots {
// Only handle bundle group entries.
if bundle_id != bundle_group_id {
continue;
}
// Find the bundles in this bundle group.
let mut neighbors: Vec<NodeIndex> = bundle_graph.neighbors(bundle_group_id).collect();
if neighbors.len() > limit {
// Sort the bundles so the smallest ones are removed first.
neighbors.sort_by(|a, b| bundle_graph[*a].size.cmp(&bundle_graph[*b].size));
// Remove bundles until the bundle group is within the parallel request limit.
for bundle_id in &neighbors[0..neighbors.len() - limit] {
// Add all assets in the shared bundle into the source bundles that are within this bundle group.
let source_bundles: Vec<NodeIndex> = bundle_graph[*bundle_id].source_bundles.drain_filter(|s| neighbors.contains(s)).collect();
for source in source_bundles {
for asset_id in bundle_graph[*bundle_id].asset_ids.clone() {
let bundle_id = bundles[&vec![source]];
let bundle = &mut bundle_graph[bundle_id];
bundle.asset_ids.push(asset_id);
bundle.size += g[asset_id].size;
}
}
// Remove the edge from this bundle group to the shared bundle.
bundle_graph.remove_edge(bundle_graph.find_edge(bundle_group_id, *bundle_id).unwrap());
// If there is now only a single bundle group that contains this bundle,
// merge it into the remaining source bundles. If it is orphaned entirely, remove it.
let count = bundle_graph.neighbors_directed(*bundle_id, Incoming).count();
if count == 1 {
remove_bundle(&g, &mut bundle_graph, *bundle_id);
} else if count == 0 {
bundle_graph.remove_node(*bundle_id);
}
}
}
}
println!("bundle graph {:?}", Dot::new(&bundle_graph));
for bundle_id in bundle_graph.node_indices() {
let bundle = &bundle_graph[bundle_id];
println!("{} {}", bundle.asset_ids.iter().map(|n| g[*n].name).collect::<Vec<&str>>().join(", "), bundle.size)
}
}
fn remove_bundle(
asset_graph: &Graph<Asset, Dependency>,
bundle_graph: &mut Graph<Bundle, i32>,
bundle_id: NodeIndex
) {
let bundle = bundle_graph.remove_node(bundle_id).unwrap();
for asset_id in &bundle.asset_ids {
for source_bundle_id in &bundle.source_bundles {
let bundle = &mut bundle_graph[*source_bundle_id];
bundle.asset_ids.push(*asset_id);
bundle.size += asset_graph[*asset_id].size;
}
}
}
fn build_graph<'a>() -> (Graph<Asset<'a>, Dependency>, Vec<NodeIndex>) {
let mut g = Graph::new();
let mut entries = Vec::new();
let html = g.add_node(Asset {
name: "a.html",
asset_type: AssetType::HTML,
size: 10
});
let html2 = g.add_node(Asset {
name: "b.html",
asset_type: AssetType::HTML,
size: 10
});
let js = g.add_node(Asset {
name: "a.js",
asset_type: AssetType::JavaScript,
size: 10
});
let js2 = g.add_node(Asset {
name: "async.js",
asset_type: AssetType::JavaScript,
size: 10
});
let js3 = g.add_node(Asset {
name: "async2.js",
asset_type: AssetType::JavaScript,
size: 10
});
let js4 = g.add_node(Asset {
name: "b.js",
asset_type: AssetType::JavaScript,
size: 10
});
let js5 = g.add_node(Asset {
name: "shared.js",
asset_type: AssetType::JavaScript,
size: 10
});
let css = g.add_node(Asset {
name: "styles.css",
asset_type: AssetType::CSS,
size: 10
});
g.add_edge(html, js, Dependency {
is_async: false
});
g.add_edge(js, js2, Dependency {
is_async: true
});
g.add_edge(js, js3, Dependency {
is_async: false
});
g.add_edge(js2, js3, Dependency {
is_async: false
});
g.add_edge(js3, js5, Dependency {
is_async: false
});
g.add_edge(js, css, Dependency {
is_async: false
});
g.add_edge(html2, js4, Dependency {
is_async: false
});
g.add_edge(js4, js5, Dependency {
is_async: false
});
entries.push(html);
entries.push(html2);
return (g, entries);
}