-
Notifications
You must be signed in to change notification settings - Fork 734
/
Relooper.h
359 lines (293 loc) · 12.1 KB
/
Relooper.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
/*
* Copyright 2016 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
This is an optimized C++ implemention of the Relooper algorithm originally
developed as part of Emscripten. This implementation includes optimizations
added since the original academic paper [1] was published about it.
[1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In Proceedings of the ACM international conference companion on Object oriented programming systems languages and applications companion (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 http://doi.acm.org/10.1145/2048147.2048224
*/
#include <assert.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <deque>
#include <list>
#include <map>
#include <memory>
#include <set>
#include "wasm.h"
#include "wasm-builder.h"
namespace CFG {
class RelooperBuilder : public wasm::Builder {
wasm::Index labelHelper;
public:
RelooperBuilder(wasm::Module& wasm, wasm::Index labelHelper) : wasm::Builder(wasm), labelHelper(labelHelper) {}
wasm::GetLocal* makeGetLabel() {
return makeGetLocal(labelHelper, wasm::i32);
}
wasm::SetLocal* makeSetLabel(wasm::Index value) {
return makeSetLocal(labelHelper, makeConst(wasm::Literal(int32_t(value))));
}
wasm::Binary* makeCheckLabel(wasm::Index value) {
return makeBinary(wasm::EqInt32, makeGetLabel(), makeConst(wasm::Literal(int32_t(value))));
}
// breaks are on blocks, as they can be specific, we make one wasm block per basic block
wasm::Break* makeBlockBreak(int id) {
return wasm::Builder::makeBreak(getBlockBreakName(id));
}
// continues are on shapes, as there is one per loop, and if we have more than one
// going there, it is irreducible control flow anyhow
wasm::Break* makeShapeContinue(int id) {
return wasm::Builder::makeBreak(getShapeContinueName(id));
}
wasm::Name getBlockBreakName(int id) {
return wasm::Name(std::string("block$") + std::to_string(id) + "$break");
}
wasm::Name getShapeContinueName(int id) {
return wasm::Name(std::string("shape$") + std::to_string(id) + "$continue");
}
};
struct Block;
struct Shape;
// Info about a branching from one block to another
struct Branch {
enum FlowType {
Direct = 0, // We will directly reach the right location through other means, no need for continue or break
Break = 1,
Continue = 2
};
Shape *Ancestor; // If not NULL, this shape is the relevant one for purposes of getting to the target block. We break or continue on it
Branch::FlowType Type; // If Ancestor is not NULL, this says whether to break or continue
// A branch either has a condition expression if the block ends in ifs, or if the block ends in a switch, then a list of indexes, which
// becomes the indexes in the table of the switch. If not a switch, the condition can be any expression.
wasm::Expression* Condition;
std::unique_ptr<std::vector<wasm::Index>> SwitchValues; // switches are rare, so have just a pointer here
wasm::Expression* Code; // If provided, code that is run right before the branch is taken. This is useful for phis
Branch(wasm::Expression* ConditionInit, wasm::Expression* CodeInit = nullptr);
Branch(std::vector<wasm::Index>&& ValuesInit, wasm::Expression* CodeInit = nullptr);
// Emits code for branch
wasm::Expression* Render(RelooperBuilder& Builder, Block *Target, bool SetLabel);
};
// like std::set, except that begin() -> end() iterates in the
// order that elements were added to the set (not in the order
// of operator<(T, T))
template<typename T>
struct InsertOrderedSet
{
std::map<T, typename std::list<T>::iterator> Map;
std::list<T> List;
typedef typename std::list<T>::iterator iterator;
iterator begin() { return List.begin(); }
iterator end() { return List.end(); }
void erase(const T& val) {
auto it = Map.find(val);
if (it != Map.end()) {
List.erase(it->second);
Map.erase(it);
}
}
void erase(iterator position) {
Map.erase(*position);
List.erase(position);
}
// cheating a bit, not returning the iterator
void insert(const T& val) {
auto it = Map.find(val);
if (it == Map.end()) {
List.push_back(val);
Map.insert(std::make_pair(val, --List.end()));
}
}
size_t size() const { return Map.size(); }
void clear() {
Map.clear();
List.clear();
}
size_t count(const T& val) const { return Map.count(val); }
InsertOrderedSet() {}
InsertOrderedSet(const InsertOrderedSet& other) {
*this = other;
}
InsertOrderedSet& operator=(const InsertOrderedSet& other) {
clear();
for (auto i : other.List) {
insert(i); // inserting manually creates proper iterators
}
return *this;
}
};
// like std::map, except that begin() -> end() iterates in the
// order that elements were added to the map (not in the order
// of operator<(Key, Key))
template<typename Key, typename T>
struct InsertOrderedMap
{
std::map<Key, typename std::list<std::pair<Key,T>>::iterator> Map;
std::list<std::pair<Key,T>> List;
T& operator[](const Key& k) {
auto it = Map.find(k);
if (it == Map.end()) {
List.push_back(std::make_pair(k, T()));
auto e = --List.end();
Map.insert(std::make_pair(k, e));
return e->second;
}
return it->second->second;
}
typedef typename std::list<std::pair<Key,T>>::iterator iterator;
iterator begin() { return List.begin(); }
iterator end() { return List.end(); }
void erase(const Key& k) {
auto it = Map.find(k);
if (it != Map.end()) {
List.erase(it->second);
Map.erase(it);
}
}
void erase(iterator position) {
erase(position->first);
}
size_t size() const { return Map.size(); }
size_t count(const Key& k) const { return Map.count(k); }
InsertOrderedMap() {}
InsertOrderedMap(InsertOrderedMap& other) {
abort(); // TODO, watch out for iterators
}
InsertOrderedMap& operator=(const InsertOrderedMap& other) {
abort(); // TODO, watch out for iterators
}
};
typedef InsertOrderedSet<Block*> BlockSet;
typedef InsertOrderedMap<Block*, Branch*> BlockBranchMap;
// Represents a basic block of code - some instructions that end with a
// control flow modifier (a branch, return or throw).
struct Block {
// Branches become processed after we finish the shape relevant to them. For example,
// when we recreate a loop, branches to the loop start become continues and are now
// processed. When we calculate what shape to generate from a set of blocks, we ignore
// processed branches.
// Blocks own the Branch objects they use, and destroy them when done.
BlockBranchMap BranchesOut;
BlockSet BranchesIn;
BlockBranchMap ProcessedBranchesOut;
BlockSet ProcessedBranchesIn;
Shape *Parent; // The shape we are directly inside
int Id; // A unique identifier, defined when added to relooper
wasm::Expression* Code; // The code in this block. This can be arbitrary wasm code, including internal control flow, it should just not branch to the outside
wasm::Expression* SwitchCondition; // If nullptr, then this block ends in ifs (or nothing). otherwise, this block ends in a switch, done on this condition
bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching us requires setting the label variable
Block(wasm::Expression* CodeInit, wasm::Expression* SwitchConditionInit = nullptr);
~Block();
// Add a branch: if the condition holds we branch (or if null, we branch if all others failed)
// Note that there can be only one branch from A to B (if you need multiple conditions for the branch,
// create a more interesting expression in the Condition).
void AddBranchTo(Block *Target, wasm::Expression* Condition, wasm::Expression* Code = nullptr);
// Add a switch branch: if the switch condition is one of these values, we branch (or if the list is empty, we are the default)
// Note that there can be only one branch from A to B (if you need multiple values for the branch, that's what the array and default are for).
void AddSwitchBranchTo(Block *Target, std::vector<wasm::Index>&& Values, wasm::Expression* Code = nullptr);
// Emit code for the block, including its contents and branchings out
wasm::Expression* Render(RelooperBuilder& Builder, bool InLoop);
};
// Represents a structured control flow shape, one of
//
// Simple: No control flow at all, just instructions in a single
// basic block.
//
// Multiple: A shape with at least one entry. We may visit one of
// the entries, or none, before continuing to the next
// shape after this.
//
// Loop: An infinite loop. We assume the property that a loop
// will always visit one of its entries, and so for example
// we cannot have a loop containing a multiple and nothing
// else (since we might not visit any of the multiple's
// blocks). Multiple entries are possible for the block,
// however, which is necessary for irreducible control
// flow, of course.
//
struct SimpleShape;
struct MultipleShape;
struct LoopShape;
struct Shape {
int Id; // A unique identifier. Used to identify loops, labels are Lx where x is the Id. Defined when added to relooper
Shape *Next; // The shape that will appear in the code right after this one
Shape *Natural; // The shape that control flow gets to naturally (if there is Next, then this is Next)
enum ShapeType {
Simple,
Multiple,
Loop
};
ShapeType Type;
Shape(ShapeType TypeInit) : Id(-1), Next(NULL), Type(TypeInit) {}
virtual ~Shape() {}
virtual wasm::Expression* Render(RelooperBuilder& Builder, bool InLoop) = 0;
static SimpleShape *IsSimple(Shape *It) { return It && It->Type == Simple ? (SimpleShape*)It : NULL; }
static MultipleShape *IsMultiple(Shape *It) { return It && It->Type == Multiple ? (MultipleShape*)It : NULL; }
static LoopShape *IsLoop(Shape *It) { return It && It->Type == Loop ? (LoopShape*)It : NULL; }
};
struct SimpleShape : public Shape {
Block *Inner;
SimpleShape() : Shape(Simple), Inner(NULL) {}
wasm::Expression* Render(RelooperBuilder& Builder, bool InLoop) override;
};
typedef std::map<int, Shape*> IdShapeMap;
struct MultipleShape : public Shape {
IdShapeMap InnerMap; // entry block ID -> shape
MultipleShape() : Shape(Multiple) {}
wasm::Expression* Render(RelooperBuilder& Builder, bool InLoop) override;
};
struct LoopShape : public Shape {
Shape *Inner;
BlockSet Entries; // we must visit at least one of these
LoopShape() : Shape(Loop), Inner(NULL) {}
wasm::Expression* Render(RelooperBuilder& Builder, bool InLoop) override;
};
// Implements the relooper algorithm for a function's blocks.
//
// Usage:
// 1. Instantiate this struct.
// 2. Call AddBlock with the blocks you have. Each should already
// have its branchings in specified (the branchings out will
// be calculated by the relooper).
// 3. Call Render().
//
// Implementation details: The Relooper instance has
// ownership of the blocks and shapes, and frees them when done.
struct Relooper {
std::deque<Block*> Blocks;
std::deque<Shape*> Shapes;
Shape *Root;
bool MinSize;
int BlockIdCounter;
int ShapeIdCounter;
Relooper();
~Relooper();
void AddBlock(Block *New, int Id=-1);
// Calculates the shapes
void Calculate(Block *Entry);
// Renders the result.
wasm::Expression* Render(RelooperBuilder& Builder);
// Sets us to try to minimize size
void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
};
typedef InsertOrderedMap<Block*, BlockSet> BlockBlockSetMap;
#ifdef RELOOPER_DEBUG
struct Debugging {
static void Dump(BlockSet &Blocks, const char *prefix=NULL);
static void Dump(Shape *S, const char *prefix=NULL);
};
#endif
} // namespace CFG