-
Notifications
You must be signed in to change notification settings - Fork 8
/
asm.py
238 lines (187 loc) · 7.34 KB
/
asm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import ast
from . import bytecode
from util import flipped, joining, adding
class UnresolvedReference(object):
    """Placeholder for a reference to a label that has not been looked up yet.

    The placeholder only stores the label's ``id``.  :meth:`resolve` mutates
    the object in place (its class is swapped) instead of returning a new
    object.
    """

    def __init__(self, id):
        # id: identifier of the label this reference points at
        self.id = id

    def resolve(self, labels):
        """Bind this reference to the first label in *labels* with the same ``id``.

        On success the instance's class becomes ``bytecode.Label.LabelRef``,
        ``ref`` is set to the matching label and the ``id`` attribute is
        removed.

        Raises:
            Exception: if no label in *labels* has this reference's ``id``;
                the message names the missing id.
        """
        for label in labels:
            if label.id == self.id:
                self.__class__ = bytecode.Label.LabelRef
                self.ref = label
                del self.id
                return
        # Name the offending id so the broken reference can be found.
        raise Exception("Unresolved label: %r" % (self.id,))
class DataBlock(object):
    """A block of plain data, stored as-is in the ``data`` attribute."""

    def read_binary(self, data):
        """Use *data* unchanged as the block's content."""
        self.data = data

    def read_ast(self, data):
        """Set the content to the value ``ast.literal_eval`` yields for *data*."""
        literal = ast.literal_eval(data)
        self.data = literal

    @property
    def length(self):
        """``len()`` of the stored data."""
        return len(self.data)

    def to_asm(self):
        """Return the ``repr`` of the stored data."""
        content = self.data
        return repr(content)

    def to_binary(self, startpos):
        """Return the stored data; *startpos* is accepted but not used."""
        return self.data
class CodeBlock(object):
    """Common base class of the code block types (free and fixed-position)."""
class FreeCodeBlock(CodeBlock):
    """Code block whose commands are kept in order but have no positions yet."""

    def __init__(self):
        self.code = []  # bytecode objects, in program order

    @joining
    def to_asm(self):
        """Yield the ``repr`` of each command in order (combined by ``joining``)."""
        for command in self.code:
            yield repr(command)

    def append(self, command):
        """Add *command* at the end of the block."""
        self.code.append(command)

    def fixed_code(self, code_start):
        """Lay the commands out from *code_start* and return a FixedPositionCodeBlock.

        Commands are placed back to back according to their ``length``.
        Every ``VariableAddressCommand`` whose address is a ``LabelRef`` gets
        its ``reladdr`` set to (label position - command position).
        ``Label`` entries are not copied into the result; a label with a
        truthy ``export`` is recorded in the result's ``sym`` mapping instead.
        """
        offsets = []  # index into self.code -> code position

        def recompute():
            # Positions: each command starts where the previous one ended.
            del offsets[:]
            cursor = code_start
            for cmd in self.code:
                offsets.append(cursor)
                cursor += cmd.length
            # Relative addresses of the commands that point at a label.
            for idx, cmd in enumerate(self.code):
                if not isinstance(cmd, bytecode.VariableAddressCommand):
                    continue
                if not isinstance(cmd.address, bytecode.Label.LabelRef):
                    continue
                assert cmd.address.ref in self.code, "label not found: %r"%cmd.address
                label_index = self.code.index(cmd.address.ref)
                cmd.reladdr = offsets[label_index] - offsets[idx]

        recompute()
        # One pass improves the positions; the second only matters for corner
        # cases.  prebake() presumably lets a command settle its encoded
        # length from the current reladdr -- confirm in bytecode.
        for _ in range(2):
            for cmd in self.code:
                if isinstance(cmd, bytecode.VariableLengthCommand):
                    cmd.prebake()
            recompute()

        result = FixedPositionCodeBlock()
        for position, cmd in zip(offsets, self.code):
            if not isinstance(cmd, bytecode.Label):
                assert position not in result.code
                result.code[position] = cmd
            elif cmd.export:
                result.sym[cmd.export] = position
        return result
class FixedPositionCodeBlock(CodeBlock):
    """Code block in which every command is stored under its position.

    ``code`` maps position -> bytecode object and ``sym`` maps an exported
    label name -> position.
    """

    def __init__(self):
        self.code = {}  # position -> bytecode
        self.sym = {}  # export label -> position

    @property
    def length(self):
        """Size of the block, in the units of the commands' ``length``.

        This is the distance from the lowest position to the end of the
        command at the highest position, so it is a true size even when the
        block does not start at position 0.  An empty block has length 0.
        """
        if not self.code:
            return 0
        first = min(self.code)
        last = max(self.code)
        return last + self.code[last].length - first

    def read_binary(self, data, firstpos):
        """Decode all of *data* into commands, storing them from *firstpos* on.

        *data* holds only the code itself; its first item is stored under
        position *firstpos*.  Each command is obtained from
        ``bytecode.interpret(data, offset)`` and the offset advances by the
        command's ``length``.
        """
        # Walk by offset into *data*: comparing the absolute position with
        # len(data) would stop firstpos items before the end.
        offset = 0
        while offset < len(data):
            command = bytecode.interpret(data, offset)
            self.code[firstpos + offset] = command
            offset += command.length

    @joining
    def to_asm(self):
        """Yield one line per command in position order: repr plus hex position."""
        for (lineno, c) in sorted(self.code.items()):
            yield "%r # %04x"%(c, lineno)

    def unfixed_code(self):
        """Return a FreeCodeBlock equivalent in which addresses are label references.

        Every command is replaced by ``command.generalize(position)``.  For a
        resulting ``VariableAddressCommand`` the address is replaced by a
        reference to a ``Label`` shared by all commands with the same
        original address; each such label is inserted directly before the
        command stored at that address.
        """
        labels = {}  # position -> label object
        generalized = {}  # like self.code
        for lineno, command in sorted(self.code.items()):
            g = command.generalize(lineno)
            if isinstance(g, bytecode.VariableAddressCommand):
                # presumably g.address is the absolute target position here
                g.address = labels.setdefault(g.address, bytecode.Label()).get_ref()
            generalized[lineno] = g
        newcode = FreeCodeBlock()
        for lineno, command in sorted(generalized.items()):
            # NOTE(review): a label whose position is not the start of any
            # command is never inserted into the new block.
            if lineno in labels:
                newcode.code.append(labels[lineno])
            newcode.code.append(command)
        return newcode

    @adding
    def to_binary(self, startpos):
        """Yield each command's ``to_bin()`` in position order (combined by ``adding``).

        Asserts that the commands are contiguous and begin at *startpos*.
        """
        lastpos = startpos
        for (pos, c) in sorted(self.code.items()):
            assert lastpos == pos
            yield c.to_bin()
            lastpos += c.length
class ASM(object):
    """An ordered list of data and code blocks that can be read and written
    as binary or as assembly text."""

    def __init__(self):
        self.blocks = []

    def read_binary(self, data, code_offset, generalize=False):
        """Append a DataBlock for ``data[:code_offset]`` and a
        FixedPositionCodeBlock decoded from ``data[code_offset:]``.

        *generalize* is accepted but not used.
        """
        datablock = DataBlock()
        datablock.read_binary(data[:code_offset])
        self.blocks.append(datablock)
        codeblock = FixedPositionCodeBlock()
        codeblock.read_binary(data[code_offset:], code_offset)
        self.blocks.append(codeblock)

    @joining
    def to_asm(self):
        """Yield the assembly text of every block in order (combined by ``joining``)."""
        for b in self.blocks:
            yield b.to_asm()

    @staticmethod
    def _build_command(call, unrefs):
        """Instantiate the bytecode command described by the ``ast.Call`` *call*.

        Keyword values are evaluated with ``ast.literal_eval``, except calls
        to ``LabelRef(...)``, which become UnresolvedReference objects that
        are also appended to *unrefs*.
        """
        # ast.Call has no starargs/kwargs fields from Python 3.5 on; there,
        # unpacking shows up as Starred arguments / keywords without a name.
        starred = getattr(ast, 'Starred', ())
        assert getattr(call, 'kwargs', None) is None and getattr(call, 'starargs', None) is None
        assert not any(isinstance(a, starred) for a in (call.args or []))
        assert all(k.arg is not None for k in (call.keywords or []))

        commandclass = getattr(bytecode, call.func.id)
        assert issubclass(commandclass, bytecode.ByteCodeCommand)

        arguments = {}
        for keyword in (call.keywords or []):
            v = keyword.value
            if isinstance(v, ast.Call) and isinstance(v.func, ast.Name) and v.func.id == 'LabelRef':
                ref = UnresolvedReference(ast.literal_eval(v.args[0]))
                unrefs.append(ref)
                arguments[keyword.arg] = ref
            else:
                arguments[keyword.arg] = ast.literal_eval(v)
        return commandclass(**arguments)

    def read_asm(self, data):
        """Parse assembly text (Python syntax) and append the resulting blocks.

        Each statement's value is either a call, which is turned into a
        bytecode command, or any other expression, which is read as a data
        block.  Consecutive commands form one code block: a
        FixedPositionCodeBlock while all positions are known, a FreeCodeBlock
        once a ``VariableLengthCommand`` has been seen.  Label references are
        resolved against the labels of the same code block.
        """
        nodes = ast.parse(data).body
        pos = 0  # position of the next statement; None once it is unknown
        codebuffer = []  # (pos, code)
        labels = []  # Label commands of the code run being collected
        unrefs = []  # references still to be resolved for that run

        def finish_codebuffer():
            # Turn the collected run of commands into one block.
            if not codebuffer:
                return
            if pos is None:
                codeblock = FreeCodeBlock()
                for (i, c) in codebuffer:
                    codeblock.code.append(c)
            else:
                codeblock = FixedPositionCodeBlock()
                for (i, c) in codebuffer:
                    assert i not in codeblock.code
                    codeblock.code[i] = c
            self.blocks.append(codeblock)
            codebuffer[:] = []
            while unrefs:
                unrefs.pop().resolve(labels)
            labels[:] = []

        for node in nodes:
            # The target name is not used further; the unpacking still
            # rejects statements with more than one assignment target.
            (target, ) = getattr(node, 'targets', [None])
            if target is not None:
                target = target.id
            value = node.value
            if not isinstance(value, ast.Call):
                finish_codebuffer()
                # assume it's data
                assert pos is not None, "Can not have global data segment after free code"
                datablock = DataBlock()
                datablock.read_ast(value)
                self.blocks.append(datablock)
                pos += datablock.length
            else:  # it's a call -- this is code.
                command = self._build_command(value, unrefs)
                if isinstance(command, bytecode.Label):
                    labels.append(command)
                codebuffer.append((pos, command))
                if pos is None or isinstance(command, bytecode.VariableLengthCommand):
                    pos = None
                else:
                    pos += command.length
        finish_codebuffer()

    @adding
    def to_binary(self, startpos=0):
        """Yield each block's binary form in order (combined by ``adding``).

        Every block is given the position at which it starts, beginning with
        *startpos* and advancing by the length of the data produced so far.
        """
        pos = startpos
        for b in self.blocks:
            data = b.to_binary(pos)
            pos += len(data)
            yield data

    def unfix_all(self):
        """Replace every FixedPositionCodeBlock by its ``unfixed_code()``."""
        self.blocks = [b.unfixed_code() if isinstance(b, FixedPositionCodeBlock) else b for b in self.blocks]

    def fix_all(self):
        """Replace every FreeCodeBlock by its ``fixed_code(start)``.

        ``start`` is the sum of the ``length`` of all blocks before it;
        earlier free blocks have already been replaced at that point.
        """
        for i, block in enumerate(self.blocks):
            if isinstance(block, FreeCodeBlock):
                self.blocks[i] = block.fixed_code(sum(bb.length for bb in self.blocks[:i]))