From c6fed51134ab53beb13d8b9bdd69e3169921e37c Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 15 Feb 2012 10:25:09 -0600 Subject: [PATCH 01/36] makes scanner PJI* handling more robust, and a couple other fixes --- uncompyle2/Scanner.py | 628 +++++++++++++++++++++-------------------- uncompyle2/Walker.py | 4 +- uncompyle2/__init__.py | 5 +- 3 files changed, 329 insertions(+), 308 deletions(-) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index d228b31..cc08b9a 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -9,9 +9,15 @@ import types import dis +from collections import namedtuple +from array import array -globals().update(dis.opmap) +globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()}) +PJIF = POP_JUMP_IF_FALSE +PJIT = POP_JUMP_IF_TRUE +JA = JUMP_ABSOLUTE +JF = JUMP_FORWARD class Token: """ @@ -20,11 +26,12 @@ class Token: A byte-code token is equivalent to the contents of one line as output by dis.dis(). """ - def __init__(self, type, attr=None, pattr=None, offset=-1): - self.type = intern(type) + def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False): + self.type = intern(type_) self.attr = attr self.pattr = pattr self.offset = offset + self.linestart = linestart def __cmp__(self, o): if isinstance(o, Token): @@ -36,7 +43,11 @@ def __cmp__(self, o): def __repr__(self): return str(self.type) def __str__(self): pattr = self.pattr or '' - return '%s\t%-17s %r' % (self.offset, self.type, pattr) + if self.linestart: + return '\n%s\t%-17s %r' % (self.offset, self.type, pattr) + else: + return '%s\t%-17s %r' % (self.offset, self.type, pattr) + def __hash__(self): return hash(self.type) def __getitem__(self, i): raise IndexError @@ -56,21 +67,15 @@ def __init__(self, co, scanner, classname=None): class Scanner: def __init__(self, version): - self.__version = version + self.version = version from sys import version_info - self.__pyversion = float('%d.%d' % version_info[0:2]) + self.pyversion = float('%d.%d' % version_info[0:2]) self.resetTokenClass() self.JUMP_OPs = map(lambda op: dis.opname[op], - dis.hasjrel + dis.hasjabs) - - copmap = {} - for i in range(len(dis.cmp_op)): - copmap[dis.cmp_op[i]] = i - dis.copmap = copmap - + dis.hasjrel + dis.hasjabs) def setShowAsm(self, showasm, out=None): self.showasm = showasm @@ -93,13 +98,13 @@ def disassemble(self, co, classname=None): rv = [] customize = {} Token = self.Token # shortcut - code = co.co_code + self.code = code = array('B', co.co_code) n = len(code) self.prev = [0] i=0 while i < n: c = code[i] - op = ord(code[i]) + op = code[i] if op >= dis.HAVE_ARGUMENT: self.prev.append(i) self.prev.append(i) @@ -110,22 +115,24 @@ def disassemble(self, co, classname=None): i = i + 1 self.lines = [] + linetuple = namedtuple('linetuple', ['l_no', 'next']) self.if_lines = {} j = 0 linestarts = list(dis.findlinestarts(co)) + linestartoffsets = {a for (a, _) in linestarts} (prev_start_byte, prev_line_no) = linestarts[0] for (start_byte, line_no) in linestarts[1:]: while j < start_byte: - self.lines.append((prev_line_no, start_byte)) + self.lines.append(linetuple(prev_line_no, start_byte)) j += 1 - last_op = ord(code[self.prev[start_byte]]) - if last_op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): + last_op = code[self.prev[start_byte]] + if last_op in (PJIF, PJIT): self.if_lines[prev_line_no] = True else: self.if_lines[prev_line_no] = False (prev_start_byte, prev_line_no) = (start_byte, line_no) while j < n: - self.lines.append((prev_line_no, n)) + self.lines.append(linetuple(prev_line_no, 
n)) j+=1 self.if_lines[prev_line_no] = False @@ -157,13 +164,12 @@ def unmangle(name): offset="%s_%d" % (offset, k) )) k += 1 - c = code[i] - op = ord(c) + op = code[i] opname = dis.opname[op] i += 1 oparg = None; pattr = None if op >= dis.HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1]) * 256 + extended_arg + oparg = code[i] + code[i+1] * 256 + extended_arg extended_arg = 0 i += 2 if op == dis.EXTENDED_ARG: @@ -202,41 +208,41 @@ def unmangle(name): elif op in dis.hasfree: pattr = free[oparg] - if opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE', - 'UNPACK_LIST', 'UNPACK_TUPLE', 'UNPACK_SEQUENCE', - 'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE', - 'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW', - 'CALL_FUNCTION_VAR_KW', 'DUP_TOPX', + if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE, + UNPACK_SEQUENCE, + MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, + CALL_FUNCTION_VAR, CALL_FUNCTION_KW, + CALL_FUNCTION_VAR_KW, DUP_TOPX, ): # CE - Hack for >= 2.5 # Now all values loaded via LOAD_CLOSURE are packed into # a tuple before calling MAKE_CLOSURE. - if opname == 'BUILD_TUPLE' and \ - dis.opname[ord(code[offset-3])] == 'LOAD_CLOSURE': + if op == BUILD_TUPLE and \ + code[offset-3] == LOAD_CLOSURE: continue else: opname = '%s_%d' % (opname, oparg) - if opname not in ('BUILD_SLICE_2', 'BUILD_SLICE_3'): + if op != BUILD_SLICE: customize[opname] = oparg - elif opname == 'JUMP_ABSOLUTE': - target = self.__get_target(code, offset) + elif op == JA: + target = self.get_target(offset) if target < offset: opname = 'JUMP_BACK' - elif opname == 'LOAD_GLOBAL': + elif op == LOAD_GLOBAL: try: if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE': opname = 'LOAD_ASSERT' except AttributeError: pass - elif opname == 'IMPORT_NAME': + elif op == IMPORT_NAME: if pattr == '': pattr = '.' - rv.append(Token(opname, oparg, pattr, offset)) + rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) - if self.__jump_back_else.get(offset, False): + if self.jump_back_else.get(offset, False): rv.append(Token('JUMP_BACK_ELSE', None, None, offset="%s_" % offset )) @@ -248,15 +254,15 @@ def unmangle(name): return rv, customize - def __get_target(self, code, pos, op=None): + def get_target(self, pos, op=None): if op is None: - op = ord(code[pos]) - target = ord(code[pos+1]) + ord(code[pos+2]) * 256 + op = self.code[pos] + target = self.code[pos+1] + self.code[pos+2] * 256 if op in dis.hasjrel: target += pos + 3 return target - def __first_instr(self, code, start, end, instr, target=None, exact=True): + def first_instr(self, start, end, instr, target=None, exact=True): """ Find the first in the block from start to end. is any python bytecode instruction or a list of opcodes @@ -267,23 +273,23 @@ def __first_instr(self, code, start, end, instr, target=None, exact=True): Return index to it or None if not found. 
""" - + code = self.code assert(start>=0 and end<=len(code)) HAVE_ARGUMENT = dis.HAVE_ARGUMENT - try: instr[0] + try: None in instr except: instr = [instr] pos = None distance = len(code) i = start while i < end: - op = ord(code[i]) + op = code[i] if op in instr: if target is None: return i - dest = self.__get_target(code, i, op) + dest = self.get_target(i, op) if dest == target: return i elif not exact: @@ -297,7 +303,7 @@ def __first_instr(self, code, start, end, instr, target=None, exact=True): i += 3 return pos - def __last_instr(self, code, start, end, instr, target=None, exact=True): + def last_instr(self, start, end, instr, target=None, exact=True): """ Find the last in the block from start to end. is any python bytecode instruction or a list of opcodes @@ -309,24 +315,25 @@ def __last_instr(self, code, start, end, instr, target=None, exact=True): Return index to it or None if not found. """ + code = self.code if not (start>=0 and end<=len(code)): return None HAVE_ARGUMENT = dis.HAVE_ARGUMENT - try: instr[0] + try: None in instr except: instr = [instr] pos = None distance = len(code) i = start while i < end: - op = ord(code[i]) + op = code[i] if op in instr: if target is None: pos = i else: - dest = self.__get_target(code, i, op) + dest = self.get_target(i, op) if dest == target: distance = 0 pos = i @@ -341,7 +348,7 @@ def __last_instr(self, code, start, end, instr, target=None, exact=True): i += 3 return pos - def __all_instr(self, code, start, end, instr, target=None): + def all_instr(self, start, end, instr, target=None, include_beyond_target=False): """ Find all in the block from start to end. is any python bytecode instruction or a list of opcodes @@ -350,42 +357,147 @@ def __all_instr(self, code, start, end, instr, target=None): Return a list with indexes to them or [] if none found. 
""" - + + code = self.code assert(start>=0 and end<=len(code)) HAVE_ARGUMENT = dis.HAVE_ARGUMENT - try: instr[0] + try: None in instr except: instr = [instr] result = [] i = start while i < end: - op = ord(code[i]) + op = code[i] if op in instr: if target is None: result.append(i) - elif target == self.__get_target(code, i, op): - result.append(i) - if op < HAVE_ARGUMENT: - i += 1 - else: - i += 3 + else: + t = self.get_target(i, op) + if include_beyond_target and t >= target: + result.append(i) + elif t == target: + result.append(i) + i += self.op_size(op) return result - def __next_except_jump(self, code, start, end, target): + def op_size(self, op): + if op < dis.HAVE_ARGUMENT: + return 1 + else: + return 3 + + def build_stmt_indices(self): + code = self.code + start = 0; + end = len(code) + + stmt_opcodes = { + SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, + SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, + POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, + STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, + STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, + IMPORT_NAME, IMPORT_FROM, RETURN_VALUE, RAISE_VARARGS, POP_TOP, + PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO, + STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, + DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3, + JUMP_ABSOLUTE, + } + + stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)] + + designator_ops = { + STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, + STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, + STORE_SUBSCR, UNPACK_SEQUENCE, + } + + prelim = self.all_instr(start, end, stmt_opcodes) + + stmts = self.stmts = set(prelim) + + pass_stmts = set() + for seq in stmt_opcode_seqs: + i = start + while i+len(seq)-1 < end: + match = True + j = i + for elem in seq: + if elem != code[j]: + match = False + break + j += self.op_size(code[j]) + + if match: + j = self.prev[j] + stmts.add(j) + pass_stmts.add(j) + i += self.op_size(code[i]) + + if pass_stmts: + stmt_list = list(stmts) + stmt_list.sort() + else: + stmt_list = prelim + last_stmt = -1 + self.next_stmt = [] + slist = self.next_stmt = [] + i = 0 + for s in stmt_list: + if code[s] == JA and s not in pass_stmts: + target = self.get_target(s) + if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no: + stmts.remove(s) + continue + j = self.prev[s] + while j == JA: + j = self.prev[j] + if code[j] == LIST_APPEND: #list comprehension + stmts.remove(s) + continue + elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO: + stmts.remove(s) + continue + elif code[s] in designator_ops: + j = self.prev[s] + while code[j] in designator_ops: + j = self.prev[j] + if code[j] == FOR_ITER: + stmts.remove(s) + continue + last_stmt_line = self.lines[s].l_no + slist += [s] * (s-i) + i = s + slist += [len(code)] * (len(code)-len(slist)) + + + def remove_mid_line_ifs(self, ifs): + filtered = [] + for i in ifs: + if self.lines[i].l_no == self.lines[i+3].l_no: + if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF): + continue + filtered.append(i) + return filtered + + + def next_except_jump(self, start, end, target): """ Return the next jump that was generated by an except SomeException: construct in a try...except...else clause or None if not found. 
""" HAVE_ARGUMENT = dis.HAVE_ARGUMENT - lookup = [JUMP_ABSOLUTE, JUMP_FORWARD] + inner_try = self.first_instr(start, end, SETUP_EXCEPT, end, False) + + lookup = [JA, JF] while start < end: - jmp = self.__first_instr(code, start, end, lookup, target) + jmp = self.first_instr(start, end, lookup, target) if jmp is None: return None - if jmp == end-3: + if jmp == self.prev[end]: return jmp after = jmp + 3 ops = [None, None, None, None] @@ -393,42 +505,43 @@ def __next_except_jump(self, code, start, end, target): pos = 0 x = jmp+3 while x <= end and pos < 4: - op = ord(code[x]) + op = self.code[x] if op >= HAVE_ARGUMENT: break ops[pos] = op opp[pos] = x pos += 1 x += 1 - if ops[0] == END_FINALLY and opp[0] == end: - return jmp - if ops[0] == DUP_TOP: - return jmp - if ops[0] == ops[1] == ops[2] == POP_TOP: - return jmp + if (ops[0] == END_FINALLY and opp[0] == end)\ + or (ops[0] == DUP_TOP)\ + or (ops[0] == ops[1] == ops[2] == POP_TOP): + inner_trys = self.all_instr(start, jmp, SETUP_EXCEPT) + inner_finallys = self.all_instr(start, jmp, END_FINALLY) + if len(inner_trys) == len(inner_finallys): + return jmp start = jmp + 3 return None - def __fix_parent(self, code, target, parent): + def fix_parent(self, target, parent): """Fix parent boundaries if needed""" start = parent['start'] end = parent['end'] - if target >= start or end-start < 3 or target not in self.__loops: + if target >= start or end-start < 3 or target not in self.loops: return - if ord(code[end-3])==JUMP_ABSOLUTE: - cont_target = self.__get_target(code, end-3, JUMP_ABSOLUTE) + if self.code[self.prev[end]]==JA: + cont_target = self.get_target(end-3, JA) if target == cont_target: parent['end'] = end-3 - def __restrict_to_parent(self, target, parent): + def restrict_to_parent(self, target, parent): """Restrict pos to parent boundaries.""" if not (parent['start'] < target < parent['end']): target = parent['end'] return target - def __detect_structure(self, code, pos, op=None): + def detect_structure(self, pos, op=None): """ Detect structures and their boundaries to fix optimizied jumps in python2.3+ @@ -436,15 +549,16 @@ def __detect_structure(self, code, pos, op=None): # TODO: check the struct boundaries more precisely -Dan + code = self.code # Ev remove this test and make op a mandatory argument -Dan if op is None: - op = ord(code[pos]) + op = code[pos] ## Detect parent structure - parent = self.__structs[0] + parent = self.structs[0] start = parent['start'] end = parent['end'] - for s in self.__structs: + for s in self.structs: _start = s['start'] _end = s['end'] if (_start <= pos < _end) and (_start >= start and _end <= end): @@ -453,297 +567,205 @@ def __detect_structure(self, code, pos, op=None): parent = s ## We need to know how many new structures were added in this run - origStructCount = len(self.__structs) + origStructCount = len(self.structs) if op == SETUP_LOOP: start = pos+3 - target = self.__get_target(code, pos, op) - end = self.__restrict_to_parent(target, parent) + target = self.get_target(pos, op) + end = self.restrict_to_parent(target, parent) if target != end: - self.__fixed_jumps[pos] = end + self.fixed_jumps[pos] = end (line_no, next_line_byte) = self.lines[pos] - jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE, + jump_back = self.last_instr(start, end, JA, next_line_byte, False) if not jump_back: return - if self.__get_target(code, jump_back) >= next_line_byte: - jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE, + if self.get_target(jump_back) >= next_line_byte: + jump_back = 
self.last_instr(start, end, JA, start, False) - if end > jump_back+4 and ord(code[end]) in (JUMP_FORWARD, JUMP_ABSOLUTE): - if ord(code[jump_back+4]) in (JUMP_ABSOLUTE,): - if self.__get_target(code, jump_back+4) == self.__get_target(code, end): - self.__fixed_jumps[pos] = jump_back+4 + if end > jump_back+4 and code[end] in (JF, JA): + if code[jump_back+4] in (JA, JF): + if self.get_target(jump_back+4) == self.get_target(end): + self.fixed_jumps[pos] = jump_back+4 end = jump_back+4 elif target < pos: - self.__fixed_jumps[pos] = jump_back+4 + self.fixed_jumps[pos] = jump_back+4 end = jump_back+4 - target = self.__get_target(code, jump_back, JUMP_ABSOLUTE) + target = self.get_target(jump_back, JA) - if ord(code[target]) in (FOR_ITER, GET_ITER): + if code[target] in (FOR_ITER, GET_ITER): loop_type = 'for' else: loop_type = 'while' (line_no, next_line_byte) = self.lines[pos] test = self.prev[next_line_byte] assert(test is not None) - test_target = self.__get_target(code, test) + test_target = self.get_target(test) if test_target > (jump_back+3): jump_back = test_target - self.__loops.append(target) - self.__structs.append({'type': loop_type + '-loop', + self.loops.append(target) + self.structs.append({'type': loop_type + '-loop', 'start': target, 'end': jump_back}) - self.__structs.append({'type': loop_type + '-else', + self.structs.append({'type': loop_type + '-else', 'start': jump_back+3, 'end': end}) elif op == SETUP_EXCEPT: start = pos+3 - target = self.__get_target(code, pos, op) - end = self.__restrict_to_parent(target, parent) + target = self.get_target(pos, op) + end = self.restrict_to_parent(target, parent) if target != end: - self.__fixed_jumps[pos] = end + self.fixed_jumps[pos] = end #print target, end, parent ## Add the try block - self.__structs.append({'type': 'try', + self.structs.append({'type': 'try', 'start': start, 'end': end-4}) ## Now isolate the except and else blocks start = end - target = self.__get_target(code, self.prev[start]) - self.__fix_parent(code, target, parent) - end = self.__restrict_to_parent(target, parent) + target = self.get_target(self.prev[start]) + self.fix_parent(target, parent) + end = self.restrict_to_parent(target, parent) #if target != end: - # self.__fixed_jumps[self.prev[start]] = end + # self.fixed_jumps[self.prev[start]] = end - end_finally = self.__last_instr(code, start, end, END_FINALLY) + end_finally = self.last_instr(start, end, END_FINALLY) if end_finally is None: return - lookup = [JUMP_FORWARD] - jump_end = self.__last_instr(code, start, end, lookup) + lookup = [JF] + jump_end = self.last_instr(start, end, lookup) if jump_end: - target = self.__get_target(code, jump_end) - end = self.__restrict_to_parent(target, parent) + target = self.get_target(jump_end) + end = self.restrict_to_parent(target, parent) # if target != end: - # self.__fixed_jumps[jump_end] = end + # self.fixed_jumps[jump_end] = end ## Add the try-else block - self.__structs.append({'type': 'try-else', + self.structs.append({'type': 'try-else', 'start': end_finally+1, 'end': end}) ## Add the except blocks i = start while i < end_finally: - jmp = self.__next_except_jump(code, i, end_finally, target) + jmp = self.next_except_jump(i, end_finally, target) if jmp is None: break - self.__structs.append({'type': 'except', + self.structs.append({'type': 'except', 'start': i, 'end': jmp}) # if target != end: - # self.__fixed_jumps[jmp] = end + # self.fixed_jumps[jmp] = end i = jmp+3 - elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): - start = pos+3 - target = 
self.__get_target(code, pos, op) - rtarget = self.__restrict_to_parent(target, parent) + elif op in (PJIF, PJIT): - (line_no, next_line_byte) = self.lines[pos] + start = pos+3 + target = self.get_target(pos, op) + rtarget = self.restrict_to_parent(target, parent) + pre = self.prev - if target == rtarget: - prev_target = self.prev[target] - prev_target_op = ord(code[prev_target]) - target_op = ord(code[target]) - if prev_target_op == JUMP_ABSOLUTE and target_op != POP_BLOCK: - if self.__get_target(code, prev_target) < pos: - self.__jump_back_else[prev_target] = True - - #is this part of a larger expression - if (ord(code[self.prev[target]]) in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, - POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > pos): - self.__fixed_jumps[pos] = self.prev[target] - return - - #is this not at the end of a line - if line_no == self.lines[start][0]: - #is this a one line if with multiple tests - good_op = False - prev = self.prev[next_line_byte] - p_op = ord(code[prev]) - if op == POP_JUMP_IF_FALSE: - if target == next_line_byte: - if p_op == JUMP_FORWARD: - if self.__get_target(code, prev) == target: - good_op = True - if p_op == RETURN_VALUE: - good_op = True - else: - if start < target < next_line_byte: - if ord(code[self.prev[target]]) in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE): - good_op = True - while p_op in (JUMP_ABSOLUTE, JUMP_FORWARD, POP_BLOCK): - if p_op in (JUMP_ABSOLUTE, JUMP_FORWARD): - if self.__get_target(code, prev) == target: - good_op = True - break - prev = self.prev[prev] - p_op = ord(code[prev]) - if good_op: - last = self.__last_instr(code, start, next_line_byte, - (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target) - if last: - self.__fixed_jumps[pos] = last - return - else: - while p_op in (JUMP_ABSOLUTE, JUMP_FORWARD, POP_BLOCK): - if p_op in (JUMP_ABSOLUTE, JUMP_FORWARD): - if self.__get_target(code, prev) == target: - last = self.__last_instr(code, start, next_line_byte, - (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) - if last: - self.__fixed_jumps[pos] = last - return - break - prev = self.prev[prev] - p_op = ord(code[prev]) - - #if ifline - if self.if_lines.get(line_no, False): - if (target >= next_line_byte) or (target < pos): - if not (line_no == self.lines[target][0]): - self.__fixed_jumps[pos] = self.prev[next_line_byte] - return - if self.if_lines.get(line_no+1, False): - next_if = self.prev[self.lines[next_line_byte][1]] - if target == self.__get_target(code, next_if): - self.__fixed_jumps[pos] = next_if - elif (op == POP_JUMP_IF_TRUE) and (ord(code[next_if+3]) == JUMP_ABSOLUTE) and (target == self.__get_target(code, next_if+3)) and (target < pos): - self.__fixed_jumps[pos] = next_if - return - else: - if self.lines[target][0] > line_no: - next = self.__first_instr(code, start, target, POP_JUMP_IF_FALSE, target) - j = self.__first_instr(code, start, target, JUMP_ABSOLUTE, target) - if next and not j: - self.__fixed_jumps[pos] = next - return + #does this jump to right after another cond jump? 
+ # if so, it's part of a larger conditional + if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, + PJIF, PJIT)) and (target > pos): + self.fixed_jumps[pos] = pre[target] return - - if op == POP_JUMP_IF_FALSE: - i = self.lines[next_line_byte][0] - k = j = next_line_byte - num_pj = 1 - while ((self.if_lines.get(i, False) - and ((self.__get_target(code, self.lines[j][1]-3) == target) - or ((ord(code[self.lines[j][1]-3]) == POP_JUMP_IF_TRUE) - and (ord(code[self.__get_target(code, self.lines[j][1]-3)-3]) == POP_JUMP_IF_FALSE) - and (self.__get_target(code, self.__get_target(code, self.lines[j][1]-3)-3) == target)))) - or (ord(code[self.prev[self.lines[j][1]]]) in (LOAD_ATTR, LOAD_FAST, JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP))): - if (self.if_lines.get(i, False) and (self.__get_target(code, self.lines[j][1]-3) == target)): - num_pj += 1 - j = self.lines[j][1] - i = self.lines[j][0] - if (ord(code[self.prev[j]]) not in (LOAD_ATTR, LOAD_FAST, JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP)): - k = j - if k > next_line_byte: - if num_pj > 1 and target > pos: - prev_end = self.prev[rtarget] - num_pj += len({ self.lines[a][0] for a in self.__all_instr(code, k, prev_end, (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)}) - num_pr = len({ self.lines[a][0] for a in self.__all_instr(code, k, prev_end, (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), rtarget)}) - num_jumps = 0 - while ord(code[prev_end]) in (JUMP_FORWARD, JUMP_ABSOLUTE) and self.__get_target(code, prev_end) == target: - num_pr += len({ self.lines[a][0] for a in self.__all_instr(code, k, prev_end, (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), prev_end)}) - num_jumps += 1 - prev_end = self.prev[prev_end] - if ord(code[prev_end]) == RETURN_VALUE: - num_jumps += 1 - num_pj += num_pr - num_pj += len(self.__all_instr(code, k, prev_end, (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)) - if num_pj > num_jumps: - self.__fixed_jumps[pos] = k-3 + + # is this an if-else at end of a loop? 
+ # if so, indicate with special opcode to help parser + prev_target = pre[target] + prev_target_op = code[prev_target] + target_op = code[target] + if target == rtarget: + if prev_target_op == JA and target_op != POP_BLOCK: + if self.get_target(prev_target) < pos: + self.jump_back_else[prev_target] = True + + # is this an if and + if op == PJIF: + #import pdb; pdb.set_trace() + match = self.all_instr(start, self.next_stmt[pos], (PJIF, PJIT), target) + match = self.remove_mid_line_ifs(match) + if match: + if code[pre[rtarget]] in (JF, JA) \ + and pre[rtarget] not in self.stmts \ + and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget: + if code[pre[pre[rtarget]]] == JA \ + and target == self.get_target(pre[pre[rtarget]]) \ + and pre[pre[rtarget]] not in self.stmts \ + and 1 == len(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ + (PJIF, PJIT), target))): + pass + elif code[pre[pre[rtarget]]] == RETURN_VALUE \ + and 1 == (len(set(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ + (PJIF, PJIT), target))) \ + | set(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ + (PJIF, PJIT, JA), pre[rtarget], True))))): + pass + else: + fix = None + jump_ifs = self.all_instr(start, self.next_stmt[pos], (PJIF, PJIT)) + last_jump_good = True + for j in jump_ifs: + if code[j] == PJIF and target == self.get_target(j): + if self.lines[j].next == j+3 and last_jump_good: + fix = j + break + else: + last_jump_good = False + self.fixed_jumps[pos] = fix or match[-1] return else: - self.__fixed_jumps[pos] = k-3 + self.fixed_jumps[pos] = match[-1] return - -# elif op == POP_JUMP_IF_TRUE and target > pos: -# i = self.lines[next_line_byte][0] -# j = next_line_byte -# while (self.if_lines.get(i, False) -# and ((self.__get_target(code, self.lines[j][1]-3) == target) -# and (ord(code[self.lines[j][1]-3]) == POP_JUMP_IF_TRUE))): -# j = self.lines[j][1] -# i = self.lines[j][0] -# if j > next_line_byte: -# self.__fixed_jumps[pos] = j-3 -# return - elif op == POP_JUMP_IF_TRUE: - def equaljumps(jump1, jump2): - jump_ops = (JUMP_ABSOLUTE, JUMP_FORWARD) - while ord(code[jump1]) in jump_ops: - jump1 = self.__get_target(code, jump1) - while ord(code[jump2]) in jump_ops: - jump2 = self.__get_target(code, jump2) - return jump1 == jump2 - i = self.lines[next_line_byte][0] - j = next_line_byte - while self.if_lines.get(i, False): - j = self.lines[j][1] - i = self.lines[j][0] - if j > next_line_byte: - if ord(code[j]) == JUMP_ABSOLUTE and equaljumps(j, target): - self.__fixed_jumps[pos] = j-3 + else: + next = self.next_stmt[pos] + if pre[next] == pos: + pass + elif code[next] in (JF, JA) and target == self.get_target(next): + if code[pre[next]] in (PJIF, PJIT): + self.fixed_jumps[pos] = pre[next] return - - if (target < pos) and ((ord(code[target]) == FOR_ITER) or (ord(code[self.prev[target]]) == SETUP_LOOP)): -# self.__end_if_line[start] = 0 - - if ord(code[self.prev[end]]) == JUMP_ABSOLUTE: - if self.__get_target(code, self.prev[end]) == target: - self.__structs.append({'type': 'if-then', - 'start': pos, - 'end': self.prev[end]}) -# print self.__structs[-1] - return - + elif code[next] == JA and code[target] in (JA, JF) \ + and self.get_target(target) == self.get_target(next): + self.fixed_jumps[pos] = pre[next] + return + #does the if jump just beyond a jump op, then this is probably an if statement - if ord(code[self.prev[rtarget]]) in (JUMP_ABSOLUTE, JUMP_FORWARD): - if_end = self.__get_target(code, self.prev[rtarget]) - - if (if_end < 
self.prev[rtarget]) and (ord(code[self.prev[if_end]]) == SETUP_LOOP): - loopjump = self.__last_instr(code, start, end, JUMP_ABSOLUTE, if_end) + if code[pre[rtarget]] in (JA, JF): + if_end = self.get_target(pre[rtarget]) + + #is this a loop not an if? + if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP): if(if_end > start): return - end = self.__restrict_to_parent(if_end, parent) + end = self.restrict_to_parent(if_end, parent) -# self.__end_if_line[start] = rtarget - - self.__structs.append({'type': 'if-then', + self.structs.append({'type': 'if-then', 'start': start, - 'end': self.prev[rtarget]}) + 'end': pre[rtarget]}) if rtarget < end: - self.__structs.append({'type': 'if-else', + self.structs.append({'type': 'if-else', 'start': rtarget, 'end': end}) - elif ord(code[self.prev[rtarget]]) == RETURN_VALUE: -# self.__end_if_line[start] = rtarget - # self.__fixed_jumps[pos] = rtarget - self.__structs.append({'type': 'if-then', + elif code[pre[rtarget]] == RETURN_VALUE: + self.structs.append({'type': 'if-then', 'start': start, 'end': rtarget}) + elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - target = self.__get_target(code, pos, op) + target = self.get_target(pos, op) if target > pos: - unop_target = self.__last_instr(code, pos, target, JUMP_FORWARD, target) - if unop_target and ord(code[unop_target+3]) != ROT_TWO: - self.__fixed_jumps[pos] = unop_target + unop_target = self.last_instr(pos, target, JF, target) + if unop_target and code[unop_target+3] != ROT_TWO: + self.fixed_jumps[pos] = unop_target @@ -762,43 +784,39 @@ def find_jump_targets(self, code): hasjrel = dis.hasjrel hasjabs = dis.hasjabs - needFixing = (self.__pyversion >= 2.3) - n = len(code) - self.__structs = [{'type': 'root', + self.structs = [{'type': 'root', 'start': 0, 'end': n-1}] - self.__loops = [] ## All loop entry points - self.__fixed_jumps = {} ## Map fixed jumps to their real destination - self.__jump_back_else = {} + self.loops = [] ## All loop entry points + self.fixed_jumps = {} ## Map fixed jumps to their real destination + self.jump_back_else = {} + self.build_stmt_indices() targets = {} i = 0 while i < n: - op = ord(code[i]) + op = code[i] - if needFixing: - ## Determine structures and fix jumps for 2.3+ - self.__detect_structure(code, i, op) + ## Determine structures and fix jumps for 2.3+ + self.detect_structure(i, op) if op >= HAVE_ARGUMENT: - label = self.__fixed_jumps.get(i) - oparg = ord(code[i+1]) + ord(code[i+2]) * 256 + label = self.fixed_jumps.get(i) + oparg = code[i+1] + code[i+2] * 256 if label is None: if op in hasjrel and op != FOR_ITER: label = i + 3 + oparg elif op in hasjabs: - if op in [JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP]: + if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): if (oparg > i): label = oparg if label is not None: targets[label] = targets.get(label, []) + [i] - i += 3 - else: - i += 1 + i += self.op_size(op) return targets diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index a293fa5..ea5e5cb 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -539,7 +539,9 @@ def print_docstring(self, indent, docstring): calculate_indent = sys.maxint for line in lines[1:]: stripped = line.lstrip() - calculate_indent = min(calculate_indent, len(line) - len(stripped)) + if len(stripped) > 0: + calculate_indent = min(calculate_indent, len(line) - len(stripped)) + calculate_indent = min(calculate_indent, len(lines[-1]) - len(lines[-1].lstrip())) # Remove indentation (first line is special): trimmed = [lines[0]] if calculate_indent < sys.maxint: 
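
The print_docstring hunk above is essentially the PEP 257 trim recipe: the common indentation is taken only over non-blank continuation lines, so an empty line inside a docstring no longer drags the computed indent down to zero. A standalone Python 2 sketch of the same idea (the helper name trim_docstring is illustrative, not part of uncompyle2):

    import sys

    def trim_docstring(docstring):
        # Smallest indentation of the non-blank lines after the first one,
        # as the fixed Walker.print_docstring now computes it.
        lines = docstring.expandtabs().split('\n')
        indent = sys.maxint
        for line in lines[1:]:
            stripped = line.lstrip()
            if stripped:
                indent = min(indent, len(line) - len(stripped))
        # The first line is special; the rest are dedented by the common indent.
        trimmed = [lines[0].strip()]
        if indent < sys.maxint:
            for line in lines[1:]:
                trimmed.append(line[indent:].rstrip())
        return '\n'.join(trimmed)
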
diff --git a/uncompyle2/__init__.py b/uncompyle2/__init__.py index 67a7e96..3bf650d 100644 --- a/uncompyle2/__init__.py +++ b/uncompyle2/__init__.py @@ -71,7 +71,7 @@ def _load_module(filename): except KeyError: raise ImportError, "Unknown magic number %s in %s" % (ord(magic[0])+256*ord(magic[1]), filename) if version != '2.7': - raise ImportError, "This is a Python %s file! Only Python 2.7 files are supported." + raise ImportError, "This is a Python %s file! Only Python 2.7 files are supported." % version #print version fp.read(4) # timestamp co = marshal.load(fp) @@ -86,7 +86,8 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): # store final output stream for case of error __real_out = out or sys.stdout - + if co.co_filename: + print >>__real_out, '#Embedded file name: %s' % co.co_filename scanner = Scanner.getscanner(version) scanner.setShowAsm(showasm, out) tokens, customize = scanner.disassemble(co) From f28027f958d923575c4e4a24819370be17ff74e8 Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 15 Feb 2012 21:40:57 -0600 Subject: [PATCH 02/36] more parse fixes --- uncompyle2/Parser.py | 28 ++++++++++++---------------- uncompyle2/Scanner.py | 9 +++------ uncompyle2/Walker.py | 2 ++ 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index 2f1b0e0..dee308e 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -299,7 +299,8 @@ def p_grammar(self, args): lastl_stmt ::= iflaststmtl lastl_stmt ::= ifelsestmtl lastl_stmt ::= c_trystmt - + lastl_stmt ::= forelselaststmtl + l_stmts_opt ::= l_stmts l_stmts_opt ::= passstmt @@ -416,6 +417,7 @@ def p_grammar(self, args): iflaststmtl ::= testexpr l_stmts_opt JUMP_ABSOLUTE iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK + iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK JUMP_BACK_ELSE ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD c_stmts COME_FROM ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD return_stmts COME_FROM @@ -585,26 +587,20 @@ def p_grammar(self, args): _for ::= GET_ITER FOR_ITER _for ::= LOAD_CONST FOR_LOOP + for_block ::= l_stmts_opt _jump_back + for_block ::= return_stmts _come_from + forstmt ::= SETUP_LOOP expr _for designator - l_stmts_opt _jump_back - POP_BLOCK COME_FROM - forstmt ::= SETUP_LOOP expr _for designator - return_stmts - POP_BLOCK COME_FROM + for_block POP_BLOCK COME_FROM forelsestmt ::= SETUP_LOOP expr _for designator - l_stmts_opt _jump_back - POP_BLOCK stmts COME_FROM - forelsestmt ::= SETUP_LOOP expr _for designator - return_stmts _come_from - POP_BLOCK stmts COME_FROM + for_block POP_BLOCK stmts COME_FROM forelselaststmt ::= SETUP_LOOP expr _for designator - l_stmts_opt _jump_back - POP_BLOCK c_stmts COME_FROM - forelselaststmt ::= SETUP_LOOP expr _for designator - return_stmts _come_from - POP_BLOCK c_stmts COME_FROM + for_block POP_BLOCK c_stmts COME_FROM + + forelselaststmtl ::= SETUP_LOOP expr _for designator + for_block POP_BLOCK l_stmts COME_FROM return_stmts ::= return_stmt return_stmts ::= _stmts return_stmt diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index cc08b9a..75f7f67 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -678,13 +678,10 @@ def detect_structure(self, pos, op=None): # is this an if-else at end of a loop? 
# if so, indicate with special opcode to help parser - prev_target = pre[target] - prev_target_op = code[prev_target] - target_op = code[target] if target == rtarget: - if prev_target_op == JA and target_op != POP_BLOCK: - if self.get_target(prev_target) < pos: - self.jump_back_else[prev_target] = True + if code[pre[target]] == JA and code[target] != POP_BLOCK: + if self.get_target(pre[target]) < pos: + self.jump_back_else[pre[target]] = True # is this an if and if op == PJIF: diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index ea5e5cb..bfe1fe1 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -267,6 +267,8 @@ '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n', 3, 1, 4, -2), 'forelselaststmt': ( '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), + 'forelselaststmtl': ( + '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), 'trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ), 'c_trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ), 'tf_trystmt': ( '%c%-%c%+', 1, 5 ), From b77f708c7e973da147a114be73afca2aa5f419be Mon Sep 17 00:00:00 2001 From: wibiti Date: Thu, 16 Feb 2012 11:44:23 -0600 Subject: [PATCH 03/36] another bug fix --- uncompyle2/Scanner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 75f7f67..a8dc434 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -467,7 +467,7 @@ def build_stmt_indices(self): if code[j] == FOR_ITER: stmts.remove(s) continue - last_stmt_line = self.lines[s].l_no + last_stmt = s slist += [s] * (s-i) i = s slist += [len(code)] * (len(code)-len(slist)) @@ -732,6 +732,9 @@ def detect_structure(self, pos, op=None): and self.get_target(target) == self.get_target(next): self.fixed_jumps[pos] = pre[next] return + + if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts and pre[rtarget] != pos: + rtarget = pre[rtarget] #does the if jump just beyond a jump op, then this is probably an if statement if code[pre[rtarget]] in (JA, JF): From 6986b19f7a7de82807497d68e8e2a2131cdfbfc5 Mon Sep 17 00:00:00 2001 From: wibiti Date: Mon, 20 Feb 2012 11:06:48 -0600 Subject: [PATCH 04/36] Fixes a rare try-except-else incorrect parsing -Pretty much rewrote all the try-except-else related code and grammar, so there may be some new bugs in there. 
--- uncompyle2/Parser.py | 130 ++++++--------------------- uncompyle2/Scanner.py | 205 +++++++++++++++++------------------------- uncompyle2/Walker.py | 26 ++---- 3 files changed, 119 insertions(+), 242 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index dee308e..eef86dc 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -286,7 +286,6 @@ def p_grammar(self, args): lastc_stmt ::= whileelselaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtr - lastc_stmt ::= c_trystmt c_stmts_opt ::= c_stmts c_stmts_opt ::= passstmt @@ -298,7 +297,6 @@ def p_grammar(self, args): lastl_stmt ::= iflaststmtl lastl_stmt ::= ifelsestmtl - lastl_stmt ::= c_trystmt lastl_stmt ::= forelselaststmtl l_stmts_opt ::= l_stmts @@ -354,6 +352,7 @@ def p_grammar(self, args): stmt ::= forstmt stmt ::= forelsestmt stmt ::= trystmt + stmt ::= tryelsestmt stmt ::= tryfinallystmt stmt ::= withstmt stmt ::= withasstmt @@ -430,116 +429,45 @@ def p_grammar(self, args): _jump_back_jump_back_else ::= JUMP_BACK JUMP_BACK_ELSE - trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK JUMP_FORWARD - COME_FROM except_stmts + trystmt ::= SETUP_EXCEPT stmts_opt POP_BLOCK + JUMP_FORWARD COME_FROM except_stmts + END_FINALLY COME_FROM COME_FROM - trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK - COME_FROM JUMP_FORWARD except_stmts + trystmt ::= SETUP_EXCEPT stmts_opt POP_BLOCK + jmp_abs COME_FROM except_stmts + END_FINALLY COME_FROM - except_stmts ::= except_cond1 except_sub_stmts - except_stmts ::= except_cond2 except_sub_stmts - except_stmts ::= except JUMP_FORWARD try_end COME_FROM - except_stmts ::= except2 END_FINALLY COME_FROM - except_stmts ::= END_FINALLY COME_FROM + tryelsestmt ::= SETUP_EXCEPT stmts_opt POP_BLOCK + JUMP_FORWARD COME_FROM except_stmts + END_FINALLY COME_FROM try_else_suite COME_FROM - except_stmts_a ::= except_cond1 except_sub_stmts_a - except_stmts_a ::= except_cond2 except_sub_stmts_a - except_stmts_a ::= except JUMP_FORWARD try_end COME_FROM - except_stmts_a ::= except2 try_end - except_stmts_a ::= try_end + tryelsestmt ::= SETUP_EXCEPT stmts_opt POP_BLOCK + jmp_abs COME_FROM except_stmts + END_FINALLY try_else_suite COME_FROM - except_sub_stmts ::= c_stmts_opt JUMP_FORWARD except_stmts_a COME_FROM - except_sub_stmts ::= return_stmts except_stmts - except_sub_stmts ::= continue_stmts jmp_back except_stmts + except_stmts ::= except_stmts except_stmt + except_stmts ::= except_stmt - except_sub_stmts_a ::= c_stmts_opt JUMP_FORWARD except_stmts_a COME_FROM - except_sub_stmts_a ::= return_stmts except_stmts_a - except_sub_stmts_a ::= continue_stmts jmp_back except_stmts_a - - jmp_back ::= JUMP_BACK - jmp_back ::= JUMP_BACK JUMP_BACK_ELSE - continue_stmts ::= continue_stmt - continue_stmts ::=_stmts continue_stmt - - try_end ::= END_FINALLY COME_FROM - try_end ::= except_else - except_else ::= END_FINALLY COME_FROM stmts + except_stmt ::= except_cond1 except_suite + except_stmt ::= except_cond2 except_suite + except_stmt ::= except + except_suite ::= c_stmts_opt JUMP_FORWARD + except_suite ::= c_stmts_opt jmp_abs + except_suite ::= return_stmts + except_cond1 ::= DUP_TOP expr COMPARE_OP - POP_JUMP_IF_FALSE POP_TOP POP_TOP POP_TOP - + POP_JUMP_IF_FALSE POP_TOP POP_TOP POP_TOP except_cond2 ::= DUP_TOP expr COMPARE_OP POP_JUMP_IF_FALSE POP_TOP designator POP_TOP - except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt - - except2 ::= POP_TOP POP_TOP POP_TOP return_stmts - - - c_trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK JUMP_FORWARD - COME_FROM c_except_stmts - - c_trystmt ::= 
SETUP_EXCEPT stmts_opt - POP_BLOCK - COME_FROM JUMP_FORWARD c_except_stmts - - c_trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK jmp_abs - COME_FROM c_except_stmts2 - - c_trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK - COME_FROM jmp_abs c_except_stmts2 - - c_except_stmts ::= except_cond1 c_except_sub_stmts - c_except_stmts ::= except_cond2 c_except_sub_stmts - c_except_stmts ::= except jmp_abs try_end3 - c_except_stmts ::= except2 END_FINALLY COME_FROM - c_except_stmts ::= END_FINALLY COME_FROM - - c_except_stmts_a ::= except_cond1 c_except_sub_stmts_a - c_except_stmts_a ::= except_cond2 c_except_sub_stmts_a - c_except_stmts_a ::= except jmp_abs try_end3 - c_except_stmts_a ::= except2 try_end3 - c_except_stmts_a ::= try_end3 - - try_end3 ::= END_FINALLY COME_FROM - try_end3 ::= except_else3 - except_else3 ::= END_FINALLY COME_FROM c_stmts - except_else3 ::= END_FINALLY COME_FROM l_stmts - - c_except_sub_stmts ::= c_stmts_opt jmp_abs c_except_stmts_a - c_except_sub_stmts ::= return_stmts c_except_stmts - - c_except_sub_stmts_a ::= c_stmts_opt jmp_abs c_except_stmts_a - c_except_sub_stmts_a ::= return_stmts c_except_stmts_a - - c_except_stmts2 ::= except_cond1 c_except_sub_stmts2 - c_except_stmts2 ::= except_cond2 c_except_sub_stmts2 - c_except_stmts2 ::= except jmp_abs try_end2 - c_except_stmts2 ::= except2 END_FINALLY - c_except_stmts2 ::= END_FINALLY - - c_except_stmts2_a ::= except_cond1 c_except_sub_stmts2_a - c_except_stmts2_a ::= except_cond2 c_except_sub_stmts2_a - c_except_stmts2_a ::= except jmp_abs try_end2 - c_except_stmts2_a ::= except2 try_end2 - c_except_stmts2_a ::= try_end2 - - c_except_sub_stmts2 ::= c_stmts_opt jmp_abs c_except_stmts2_a - c_except_sub_stmts2 ::= return_stmts c_except_stmts2 - - c_except_sub_stmts2_a ::= c_stmts_opt jmp_abs c_except_stmts2_a - c_except_sub_stmts2_a ::= return_stmts c_except_stmts2_a - - try_end2 ::= END_FINALLY - try_end2 ::= except_else2 - except_else2 ::= END_FINALLY c_stmts - except_else2 ::= END_FINALLY l_stmts + except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt JUMP_FORWARD + except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt jmp_abs + except ::= POP_TOP POP_TOP POP_TOP return_stmts + + try_else_suite ::= c_stmts + try_else_suite ::= l_stmts jmp_abs ::= JUMP_ABSOLUTE jmp_abs ::= JUMP_BACK diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index a8dc434..45fe501 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -11,6 +11,7 @@ import dis from collections import namedtuple from array import array +from operator import itemgetter globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()}) @@ -101,18 +102,13 @@ def disassemble(self, co, classname=None): self.code = code = array('B', co.co_code) n = len(code) self.prev = [0] - i=0 - while i < n: + for i in self.op_range(0, n): c = code[i] op = code[i] + self.prev.append(i) if op >= dis.HAVE_ARGUMENT: self.prev.append(i) self.prev.append(i) - self.prev.append(i) - i = i + 3 - else: - self.prev.append(i) - i = i + 1 self.lines = [] linetuple = namedtuple('linetuple', ['l_no', 'next']) @@ -153,27 +149,25 @@ def unmangle(name): names = co.co_names varnames = co.co_varnames - i = 0 extended_arg = 0 - while i < n: - offset = i - k = 0 - if cf.has_key(offset): + for offset in self.op_range(0, n): + + if offset in cf: + k = 0 for j in cf[offset]: rv.append(Token('COME_FROM', None, repr(j), offset="%s_%d" % (offset, k) )) k += 1 - op = code[i] + op = code[offset] opname = dis.opname[op] - i += 1 oparg = None; pattr = None if op >= dis.HAVE_ARGUMENT: - oparg = code[i] + 
code[i+1] * 256 + extended_arg + oparg = code[offset+1] + code[offset+2] * 256 + extended_arg extended_arg = 0 - i += 2 if op == dis.EXTENDED_ARG: extended_arg = oparg * 65536L + continue if op in dis.hasconst: const = co.co_consts[oparg] if type(const) == types.CodeType: @@ -198,7 +192,7 @@ def unmangle(name): elif op in dis.hasname: pattr = names[oparg] elif op in dis.hasjrel: - pattr = repr(i + oparg) + pattr = repr(offset + 3 + oparg) elif op in dis.hasjabs: pattr = repr(oparg) elif op in dis.haslocal: @@ -242,7 +236,7 @@ def unmangle(name): rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) - if self.jump_back_else.get(offset, False): + if offset in self.jump_back_else: rv.append(Token('JUMP_BACK_ELSE', None, None, offset="%s_" % offset )) @@ -283,8 +277,7 @@ def first_instr(self, start, end, instr, target=None, exact=True): pos = None distance = len(code) - i = start - while i < end: + for i in self.op_range(start, end): op = code[i] if op in instr: if target is None: @@ -297,10 +290,6 @@ def first_instr(self, start, end, instr, target=None, exact=True): if _distance < distance: distance = _distance pos = i - if op < HAVE_ARGUMENT: - i += 1 - else: - i += 3 return pos def last_instr(self, start, end, instr, target=None, exact=True): @@ -326,8 +315,7 @@ def last_instr(self, start, end, instr, target=None, exact=True): pos = None distance = len(code) - i = start - while i < end: + for i in self.op_range(start, end): op = code[i] if op in instr: if target is None: @@ -342,10 +330,6 @@ def last_instr(self, start, end, instr, target=None, exact=True): if _distance <= distance: distance = _distance pos = i - if op < HAVE_ARGUMENT: - i += 1 - else: - i += 3 return pos def all_instr(self, start, end, instr, target=None, include_beyond_target=False): @@ -367,8 +351,7 @@ def all_instr(self, start, end, instr, target=None, include_beyond_target=False) except: instr = [instr] result = [] - i = start - while i < end: + for i in self.op_range(start, end): op = code[i] if op in instr: if target is None: @@ -379,7 +362,6 @@ def all_instr(self, start, end, instr, target=None, include_beyond_target=False) result.append(i) elif t == target: result.append(i) - i += self.op_size(op) return result def op_size(self, op): @@ -387,6 +369,11 @@ def op_size(self, op): return 1 else: return 3 + + def op_range(self, start, end): + while start < end: + yield start + start += self.op_size(self.code[start]) def build_stmt_indices(self): code = self.code @@ -420,21 +407,18 @@ def build_stmt_indices(self): pass_stmts = set() for seq in stmt_opcode_seqs: - i = start - while i+len(seq)-1 < end: + for i in self.op_range(start, end-(len(seq)+1)): match = True - j = i for elem in seq: - if elem != code[j]: + if elem != code[i]: match = False break - j += self.op_size(code[j]) + i += self.op_size(code[i]) if match: - j = self.prev[j] - stmts.add(j) - pass_stmts.add(j) - i += self.op_size(code[i]) + i = self.prev[i] + stmts.add(i) + pass_stmts.add(i) if pass_stmts: stmt_list = list(stmts) @@ -483,56 +467,32 @@ def remove_mid_line_ifs(self, ifs): return filtered - def next_except_jump(self, start, end, target): + def next_except_jump(self, start): """ Return the next jump that was generated by an except SomeException: construct in a try...except...else clause or None if not found. 
""" HAVE_ARGUMENT = dis.HAVE_ARGUMENT - inner_try = self.first_instr(start, end, SETUP_EXCEPT, end, False) - - lookup = [JA, JF] - while start < end: - jmp = self.first_instr(start, end, lookup, target) - if jmp is None: - return None - if jmp == self.prev[end]: - return jmp - after = jmp + 3 - ops = [None, None, None, None] - opp = [0, 0, 0, 0] - pos = 0 - x = jmp+3 - while x <= end and pos < 4: - op = self.code[x] - if op >= HAVE_ARGUMENT: - break - ops[pos] = op - opp[pos] = x - pos += 1 - x += 1 - if (ops[0] == END_FINALLY and opp[0] == end)\ - or (ops[0] == DUP_TOP)\ - or (ops[0] == ops[1] == ops[2] == POP_TOP): - inner_trys = self.all_instr(start, jmp, SETUP_EXCEPT) - inner_finallys = self.all_instr(start, jmp, END_FINALLY) - if len(inner_trys) == len(inner_finallys): - return jmp - start = jmp + 3 - return None - - def fix_parent(self, target, parent): - """Fix parent boundaries if needed""" - start = parent['start'] - end = parent['end'] - - if target >= start or end-start < 3 or target not in self.loops: - return - if self.code[self.prev[end]]==JA: - cont_target = self.get_target(end-3, JA) - if target == cont_target: - parent['end'] = end-3 + except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE) + if except_match: + jmp = self.prev[self.get_target(except_match)] + if self.code[jmp] not in (JA, JF, RETURN_VALUE): + print '############################', jmp, dis.opname[self.code[jmp]] + return jmp + + count_END_FINALLY = 0 + count_SETUP_ = 0 + for i in self.op_range(start, len(self.code)): + op = self.code[i] + if op == END_FINALLY: + if count_END_FINALLY == count_SETUP_: + assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE) + return self.prev[i] + count_END_FINALLY += 1 + elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): + count_SETUP_ += 1 + def restrict_to_parent(self, target, parent): """Restrict pos to parent boundaries.""" @@ -628,39 +588,37 @@ def detect_structure(self, pos, op=None): 'start': start, 'end': end-4}) ## Now isolate the except and else blocks - start = end - target = self.get_target(self.prev[start]) - self.fix_parent(target, parent) - end = self.restrict_to_parent(target, parent) - #if target != end: - # self.fixed_jumps[self.prev[start]] = end + end_else = start_else = self.get_target(self.prev[end]) - end_finally = self.last_instr(start, end, END_FINALLY) - if end_finally is None: - return - lookup = [JF] - jump_end = self.last_instr(start, end, lookup) - if jump_end: - target = self.get_target(jump_end) - end = self.restrict_to_parent(target, parent) - # if target != end: - # self.fixed_jumps[jump_end] = end - ## Add the try-else block - self.structs.append({'type': 'try-else', - 'start': end_finally+1, - 'end': end}) ## Add the except blocks - i = start - while i < end_finally: - jmp = self.next_except_jump(i, end_finally, target) - if jmp is None: - break - self.structs.append({'type': 'except', - 'start': i, - 'end': jmp}) - # if target != end: - # self.fixed_jumps[jmp] = end - i = jmp+3 + i = end + while self.code[i] != END_FINALLY: + jmp = self.next_except_jump(i) + if self.code[jmp] == RETURN_VALUE: + self.structs.append({'type': 'except', + 'start': i, + 'end': jmp+1}) + i = jmp + 1 + else: + if self.get_target(jmp) != start_else: + end_else = self.get_target(jmp) + if self.code[jmp] == JF: + self.fixed_jumps[jmp] = -1 + self.structs.append({'type': 'except', + 'start': i, + 'end': jmp}) + i = jmp + 3 + + ## Add the try-else block + if end_else != start_else: + r_end_else = self.restrict_to_parent(end_else, 
parent) + self.structs.append({'type': 'try-else', + 'start': i+1, + 'end': r_end_else}) + self.fixed_jumps[i] = r_end_else + else: + self.fixed_jumps[i] = i+1 + elif op in (PJIF, PJIT): @@ -681,7 +639,7 @@ def detect_structure(self, pos, op=None): if target == rtarget: if code[pre[target]] == JA and code[target] != POP_BLOCK: if self.get_target(pre[target]) < pos: - self.jump_back_else[pre[target]] = True + self.jump_back_else.add(pre[target]) # is this an if and if op == PJIF: @@ -790,12 +748,11 @@ def find_jump_targets(self, code): 'end': n-1}] self.loops = [] ## All loop entry points self.fixed_jumps = {} ## Map fixed jumps to their real destination - self.jump_back_else = {} + self.jump_back_else = set() self.build_stmt_indices() targets = {} - i = 0 - while i < n: + for i in self.op_range(0, n): op = code[i] ## Determine structures and fix jumps for 2.3+ @@ -814,9 +771,11 @@ def find_jump_targets(self, code): if (oparg > i): label = oparg - if label is not None: + if label is not None and label != -1: targets[label] = targets.get(label, []) + [i] - i += self.op_size(op) + elif op == END_FINALLY and i in self.fixed_jumps: + label = self.fixed_jumps[i] + targets[label] = targets.get(label, []) + [i] return targets diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index bfe1fe1..6689cd3 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -270,22 +270,13 @@ 'forelselaststmtl': ( '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), 'trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ), - 'c_trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ), + 'tryelsestmt': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 5, -2 ), 'tf_trystmt': ( '%c%-%c%+', 1, 5 ), + 'tf_tryelsestmt': ( '%c%-%c%|else:\n%+%c', 1, 5, -2 ), 'except': ( '%|except:\n%+%c%-', 3 ), - 'except2': ( '%|except:\n%+%c%-', 3 ), 'except_cond1': ( '%|except %c:\n', 1 ), 'except_cond2': ( '%|except %c as %c:\n', 1, 5 ), - 'except_sub_stmts': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'except_sub_stmts_a': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'c_except_sub_stmts': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'c_except_sub_stmts2': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'c_except_sub_stmts_a': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'c_except_sub_stmts2_a': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'except_cond_cont': ( '%c%+%|continue\n%-', 0), - 'except_else': ( '%|else:\n%+%c%-', 2 ), - 'except_else2': ( '%|else:\n%+%c%-', 1 ), - 'except_else3': ( '%|else:\n%+%c%-', 2 ), + 'except_suite': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-', 1, 5 ), 'withstmt': ( '%|with %c:\n%+%c%-', 0, 3), 'withasstmt': ( '%|with %c as %c:\n%+%c%-', 0, 2, 3), @@ -666,12 +657,11 @@ def n_delete_subscr(self, node): # 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-', 1, 5 ), def n_tryfinallystmt(self, node): - if node[1] == 'stmts' and \ - len(node[1]) == 1 and \ - node[1][0] == 'sstmt' and \ - node[1][0][0] == 'stmt' and \ - node[1][0][0][0] == 'trystmt' or node[1][0][0] == 'c_trystmt': - node[1][0][0][0].type = 'tf_trystmt' + if len(node[1]) == 1 and node[1][0] == 'sstmt' and node[1][0][0] == 'stmt': + if node[1][0][0][0] == 'trystmt': + node[1][0][0][0].type = 'tf_trystmt' + if node[1][0][0][0] == 'tryelsestmt': + node[1][0][0][0].type = 'tf_tryelsestmt' self.default(node) def n_exec_stmt(self, node): From c5238380124f5313c3afe1d40077d59f80bc3873 Mon Sep 17 00:00:00 2001 From: wibiti Date: Mon, 20 Feb 2012 21:05:54 -0600 Subject: [PATCH 05/36] bug fixes; keeps print and import statements as single lines 
-Fixes a major if-else bug. -Fixes a while alignment error -print and import statements won't be split into multiple statements --- uncompyle2/Parser.py | 41 ++++++++++++++++++++++------- uncompyle2/Scanner.py | 61 +++++++++++++++++++++++++++---------------- uncompyle2/Walker.py | 37 +++++++++++++++++++++----- 3 files changed, 100 insertions(+), 39 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index eef86dc..b230477 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -196,12 +196,18 @@ def p_assign(self, args): def p_print(self, args): ''' - stmt ::= print_stmt - stmt ::= print_stmt_nl - stmt ::= print_nl_stmt - print_stmt ::= expr PRINT_ITEM - print_nl_stmt ::= PRINT_NEWLINE - print_stmt_nl ::= print_stmt print_nl_stmt + stmt ::= print_items_stmt + stmt ::= print_nl + stmt ::= print_items_nl_stmt + + print_items_stmt ::= expr PRINT_ITEM print_items_opt + print_items_nl_stmt ::= expr PRINT_ITEM print_items_opt PRINT_NEWLINE_CONT + print_items_opt ::= print_items + print_items_opt ::= + print_items ::= print_items print_item + print_items ::= print_item + print_item ::= expr PRINT_ITEM_CONT + print_nl ::= PRINT_NEWLINE ''' def p_print_to(self, args): @@ -241,6 +247,7 @@ def p_import20(self, args): stmt ::= _25_importstmt stmt ::= _25_importfrom stmt ::= _25_importstar + stmt ::= importmultiple importstmt2 ::= LOAD_CONST import_as importstar2 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR @@ -257,6 +264,18 @@ def p_import20(self, args): _25_importstmt ::= LOAD_CONST LOAD_CONST import_as _25_importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR _25_importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP + _25_importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR + _25_importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP + importmultiple ::= LOAD_CONST LOAD_CONST import_as imports_cont + + imports_cont ::= imports_cont import_cont + imports_cont ::= import_cont + import_cont ::= LOAD_CONST LOAD_CONST import_as_cont + import_as_cont ::= IMPORT_NAME_CONT designator + import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR designator + import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR designator + import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR LOAD_ATTR designator + import_as_cont ::= IMPORT_FROM designator ''' def p_grammar(self, args): @@ -286,6 +305,7 @@ def p_grammar(self, args): lastc_stmt ::= whileelselaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtr + lastc_stmt ::= ifelsestmtc c_stmts_opt ::= c_stmts c_stmts_opt ::= passstmt @@ -418,10 +438,11 @@ def p_grammar(self, args): iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK JUMP_BACK_ELSE - ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD c_stmts COME_FROM + ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD stmts COME_FROM ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD return_stmts COME_FROM - ifelsestmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE c_stmts - ifelsestmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE return_stmts + + ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE c_stmts + ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE return_stmts ifelsestmtr ::= testexpr return_stmts return_stmts ifelsestmtl ::= testexpr l_stmts_opt JUMP_ABSOLUTE l_stmts @@ -669,7 +690,7 @@ def p_expr(self, args): ''' def nonterminal(self, nt, args): - collect = ('stmts', 'exprlist', 'kvlist', '_stmts') + collect = ('stmts', 'exprlist', 'kvlist', '_stmts', 'print_items') if nt in collect and len(args) > 1: # diff 
--git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 45fe501..9ac4d66 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -13,6 +13,8 @@ from array import array from operator import itemgetter +HAVE_ARGUMENT = dis.HAVE_ARGUMENT + globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()}) PJIF = POP_JUMP_IF_FALSE @@ -106,13 +108,12 @@ def disassemble(self, co, classname=None): c = code[i] op = code[i] self.prev.append(i) - if op >= dis.HAVE_ARGUMENT: + if op >= HAVE_ARGUMENT: self.prev.append(i) self.prev.append(i) self.lines = [] linetuple = namedtuple('linetuple', ['l_no', 'next']) - self.if_lines = {} j = 0 linestarts = list(dis.findlinestarts(co)) linestartoffsets = {a for (a, _) in linestarts} @@ -122,15 +123,10 @@ def disassemble(self, co, classname=None): self.lines.append(linetuple(prev_line_no, start_byte)) j += 1 last_op = code[self.prev[start_byte]] - if last_op in (PJIF, PJIT): - self.if_lines[prev_line_no] = True - else: - self.if_lines[prev_line_no] = False (prev_start_byte, prev_line_no) = (start_byte, line_no) while j < n: self.lines.append(linetuple(prev_line_no, n)) j+=1 - self.if_lines[prev_line_no] = False cf = self.find_jump_targets(code) @@ -149,6 +145,28 @@ def unmangle(name): names = co.co_names varnames = co.co_varnames + last_stmt = self.next_stmt[0] + i = self.next_stmt[last_stmt] + replace = {} + while i < n-1: + if self.lines[last_stmt].next > i: + if code[last_stmt] == PRINT_ITEM: + if code[i] == PRINT_ITEM: + replace[i] = 'PRINT_ITEM_CONT' + elif code[i] == PRINT_NEWLINE: + replace[i] = 'PRINT_NEWLINE_CONT' + last_stmt = i + i = self.next_stmt[i] + + imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR)) + if len(imports) > 1: + last_import = imports[0] + for i in imports[1:]: + if self.lines[last_import].next > i: + if code[last_import] == IMPORT_NAME == code[i]: + replace[i] = 'IMPORT_NAME_CONT' + last_import = i + extended_arg = 0 for offset in self.op_range(0, n): @@ -162,7 +180,7 @@ def unmangle(name): op = code[offset] opname = dis.opname[op] oparg = None; pattr = None - if op >= dis.HAVE_ARGUMENT: + if op >= HAVE_ARGUMENT: oparg = code[offset+1] + code[offset+2] * 256 + extended_arg extended_arg = 0 if op == dis.EXTENDED_ARG: @@ -233,8 +251,11 @@ def unmangle(name): elif op == IMPORT_NAME: if pattr == '': pattr = '.' 
- - rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) + + if offset not in replace: + rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) + else: + rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) if offset in self.jump_back_else: rv.append(Token('JUMP_BACK_ELSE', None, None, @@ -270,8 +291,6 @@ def first_instr(self, start, end, instr, target=None, exact=True): code = self.code assert(start>=0 and end<=len(code)) - HAVE_ARGUMENT = dis.HAVE_ARGUMENT - try: None in instr except: instr = [instr] @@ -308,8 +327,6 @@ def last_instr(self, start, end, instr, target=None, exact=True): if not (start>=0 and end<=len(code)): return None - HAVE_ARGUMENT = dis.HAVE_ARGUMENT - try: None in instr except: instr = [instr] @@ -345,8 +362,6 @@ def all_instr(self, start, end, instr, target=None, include_beyond_target=False) code = self.code assert(start>=0 and end<=len(code)) - HAVE_ARGUMENT = dis.HAVE_ARGUMENT - try: None in instr except: instr = [instr] @@ -365,7 +380,7 @@ def all_instr(self, start, end, instr, target=None, include_beyond_target=False) return result def op_size(self, op): - if op < dis.HAVE_ARGUMENT: + if op < HAVE_ARGUMENT: return 1 else: return 3 @@ -472,7 +487,6 @@ def next_except_jump(self, start): Return the next jump that was generated by an except SomeException: construct in a try...except...else clause or None if not found. """ - HAVE_ARGUMENT = dis.HAVE_ARGUMENT except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE) if except_match: @@ -562,9 +576,8 @@ def detect_structure(self, pos, op=None): loop_type = 'for' else: loop_type = 'while' - (line_no, next_line_byte) = self.lines[pos] test = self.prev[next_line_byte] - assert(test is not None) + self.ignore_if.add(test) test_target = self.get_target(test) if test_target > (jump_back+3): jump_back = test_target @@ -608,7 +621,7 @@ def detect_structure(self, pos, op=None): 'start': i, 'end': jmp}) i = jmp + 3 - + ## Add the try-else block if end_else != start_else: r_end_else = self.restrict_to_parent(end_else, parent) @@ -691,6 +704,10 @@ def detect_structure(self, pos, op=None): self.fixed_jumps[pos] = pre[next] return + #don't add a struct for a while test, it's already taken care of + if pos in self.ignore_if: + return + if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts and pre[rtarget] != pos: rtarget = pre[rtarget] @@ -737,7 +754,6 @@ def find_jump_targets(self, code): This procedure is modelled after dis.findlables(), but here for each target the number of jumps are counted. 
""" - HAVE_ARGUMENT = dis.HAVE_ARGUMENT hasjrel = dis.hasjrel hasjabs = dis.hasjabs @@ -749,6 +765,7 @@ def find_jump_targets(self, code): self.loops = [] ## All loop entry points self.fixed_jumps = {} ## Map fixed jumps to their real destination self.jump_back_else = set() + self.ignore_if = set() self.build_stmt_indices() targets = {} diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 6689cd3..3cf1a88 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -229,9 +229,10 @@ 'assert2': ( '%|assert %c, %c\n' , 0, 3 ), 'assert_expr_or': ( '%c or %c', 0, 2 ), 'assert_expr_and': ( '%c and %c', 0, 2 ), - 'print_stmt': ( '%|print %c,\n', 0 ), - 'print_stmt_nl': ( '%|print %[0]C\n', (0,1, None) ), - 'print_nl_stmt': ( '%|print\n', ), + 'print_items_stmt': ( '%|print %c%c,\n', 0, 2), + 'print_items_nl_stmt': ( '%|print %c%c\n', 0, 2), + 'print_item': ( ', %c', 0), + 'print_nl': ( '%|print\n', ), 'print_to': ( '%|print >> %c, %c,\n', 0, 1 ), 'print_to_nl': ( '%|print >> %c, %c\n', 0, 1 ), 'print_nl_to': ( '%|print >> %c\n', 0 ), @@ -251,6 +252,7 @@ 'testtrue': ( 'not %p', (0,22) ), 'ifelsestmt': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), + 'ifelsestmtc': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelsestmtl': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelifstmt': ( '%|if %c:\n%+%c%-%c', 0, 1, 3 ), 'elifelifstmt': ( '%|elif %c:\n%+%c%-%c', 0, 1, 3 ), @@ -294,6 +296,8 @@ '_25_importstmt': ( '%|import %c\n', 2), '_25_importstar': ( '%|from %[2]{pattr} import *\n', ), '_25_importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), + 'importmultiple': ( '%|import %c%c\n', 2, 3), + 'import_cont' : ( ', %c', 2), # CE - Fixes for tuples '_25_assign2': ( '%|(%c, %c,) = (%c, %c)\n', 3, 4, 0, 1 ), @@ -557,8 +561,9 @@ def n_return_stmt(self, node): self.preorder(node[0]) self.prune() else: - self.write(self.indent, 'return ') + self.write(self.indent, 'return') if self.return_none or node != AST('return_stmt', [NONE, Token('RETURN_VALUE')]): + self.write(' ') self.preorder(node[0]) self.print_() self.prune() # stop recursing @@ -682,15 +687,15 @@ def n_exec_stmt(self, node): def n_ifelsestmt(self, node, preprocess=0): if len(node[3]) == 1 and not node[3][0] == 'continue_stmt': ifnode = node[3][0][0][0] - if node[3][0] == 'lastc_stmt' and node[3][0][0] == 'iflaststmt': + if node[3][0] == 'lastc_stmt' and node[3][0][0].type == 'iflaststmt': node.type = 'ifelifstmt' node[3][0][0].type = 'elifstmt' - elif ifnode == 'ifelsestmt': + elif ifnode.type in ('ifelsestmt', 'ifelsestmtc'): node.type = 'ifelifstmt' self.n_ifelsestmt(ifnode, preprocess=1) if ifnode == 'ifelifstmt': ifnode.type = 'elifelifstmt' - elif ifnode == 'ifelsestmt': + elif ifnode.type in ('ifelsestmt', 'ifelsestmtc'): ifnode.type = 'elifelsestmt' elif ifnode == 'ifstmt': node.type = 'ifelifstmt' @@ -698,6 +703,22 @@ def n_ifelsestmt(self, node, preprocess=0): if not preprocess: self.default(node) + def n_ifelsestmtc(self, node, preprocess=0): + if len(node[3]) == 1 and node[3][0] == 'lastc_stmt': + ifnode = node[3][0][0] + if ifnode == 'ifelsestmtc': + node.type = 'ifelifstmt' + self.n_ifelsestmtc(ifnode, preprocess=1) + if ifnode == 'ifelifstmt': + ifnode.type = 'elifelifstmt' + elif ifnode == 'ifelsestmtc': + ifnode.type = 'elifelsestmt' + elif ifnode == 'iflaststmt': + node.type = 'ifelifstmt' + ifnode.type = 'elifstmt' + if not preprocess: + self.default(node) + def n_ifelsestmtl(self, node, preprocess=0): if len(node[3]) == 1 and node[3][0] == 'lastl_stmt': ifnode = node[3][0][0] @@ -723,6 +744,8 @@ def 
n_import_as(self, node): else: self.write(iname, ' as ', sname) self.prune() # stop recursing + + n_import_as_cont = n_import_as def n_mkfunc(self, node): self.write(node[-2].attr.co_name) # = code.co_name From b746e6cc23126d5af4f33a75ab5ac747de30fedc Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 22 Feb 2012 23:42:40 -0600 Subject: [PATCH 06/36] Fixes an error; Some cosmetic improvements - Removes some unnecessary parentheses, for instance: before: '(a,b,) = (c, d)' now: 'a, b = c, d' - Changes some 'elif' parsing/printing --- README | 12 ++-- scripts/uncompyle2 | 4 ++ uncompyle2/Parser.py | 101 ++++++++++++++--------------- uncompyle2/Scanner.py | 2 + uncompyle2/Walker.py | 143 +++++++++++++++++++++++++++--------------- 5 files changed, 152 insertions(+), 110 deletions(-) diff --git a/README b/README index 689c68a..ac8b6a5 100644 --- a/README +++ b/README @@ -1,13 +1,15 @@ - uncompyle2 -- A Python 2.7 byte-code decompiler - 0.12 - 2012-1-23 + uncompyle2 + A Python 2.7 byte-code decompiler, written in Python 2.7 + 0.13 + 2012-2-22 Introduction ------------ 'uncompyle2' converts Python byte-code back into equivalent Python -source. It accepts byte-code from Python version 2.7 only. +source. It accepts byte-code from Python version 2.7 only. Additionally, +it will only run on Python 2.7. The generated source is very readable: docstrings, lists, tuples and hashes get pretty-printed. @@ -51,7 +53,7 @@ Features Requirements ------------ -'decompile' requires Python 2.2 or later. +uncompyle2 requires Python 2.7 Installation diff --git a/scripts/uncompyle2 b/scripts/uncompyle2 index 865ee16..26067d2 100755 --- a/scripts/uncompyle2 +++ b/scripts/uncompyle2 @@ -48,6 +48,10 @@ import os.path from uncompyle2 import main, verify import time +if sys.version[:3] != '2.7': + print >>sys.stderr, 'Error: uncompyle2 requires Python 2.7.' 
+ sys.exit(-1) + showasm = showast = do_verify = numproc = recurse_dirs = 0 outfile = '-' out_base = None diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index b230477..974f796 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -222,28 +222,9 @@ def p_print_to(self, args): print_to_items ::= print_to_item print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO ''' - # expr print_to* POP_TOP - # expr { print_to* } PRINT_NEWLINE_TO - - def p_import15(self, args): - ''' - stmt ::= importstmt - stmt ::= importfrom - - importstmt ::= IMPORT_NAME STORE_FAST - importstmt ::= IMPORT_NAME STORE_NAME - - importfrom ::= IMPORT_NAME importlist POP_TOP - importlist ::= importlist IMPORT_FROM - importlist ::= IMPORT_FROM - ''' def p_import20(self, args): - ''' - stmt ::= importstmt2 - stmt ::= importfrom2 - stmt ::= importstar2 - + ''' stmt ::= _25_importstmt stmt ::= _25_importfrom stmt ::= _25_importstar @@ -283,8 +264,8 @@ def p_grammar(self, args): stmts ::= stmts sstmt stmts ::= sstmt sstmt ::= stmt - sstmt ::= return_stmt sstmt ::= ifelsestmtr + sstmt ::= return_stmt sstmt ::= return_stmt RETURN_LAST stmts_opt ::= stmts @@ -297,15 +278,14 @@ def p_grammar(self, args): c_stmts ::= _stmts c_stmts ::= _stmts lastc_stmt c_stmts ::= lastc_stmt - c_stmts ::= _stmts lastl_stmt continue_stmt - c_stmts ::= lastl_stmt continue_stmt - c_stmts ::= continue_stmt + c_stmts ::= continue_stmts lastc_stmt ::= iflaststmt lastc_stmt ::= whileelselaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtr lastc_stmt ::= ifelsestmtc + lastc_stmt ::= tryelsestmtc c_stmts_opt ::= c_stmts c_stmts_opt ::= passstmt @@ -318,9 +298,22 @@ def p_grammar(self, args): lastl_stmt ::= iflaststmtl lastl_stmt ::= ifelsestmtl lastl_stmt ::= forelselaststmtl + lastl_stmt ::= tryelsestmtl l_stmts_opt ::= l_stmts l_stmts_opt ::= passstmt + + suite_stmts ::= _stmts + suite_stmts ::= return_stmts + suite_stmts ::= continue_stmts + + suite_stmts_opt ::= suite_stmts + suite_stmts_opt ::= passstmt + + else_suite ::= suite_stmts + else_suitel ::= l_stmts + else_suitec ::= c_stmts + else_suitec ::= return_stmts designList ::= designator designator designList ::= designator DUP_TOP designList @@ -350,6 +343,9 @@ def p_grammar(self, args): continue_stmt ::= JUMP_BACK continue_stmt ::= CONTINUE_LOOP + continue_stmts ::= _stmts lastl_stmt continue_stmt + continue_stmts ::= lastl_stmt continue_stmt + continue_stmts ::= continue_stmt stmt ::= raise_stmt raise_stmt ::= exprlist RAISE_VARARGS @@ -438,34 +434,34 @@ def p_grammar(self, args): iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK JUMP_BACK_ELSE - ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD stmts COME_FROM - ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD return_stmts COME_FROM + ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM + + ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec - ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE c_stmts - ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE return_stmts ifelsestmtr ::= testexpr return_stmts return_stmts - ifelsestmtl ::= testexpr l_stmts_opt JUMP_ABSOLUTE l_stmts - ifelsestmtl ::= testexpr l_stmts_opt _jump_back_jump_back_else l_stmts + ifelsestmtl ::= testexpr l_stmts_opt JUMP_ABSOLUTE else_suitel + ifelsestmtl ::= testexpr l_stmts_opt _jump_back_jump_back_else else_suitel _jump_back_jump_back_else ::= JUMP_BACK JUMP_BACK_ELSE - trystmt ::= SETUP_EXCEPT stmts_opt POP_BLOCK - JUMP_FORWARD COME_FROM except_stmts - END_FINALLY 
COME_FROM COME_FROM + trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + try_middle COME_FROM - trystmt ::= SETUP_EXCEPT stmts_opt POP_BLOCK - jmp_abs COME_FROM except_stmts - END_FINALLY COME_FROM + tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + try_middle else_suite COME_FROM - tryelsestmt ::= SETUP_EXCEPT stmts_opt POP_BLOCK - JUMP_FORWARD COME_FROM except_stmts - END_FINALLY COME_FROM try_else_suite COME_FROM + tryelsestmtc ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + try_middle else_suitec COME_FROM - tryelsestmt ::= SETUP_EXCEPT stmts_opt POP_BLOCK - jmp_abs COME_FROM except_stmts - END_FINALLY try_else_suite COME_FROM + tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + try_middle else_suitel COME_FROM + try_middle ::= jmp_abs COME_FROM except_stmts + END_FINALLY + try_middle ::= JUMP_FORWARD COME_FROM except_stmts + END_FINALLY COME_FROM + except_stmts ::= except_stmts except_stmt except_stmts ::= except_stmt @@ -487,24 +483,21 @@ def p_grammar(self, args): except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt jmp_abs except ::= POP_TOP POP_TOP POP_TOP return_stmts - try_else_suite ::= c_stmts - try_else_suite ::= l_stmts - jmp_abs ::= JUMP_ABSOLUTE jmp_abs ::= JUMP_BACK jmp_abs ::= JUMP_BACK JUMP_BACK_ELSE - tryfinallystmt ::= SETUP_FINALLY stmts + tryfinallystmt ::= SETUP_FINALLY suite_stmts POP_BLOCK LOAD_CONST - COME_FROM stmts_opt END_FINALLY + COME_FROM suite_stmts_opt END_FINALLY - withstmt ::= expr SETUP_WITH POP_TOP stmts_opt + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY - withasstmt ::= expr SETUP_WITH designator stmts_opt + withasstmt ::= expr SETUP_WITH designator suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY @@ -526,12 +519,12 @@ def p_grammar(self, args): whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt _jump_back POP_BLOCK - stmts COME_FROM + else_suite COME_FROM whileelselaststmt ::= SETUP_LOOP testexpr l_stmts_opt _jump_back POP_BLOCK - c_stmts COME_FROM + else_suitec COME_FROM _for ::= GET_ITER FOR_ITER _for ::= LOAD_CONST FOR_LOOP @@ -543,13 +536,13 @@ def p_grammar(self, args): for_block POP_BLOCK COME_FROM forelsestmt ::= SETUP_LOOP expr _for designator - for_block POP_BLOCK stmts COME_FROM + for_block POP_BLOCK else_suite COME_FROM forelselaststmt ::= SETUP_LOOP expr _for designator - for_block POP_BLOCK c_stmts COME_FROM + for_block POP_BLOCK else_suitec COME_FROM forelselaststmtl ::= SETUP_LOOP expr _for designator - for_block POP_BLOCK l_stmts COME_FROM + for_block POP_BLOCK else_suitel COME_FROM return_stmts ::= return_stmt return_stmts ::= _stmts return_stmt diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 9ac4d66..d5276ba 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -664,12 +664,14 @@ def detect_structure(self, pos, op=None): and pre[rtarget] not in self.stmts \ and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget: if code[pre[pre[rtarget]]] == JA \ + and self.remove_mid_line_ifs([pos]) \ and target == self.get_target(pre[pre[rtarget]]) \ and pre[pre[rtarget]] not in self.stmts \ and 1 == len(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ (PJIF, PJIT), target))): pass elif code[pre[pre[rtarget]]] == RETURN_VALUE \ + and self.remove_mid_line_ifs([pos]) \ and 1 == (len(set(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ (PJIF, PJIT), target))) \ | set(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ diff --git a/uncompyle2/Walker.py 
b/uncompyle2/Walker.py index 3cf1a88..185631b 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -259,6 +259,7 @@ 'elifstmt': ( '%|elif %c:\n%+%c%-', 0, 1 ), 'elifelsestmt': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelsestmtr': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 2 ), + 'elifelsestmtr': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 2 ), 'whilestmt': ( '%|while %c:\n%+%c%-\n', 1, 2 ), 'while1stmt': ( '%|while 1:\n%+%c%-\n', 1 ), @@ -271,10 +272,12 @@ '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), 'forelselaststmtl': ( '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), - 'trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ), - 'tryelsestmt': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 5, -2 ), - 'tf_trystmt': ( '%c%-%c%+', 1, 5 ), - 'tf_tryelsestmt': ( '%c%-%c%|else:\n%+%c', 1, 5, -2 ), + 'trystmt': ( '%|try:\n%+%c%-%c', 1, 3 ), + 'tryelsestmt': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), + 'tryelsestmtc': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), + 'tryelsestmtl': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), + 'tf_trystmt': ( '%c%-%c%+', 1, 3 ), + 'tf_tryelsestmt': ( '%c%-%c%|else:\n%+%c', 1, 3, 4 ), 'except': ( '%|except:\n%+%c%-', 3 ), 'except_cond1': ( '%|except %c:\n', 1 ), 'except_cond2': ( '%|except %c as %c:\n', 1, 5 ), @@ -300,8 +303,8 @@ 'import_cont' : ( ', %c', 2), # CE - Fixes for tuples - '_25_assign2': ( '%|(%c, %c,) = (%c, %c)\n', 3, 4, 0, 1 ), - '_25_assign3': ( '%|(%c, %c, %c,) = (%c, %c, %c)\n', 5, 6, 7, 0, 1, 2 ), + '_25_assign2': ( '%|%c, %c = %c, %c\n', 3, 4, 0, 1 ), + '_25_assign3': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 6, 7, 0, 1, 2 ), } @@ -662,7 +665,7 @@ def n_delete_subscr(self, node): # 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-', 1, 5 ), def n_tryfinallystmt(self, node): - if len(node[1]) == 1 and node[1][0] == 'sstmt' and node[1][0][0] == 'stmt': + if len(node[1][0]) == 1 and node[1][0][0] == 'stmt': if node[1][0][0][0] == 'trystmt': node[1][0][0][0].type = 'tf_trystmt' if node[1][0][0][0] == 'tryelsestmt': @@ -685,56 +688,81 @@ def n_exec_stmt(self, node): self.prune() # stop recursing def n_ifelsestmt(self, node, preprocess=0): - if len(node[3]) == 1 and not node[3][0] == 'continue_stmt': - ifnode = node[3][0][0][0] - if node[3][0] == 'lastc_stmt' and node[3][0][0].type == 'iflaststmt': - node.type = 'ifelifstmt' - node[3][0][0].type = 'elifstmt' - elif ifnode.type in ('ifelsestmt', 'ifelsestmtc'): - node.type = 'ifelifstmt' - self.n_ifelsestmt(ifnode, preprocess=1) - if ifnode == 'ifelifstmt': - ifnode.type = 'elifelifstmt' - elif ifnode.type in ('ifelsestmt', 'ifelsestmtc'): - ifnode.type = 'elifelsestmt' - elif ifnode == 'ifstmt': - node.type = 'ifelifstmt' - ifnode.type = 'elifstmt' + n = node[3][0] + if len(n) == 1 == len(n[0]) and n[0] == '_stmts': + n = n[0][0][0] + elif n[0].type in ('lastc_stmt', 'lastl_stmt'): + n = n[0][0] + else: + if not preprocess: + self.default(node) + return + + if n.type in ('ifstmt', 'iflaststmt', 'iflaststmtl'): + node.type = 'ifelifstmt' + n.type = 'elifstmt' + elif n.type in ('ifelsestmtr',): + node.type = 'ifelifstmt' + n.type = 'elifelsestmtr' + elif n.type in ('ifelsestmt', 'ifelsestmtc', 'ifelsestmtl'): + node.type = 'ifelifstmt' + self.n_ifelsestmt(n, preprocess=1) + if n == 'ifelifstmt': + n.type = 'elifelifstmt' + elif n.type in ('ifelsestmt', 'ifelsestmtc', 'ifelsestmtl'): + n.type = 'elifelsestmt' if not preprocess: self.default(node) - def n_ifelsestmtc(self, node, preprocess=0): - if len(node[3]) == 1 and node[3][0] == 'lastc_stmt': - ifnode = node[3][0][0] - if 
ifnode == 'ifelsestmtc': - node.type = 'ifelifstmt' - self.n_ifelsestmtc(ifnode, preprocess=1) - if ifnode == 'ifelifstmt': - ifnode.type = 'elifelifstmt' - elif ifnode == 'ifelsestmtc': - ifnode.type = 'elifelsestmt' - elif ifnode == 'iflaststmt': - node.type = 'ifelifstmt' - ifnode.type = 'elifstmt' - if not preprocess: + n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt + + def n_ifelsestmtr(self, node): + if len(node[2]) != 2: self.default(node) - - def n_ifelsestmtl(self, node, preprocess=0): - if len(node[3]) == 1 and node[3][0] == 'lastl_stmt': - ifnode = node[3][0][0] - if ifnode == 'ifelsestmtl': - node.type = 'ifelifstmt' - self.n_ifelsestmtl(ifnode, preprocess=1) - if ifnode == 'ifelifstmt': - ifnode.type = 'elifelifstmt' - elif ifnode == 'ifelsestmtl': - ifnode.type = 'elifelsestmt' - elif ifnode == 'iflaststmtl': - node.type = 'ifelifstmt' - ifnode.type = 'elifstmt' - if not preprocess: + + if not (node[2][0][0][0] == 'ifstmt' and node[2][0][0][0][1][0] == 'return_stmts') \ + and not (node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_stmts'): self.default(node) + return + + if node.type == 'elifelsestmtr': + self.write(self.indent, 'elif ') + else: + self.write(self.indent, 'if ') + self.preorder(node[0]) + self.print_(':') + self.indentMore() + self.preorder(node[1]) + self.indentLess() + if_ret_at_end = False + if len(node[2][0]) >= 3: + if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_stmts': + if_ret_at_end = True + + past_else = False + prev_stmt_is_if_ret = True + for n in node[2][0]: + if (n[0] == 'ifstmt' and n[0][1][0] == 'return_stmts'): + if prev_stmt_is_if_ret: + n[0].type = 'elifstmt' + prev_stmt_is_if_ret = True + else: + prev_stmt_is_if_ret = False + if not past_else and not if_ret_at_end: + self.print_(self.indent, 'else:') + self.indentMore() + past_else = True + self.preorder(n) + if not past_else or if_ret_at_end: + self.print_(self.indent, 'else:') + self.indentMore() + self.preorder(node[2][1]) + self.indentLess() + self.prune() + + n_elifelsestmtr = n_ifelsestmtr + def n_import_as(self, node): iname = node[0].pattr; assert node[-1][-1].type.startswith('STORE_') @@ -933,6 +961,19 @@ def n_build_list(self, node): self.prec = p self.prune() + def n_unpack(self, node): + for elem in node[1:]: + if elem[0].type == 'unpack': + self.write('(') + self.preorder(elem) + self.write(')') + else: + self.preorder(elem) + if elem is not node[-1]: + self.write(', ') + if len(node) == 2: + self.write(',') + self.prune() def engine(self, entry, startnode): #self.print_("-----") From 071093c75b8161c47bf551975c74450bc515f2bc Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 29 Feb 2012 23:20:22 -0600 Subject: [PATCH 07/36] various fixes; reduced number of newlines between functions --- uncompyle2/Parser.py | 54 +++--- uncompyle2/Scanner.py | 54 +++--- uncompyle2/Walker.py | 256 ++++++++++++++++++--------- uncompyle2/__init__.py | 2 + uncompyle2/verify.py | 391 +++++++++++++++++++++-------------------- 5 files changed, 416 insertions(+), 341 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index 974f796..ef7f84c 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -188,10 +188,10 @@ def p_assign(self, args): assign ::= expr DUP_TOP designList assign ::= expr designator - stmt ::= _25_assign2 - stmt ::= _25_assign3 - _25_assign2 ::= expr expr ROT_TWO designator designator - _25_assign3 ::= expr expr expr ROT_THREE ROT_TWO designator designator designator + stmt ::= assign2 + stmt ::= assign3 + assign2 ::= expr 
expr ROT_TWO designator designator + assign3 ::= expr expr expr ROT_THREE ROT_TWO designator designator designator ''' def p_print(self, args): @@ -225,15 +225,11 @@ def p_print_to(self, args): def p_import20(self, args): ''' - stmt ::= _25_importstmt - stmt ::= _25_importfrom - stmt ::= _25_importstar + stmt ::= importstmt + stmt ::= importfrom + stmt ::= importstar stmt ::= importmultiple - importstmt2 ::= LOAD_CONST import_as - importstar2 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR - - importfrom2 ::= LOAD_CONST IMPORT_NAME importlist2 POP_TOP importlist2 ::= importlist2 import_as importlist2 ::= import_as import_as ::= IMPORT_NAME designator @@ -242,11 +238,11 @@ def p_import20(self, args): import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR LOAD_ATTR designator import_as ::= IMPORT_FROM designator - _25_importstmt ::= LOAD_CONST LOAD_CONST import_as - _25_importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR - _25_importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP - _25_importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR - _25_importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP + importstmt ::= LOAD_CONST LOAD_CONST import_as + importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR + importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP + importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR + importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP importmultiple ::= LOAD_CONST LOAD_CONST import_as imports_cont imports_cont ::= imports_cont import_cont @@ -292,6 +288,7 @@ def p_grammar(self, args): l_stmts ::= _stmts l_stmts ::= return_stmts + l_stmts ::= continue_stmts l_stmts ::= _stmts lastl_stmt l_stmts ::= lastl_stmt @@ -341,7 +338,7 @@ def p_grammar(self, args): stmt ::= break_stmt break_stmt ::= BREAK_LOOP - continue_stmt ::= JUMP_BACK + continue_stmt ::= CONTINUE continue_stmt ::= CONTINUE_LOOP continue_stmts ::= _stmts lastl_stmt continue_stmt continue_stmts ::= lastl_stmt continue_stmt @@ -412,7 +409,6 @@ def p_grammar(self, args): _jump ::= JUMP_ABSOLUTE _jump ::= JUMP_FORWARD _jump ::= JUMP_BACK - _jump ::= JUMP_BACK JUMP_BACK_ELSE jmp_false ::= POP_JUMP_IF_FALSE jmp_true ::= POP_JUMP_IF_TRUE @@ -430,9 +426,7 @@ def p_grammar(self, args): iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE - iflaststmtl ::= testexpr l_stmts_opt JUMP_ABSOLUTE - iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK - iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK JUMP_BACK_ELSE + iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM @@ -440,9 +434,7 @@ def p_grammar(self, args): ifelsestmtr ::= testexpr return_stmts return_stmts - ifelsestmtl ::= testexpr l_stmts_opt JUMP_ABSOLUTE else_suitel - ifelsestmtl ::= testexpr l_stmts_opt _jump_back_jump_back_else else_suitel - _jump_back_jump_back_else ::= JUMP_BACK JUMP_BACK_ELSE + ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK @@ -485,7 +477,6 @@ def p_grammar(self, args): jmp_abs ::= JUMP_ABSOLUTE jmp_abs ::= JUMP_BACK - jmp_abs ::= JUMP_BACK JUMP_BACK_ELSE @@ -503,33 +494,30 @@ def p_grammar(self, args): whilestmt ::= SETUP_LOOP testexpr - l_stmts_opt _jump_back + l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM - _jump_back ::= JUMP_BACK - _jump_back ::= JUMP_BACK JUMP_BACK_ELSE - whilestmt ::= SETUP_LOOP testexpr return_stmts POP_BLOCK COME_FROM - while1stmt ::= SETUP_LOOP l_stmts _jump_back COME_FROM + while1stmt ::= SETUP_LOOP 
l_stmts JUMP_BACK COME_FROM while1stmt ::= SETUP_LOOP return_stmts COME_FROM whileelsestmt ::= SETUP_LOOP testexpr - l_stmts_opt _jump_back + l_stmts_opt JUMP_BACK POP_BLOCK else_suite COME_FROM whileelselaststmt ::= SETUP_LOOP testexpr - l_stmts_opt _jump_back + l_stmts_opt JUMP_BACK POP_BLOCK else_suitec COME_FROM _for ::= GET_ITER FOR_ITER _for ::= LOAD_CONST FOR_LOOP - for_block ::= l_stmts_opt _jump_back + for_block ::= l_stmts_opt JUMP_BACK for_block ::= return_stmts _come_from forstmt ::= SETUP_LOOP expr _for designator diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index d5276ba..4c39341 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -45,7 +45,7 @@ def __cmp__(self, o): def __repr__(self): return str(self.type) def __str__(self): - pattr = self.pattr or '' + pattr = self.pattr if self.linestart: return '\n%s\t%-17s %r' % (self.offset, self.type, pattr) else: @@ -239,7 +239,11 @@ def unmangle(name): elif op == JA: target = self.get_target(offset) if target < offset: - opname = 'JUMP_BACK' + if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \ + and offset not in self.not_continue: + opname = 'CONTINUE' + else: + opname = 'JUMP_BACK' elif op == LOAD_GLOBAL: try: @@ -248,18 +252,10 @@ def unmangle(name): except AttributeError: pass - elif op == IMPORT_NAME: - if pattr == '': - pattr = '.' - if offset not in replace: rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) else: rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) - - if offset in self.jump_back_else: - rv.append(Token('JUMP_BACK_ELSE', None, None, - offset="%s_" % offset )) if self.showasm: out = self.out # shortcut @@ -401,11 +397,11 @@ def build_stmt_indices(self): POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, - IMPORT_NAME, IMPORT_FROM, RETURN_VALUE, RAISE_VARARGS, POP_TOP, + RETURN_VALUE, RAISE_VARARGS, POP_TOP, PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO, STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3, - JUMP_ABSOLUTE, + JUMP_ABSOLUTE, EXEC_STMT, } stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)] @@ -413,7 +409,7 @@ def build_stmt_indices(self): designator_ops = { STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, - STORE_SUBSCR, UNPACK_SEQUENCE, + STORE_SUBSCR, UNPACK_SEQUENCE, JA } prelim = self.all_instr(start, end, stmt_opcodes) @@ -451,7 +447,7 @@ def build_stmt_indices(self): stmts.remove(s) continue j = self.prev[s] - while j == JA: + while code[j] == JA: j = self.prev[j] if code[j] == LIST_APPEND: #list comprehension stmts.remove(s) @@ -634,26 +630,25 @@ def detect_structure(self, pos, op=None): elif op in (PJIF, PJIT): - + #import pdb; pdb.set_trace() start = pos+3 target = self.get_target(pos, op) rtarget = self.restrict_to_parent(target, parent) pre = self.prev + if target != rtarget and parent['type'] == 'and/or': + self.fixed_jumps[pos] = rtarget + return #does this jump to right after another cond jump? 
# if so, it's part of a larger conditional if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, PJIF, PJIT)) and (target > pos): self.fixed_jumps[pos] = pre[target] + self.structs.append({'type': 'and/or', + 'start': start, + 'end': pre[target]}) return - - # is this an if-else at end of a loop? - # if so, indicate with special opcode to help parser - if target == rtarget: - if code[pre[target]] == JA and code[target] != POP_BLOCK: - if self.get_target(pre[target]) < pos: - self.jump_back_else.add(pre[target]) - + # is this an if and if op == PJIF: #import pdb; pdb.set_trace() @@ -666,7 +661,7 @@ def detect_structure(self, pos, op=None): if code[pre[pre[rtarget]]] == JA \ and self.remove_mid_line_ifs([pos]) \ and target == self.get_target(pre[pre[rtarget]]) \ - and pre[pre[rtarget]] not in self.stmts \ + and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\ and 1 == len(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ (PJIF, PJIT), target))): pass @@ -710,7 +705,9 @@ def detect_structure(self, pos, op=None): if pos in self.ignore_if: return - if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts and pre[rtarget] != pos: + if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \ + and pre[rtarget] != pos and pre[pre[rtarget]] != pos \ + and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA): rtarget = pre[rtarget] #does the if jump just beyond a jump op, then this is probably an if statement @@ -727,7 +724,8 @@ def detect_structure(self, pos, op=None): self.structs.append({'type': 'if-then', 'start': start, 'end': pre[rtarget]}) - + self.not_continue.add(pre[rtarget]) + if rtarget < end: self.structs.append({'type': 'if-else', 'start': rtarget, @@ -743,6 +741,8 @@ def detect_structure(self, pos, op=None): unop_target = self.last_instr(pos, target, JF, target) if unop_target and code[unop_target+3] != ROT_TWO: self.fixed_jumps[pos] = unop_target + else: + self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) @@ -766,9 +766,9 @@ def find_jump_targets(self, code): 'end': n-1}] self.loops = [] ## All loop entry points self.fixed_jumps = {} ## Map fixed jumps to their real destination - self.jump_back_else = set() self.ignore_if = set() self.build_stmt_indices() + self.not_continue = set() targets = {} for i in self.op_range(0, n): diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 185631b..10234ac 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -94,9 +94,9 @@ 'STORE_ATTR': ( '%c.%[1]{pattr}', 0), # 'STORE_SUBSCR': ( '%c[%c]', 0, 1 ), 'STORE_SLICE+0': ( '%c[:]', 0 ), - 'STORE_SLICE+1': ( '%c[%c:]', 0, 1 ), - 'STORE_SLICE+2': ( '%c[:%c]', 0, 1 ), - 'STORE_SLICE+3': ( '%c[%c:%c]', 0, 1, 2 ), + 'STORE_SLICE+1': ( '%c[%p:]', 0, (1,100) ), + 'STORE_SLICE+2': ( '%c[:%p]', 0, (1,100) ), + 'STORE_SLICE+3': ( '%c[%p:%p]', 0, (1,100), (2,100) ), 'DELETE_SLICE+0': ( '%|del %c[:]\n', 0 ), 'DELETE_SLICE+1': ( '%|del %c[%c:]\n', 0, 1 ), 'DELETE_SLICE+2': ( '%|del %c[:%c]\n', 0, 1 ), @@ -147,9 +147,9 @@ 'unary_convert': ( '`%c`', 0 ), 'get_iter': ( 'iter(%c)', 0 ), 'slice0': ( '%c[:]', 0 ), - 'slice1': ( '%c[%c:]', 0, 1 ), - 'slice2': ( '%c[:%c]', 0, 1 ), - 'slice3': ( '%c[%c:%c]', 0, 1, 2 ), + 'slice1': ( '%c[%p:]', 0, (1,100) ), + 'slice2': ( '%c[:%p]', 0, (1,100) ), + 'slice3': ( '%c[%p:%p]', 0, (1,100), (2,100) ), 'IMPORT_FROM': ( '%{pattr}', ), 'load_attr': ( '%c.%[1]{pattr}', 0), @@ -164,14 +164,15 @@ 'DELETE_NAME': ( '%|del 
%{pattr}\n', ), 'DELETE_GLOBAL': ( '%|del %{pattr}\n', ), 'delete_subscr': ( '%|del %c[%c]\n', 0, 1,), - 'binary_subscr': ( '%c[%c]', 0, 1), - 'binary_subscr2': ( '%c[%c]', 0, 1), + 'binary_subscr': ( '%c[%p]', 0, (1,100)), + 'binary_subscr2': ( '%c[%p]', 0, (1,100)), 'store_subscr': ( '%c[%c]', 0, 1), 'STORE_FAST': ( '%{pattr}', ), 'STORE_NAME': ( '%{pattr}', ), 'STORE_GLOBAL': ( '%{pattr}', ), 'STORE_DEREF': ( '%{pattr}', ), - 'unpack': ( '(%C,)', (1, sys.maxint, ', ') ), + 'unpack': ( '%C%,', (1, sys.maxint, ', ') ), + 'unpack_w_parens': ( '(%C%,)', (1, sys.maxint, ', ') ), 'unpack_list': ( '[%C]', (1, sys.maxint, ', ') ), 'build_tuple2': ( '%P', (0,-1,', ', 100) ), @@ -199,30 +200,24 @@ 'and': ( '%c and %c', 0, 2 ), 'and2': ( '%c', 3 ), 'or': ( '%c or %c', 0, 2 ), - 'conditional': ( '%p if %p else %p', (2,100), (0,100), (4,100)), - 'conditionaland': ( '%p if %p and %p else %p', (4,100), (0,24), (2,24), (6,100)), - 'conditionalnot': ( '%p if not %p else %p', (2,100), (0,22), (4,100)), + 'conditional': ( '%p if %p else %p', (2,27), (0,27), (4,27)), + 'conditionaland': ( '%p if %p and %p else %p', (4,27), (0,24), (2,24), (6,27)), + 'conditionalnot': ( '%p if not %p else %p', (2,27), (0,22), (4,27)), 'conditional_lambda': ( '(%c if %c else %c)', 2, 0, 3), 'conditional_lambda2': ( '(%c if %p and %p else %c)', 4, (0,24), (2,24), 5), 'return_lambda': ('%c', 0), - 'compare': ( '%c %[-1]{pattr} %c', 0, 1 ), - 'cmp_list': ( '%c %c', 0, 1), - 'cmp_list1': ( '%[3]{pattr} %c %c', 0, -2), - 'cmp_list2': ( '%[1]{pattr} %c', 0), + 'compare': ( '%p %[-1]{pattr} %p', (0,19), (1,19) ), + 'cmp_list': ( '%p %p', (0,20), (1,19)), + 'cmp_list1': ( '%[3]{pattr} %p %p', (0,19), (-2,19)), + 'cmp_list2': ( '%[1]{pattr} %p', (0,19)), # 'classdef': (), # handled by n_classdef() - 'funcdef': ( '\n%|def %c\n', -2), # -2 to handle closures - 'funcdefdeco': ( '%c', 0), - 'mkfuncdeco': ( '\n%|@%c%c', 0, 1), - 'mkfuncdeco0': ( '\n%|def %c\n', 0), + 'funcdef': ( '\n\n%|def %c\n', -2), # -2 to handle closures + 'funcdefdeco': ( '\n\n%c', 0), + 'mkfuncdeco': ( '%|@%c\n%c', 0, 1), + 'mkfuncdeco0': ( '%|def %c\n', 0), 'classdefdeco': ( '%c', 0), - 'classdefdeco1': ( '\n%|@%c%c', 0, 1), + 'classdefdeco1': ( '\n\n%|@%c%c', 0, 1), 'kwarg': ( '%[0]{pattr}=%c', 1), - 'importstmt': ( '%|import %[0]{pattr}\n', ), - 'importfrom': ( '%|from %[0]{pattr} import %c\n', 1 ), - 'importlist': ( '%C', (0, sys.maxint, ', ') ), - 'importstmt2': ( '%|import %c\n', 1), - 'importstar2': ( '%|from %[1]{pattr} import *\n', ), - 'importfrom2': ( '%|from %[1]{pattr} import %c\n', 2 ), 'importlist2': ( '%C', (0, sys.maxint, ', ') ), 'assert': ( '%|assert %c\n' , 0 ), @@ -259,21 +254,21 @@ 'elifstmt': ( '%|elif %c:\n%+%c%-', 0, 1 ), 'elifelsestmt': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelsestmtr': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 2 ), - 'elifelsestmtr': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 2 ), + 'elifelsestmtr': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-\n\n', 0, 1, 2 ), - 'whilestmt': ( '%|while %c:\n%+%c%-\n', 1, 2 ), - 'while1stmt': ( '%|while 1:\n%+%c%-\n', 1 ), - 'whileelsestmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-\n', 1, 2, -2 ), - 'whileelselaststmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-\n', 1, 2, -2 ), - 'forstmt': ( '%|for %c in %c:\n%+%c%-\n', 3, 1, 4 ), + 'whilestmt': ( '%|while %c:\n%+%c%-\n\n', 1, 2 ), + 'while1stmt': ( '%|while 1:\n%+%c%-\n\n', 1 ), + 'whileelsestmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-\n\n', 1, 2, -2 ), + 'whileelselaststmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-', 1, 2, -2 ), + 
'forstmt': ( '%|for %c in %c:\n%+%c%-\n\n', 3, 1, 4 ), 'forelsestmt': ( - '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n', 3, 1, 4, -2), + '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, 1, 4, -2), 'forelselaststmt': ( '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), 'forelselaststmtl': ( - '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), - 'trystmt': ( '%|try:\n%+%c%-%c', 1, 3 ), - 'tryelsestmt': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), + '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, 1, 4, -2), + 'trystmt': ( '%|try:\n%+%c%-%c\n\n', 1, 3 ), + 'tryelsestmt': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-\n\n', 1, 3, 4 ), 'tryelsestmtc': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), 'tryelsestmtl': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), 'tf_trystmt': ( '%c%-%c%+', 1, 3 ), @@ -282,7 +277,7 @@ 'except_cond1': ( '%|except %c:\n', 1 ), 'except_cond2': ( '%|except %c as %c:\n', 1, 5 ), 'except_suite': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-', 1, 5 ), + 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 5 ), 'withstmt': ( '%|with %c:\n%+%c%-', 0, 3), 'withasstmt': ( '%|with %c as %c:\n%+%c%-', 0, 2, 3), 'passstmt': ( '%|pass\n', ), @@ -296,15 +291,15 @@ ## # Import style for 2.5 - '_25_importstmt': ( '%|import %c\n', 2), - '_25_importstar': ( '%|from %[2]{pattr} import *\n', ), - '_25_importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), + 'importstmt': ( '%|import %c\n', 2), + 'importstar': ( '%|from %[2]{pattr} import *\n', ), + 'importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), 'importmultiple': ( '%|import %c%c\n', 2, 3), 'import_cont' : ( ', %c', 2), # CE - Fixes for tuples - '_25_assign2': ( '%|%c, %c = %c, %c\n', 3, 4, 0, 1 ), - '_25_assign3': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 6, 7, 0, 1, 2 ), + 'assign2': ( '%|%c, %c = %c, %c\n', 3, 4, 0, 1 ), + 'assign3': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 6, 7, 0, 1, 2 ), } @@ -447,6 +442,7 @@ def __init__(self, out, scanner, showast=0): self.return_none = False self.mod_globs = set() self.currentclass = None + self.pending_newlines = 0 f = property(lambda s: s.__params['f'], lambda s, x: s.__params.__setitem__('f', x), @@ -476,6 +472,8 @@ def indentLess(self, indent=TAB): def traverse(self, node, indent=None, isLambda=0): self.__param_stack.append(self.__params) if indent is None: indent = self.indent + p = self.pending_newlines + self.pending_newlines = 0 self.__params = { '_globals': {}, 'f': cStringIO.StringIO(), @@ -483,21 +481,49 @@ def traverse(self, node, indent=None, isLambda=0): 'isLambda': isLambda, } self.preorder(node) + self.f.write('\n'*self.pending_newlines) result = self.f.getvalue() self.__params = self.__param_stack.pop() + self.pending_newlines = p return result def write(self, *data): - if type(data) == ListType: - self.f.writelines(data) - elif type(data) == TupleType: - self.f.writelines(list(data)) - else: - self.f.write(data) + if (len(data) == 0) or (len(data) == 1 and data[0] == ''): + return +# import pdb; pdb.set_trace() + out = ''.join((str(j) for j in data)) + n = 0 + for i in out: + if i == '\n': + n += 1 + if n == len(out): + self.pending_newlines = max(self.pending_newlines, n) + return + elif n: + self.pending_newlines = max(self.pending_newlines, n) + out = out[n:] + break + else: + break + + if self.pending_newlines > 0: + self.f.write('\n'*self.pending_newlines) + self.pending_newlines = 0 + + for i in out[::-1]: + if i == '\n': + self.pending_newlines += 1 + else: + break + + if self.pending_newlines: + out = 
out[:-self.pending_newlines] + self.f.write(out) def print_(self, *data): - self.write(*data) - print >> self.f + if data and not(len(data) == 1 and data[0] ==''): + self.write(*data) + self.pending_newlines = max(self.pending_newlines, 1) def print_docstring(self, indent, docstring): quote = '"""' @@ -579,6 +605,8 @@ def n_yield(self, node): self.prune() # stop recursing def n_buildslice3(self, node): + p = self.prec + self.prec = 100 if node[0] != NONE: self.preorder(node[0]) self.write(':') @@ -587,14 +615,18 @@ def n_buildslice3(self, node): self.write(':') if node[2] != NONE: self.preorder(node[2]) + self.prec = p self.prune() # stop recursing def n_buildslice2(self, node): + p = self.prec + self.prec = 100 if node[0] != NONE: self.preorder(node[0]) self.write(':') if node[1] != NONE: self.preorder(node[1]) + self.prec = p self.prune() # stop recursing # def n_l_stmts(self, node): @@ -611,6 +643,8 @@ def n_expr(self, node): else: n = node[0] self.prec = PRECEDENCE.get(n,-2) + if n == 'LOAD_CONST' and repr(n.pattr)[0] == '-': + self.prec = 6 if p < self.prec: self.write('(') self.preorder(node[0]) @@ -725,10 +759,7 @@ def n_ifelsestmtr(self, node): self.default(node) return - if node.type == 'elifelsestmtr': - self.write(self.indent, 'elif ') - else: - self.write(self.indent, 'if ') + self.write(self.indent, 'if ') self.preorder(node[0]) self.print_(':') self.indentMore() @@ -760,9 +791,39 @@ def n_ifelsestmtr(self, node): self.preorder(node[2][1]) self.indentLess() self.prune() - - n_elifelsestmtr = n_ifelsestmtr - + + def n_elifelsestmtr(self, node): + if len(node[2]) != 2: + self.default(node) + + for n in node[2][0]: + if not (n[0] == 'ifstmt' and n[0][1][0] == 'return_stmts'): + self.default(node) + return + + self.write(self.indent, 'elif ') + self.preorder(node[0]) + self.print_(':') + self.indentMore() + self.preorder(node[1]) + self.indentLess() + + if_ret_at_end = False + if len(node[2][0]) >= 3: + if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_stmts': + if_ret_at_end = True + + past_else = False + prev_stmt_is_if_ret = True + for n in node[2][0]: + n[0].type = 'elifstmt' + self.preorder(n) + self.print_(self.indent, 'else:') + self.indentMore() + self.preorder(node[2][1]) + self.indentLess() + self.prune() + def n_import_as(self, node): iname = node[0].pattr; assert node[-1][-1].type.startswith('STORE_') @@ -775,10 +836,21 @@ def n_import_as(self, node): n_import_as_cont = n_import_as + def n_importfrom(self, node): + if node[0].pattr > 0: + node[2].pattr = '.'*node[0].pattr+node[2].pattr + self.default(node) + + n_importstar = n_importfrom + def n_mkfunc(self, node): self.write(node[-2].attr.co_name) # = code.co_name self.indentMore() self.make_function(node, isLambda=0) + if len(self.__param_stack) > 1: + self.write('\n\n') + else: + self.write('\n\n\n') self.indentLess() self.prune() # stop recursing @@ -788,7 +860,7 @@ def n_mklambda(self, node): def n_list_compr(self, node): p = self.prec - self.prec = 100 + self.prec = 27 n = node[-1] assert n == 'list_iter' # find innerst node @@ -802,12 +874,12 @@ def n_list_compr(self, node): self.preorder(n[0]) # lc_body self.preorder(node[-1]) # for/if parts self.write( ' ]') - self.prune() # stop recursing self.prec = p + self.prune() # stop recursing def comprehension_walk(self, node, iter_index): p = self.prec - self.prec = 100 + self.prec = 27 code = node[-5].attr assert type(code) == CodeType @@ -856,7 +928,7 @@ def n_classdef(self, node): cclass = self.currentclass self.currentclass = 
str(node[0].pattr) - self.print_() + self.write('\n\n') self.write(self.indent, 'class ', self.currentclass) self.print_super_classes(node) self.print_(':') @@ -865,9 +937,15 @@ def n_classdef(self, node): self.indentMore() self.build_class(node[2][-2].attr) self.indentLess() - self.prune() self.currentclass = cclass + if len(self.__param_stack) > 1: + self.write('\n\n') + else: + self.write('\n\n\n') + + self.prune() + n_classdefdeco2 = n_classdef @@ -962,18 +1040,24 @@ def n_build_list(self, node): self.prune() def n_unpack(self, node): - for elem in node[1:]: - if elem[0].type == 'unpack': - self.write('(') - self.preorder(elem) - self.write(')') - else: - self.preorder(elem) - if elem is not node[-1]: - self.write(', ') - if len(node) == 2: - self.write(',') - self.prune() + for n in node[1:]: + if n[0].type == 'unpack': + n[0].type = 'unpack_w_parens' + self.default(node) + + n_unpack_w_parens = n_unpack + + def n_assign2(self, node): + for n in node[-2:]: + if n[0] == 'unpack': + n[0].type = 'unpack_w_parens' + self.default(node) + + def n_assign3(self, node): + for n in node[-3:]: + if n[0] == 'unpack': + n[0].type = 'unpack_w_parens' + self.default(node) def engine(self, entry, startnode): #self.print_("-----") @@ -1004,9 +1088,9 @@ def engine(self, entry, startnode): elif typ == '-': self.indentLess() elif typ == '|': self.write(self.indent) ## no longer used, since BUILD_TUPLE_n is pretty printed: - ##elif typ == ',': - ## if lastC == 1: - ## self.write(',') + elif typ == ',': + if lastC == 1: + self.write(',') elif typ == 'c': self.preorder(node[entry[arg]]) arg += 1 @@ -1018,8 +1102,8 @@ def engine(self, entry, startnode): arg += 1 elif typ == 'C': low, high, sep = entry[arg] - ## lastC = remaining = len(node[low:high]) - remaining = len(node[low:high]) + lastC = remaining = len(node[low:high]) + ## remaining = len(node[low:high]) for subnode in node[low:high]: self.preorder(subnode) remaining -= 1 @@ -1029,8 +1113,8 @@ def engine(self, entry, startnode): elif typ == 'P': p = self.prec low, high, sep, self.prec = entry[arg] - ## lastC = remaining = len(node[low:high]) - remaining = len(node[low:high]) + lastC = remaining = len(node[low:high]) + ## remaining = len(node[low:high]) for subnode in node[low:high]: self.preorder(subnode) remaining -= 1 @@ -1042,7 +1126,7 @@ def engine(self, entry, startnode): d = node.__dict__ expr = m.group('expr') try: - self.f.write(eval(expr, d, d)) + self.write(eval(expr, d, d)) except: print node raise @@ -1124,9 +1208,9 @@ def get_tuple_parameter(self, ast, name): assert node[1] == 'designator' # if lhs is not a UNPACK_TUPLE (or equiv.), # add parenteses to make this a tuple - if node[1][0] not in ('unpack', 'unpack_list'): - return '(' + self.traverse(node[1]) + ')' - return self.traverse(node[1]) + #if node[1][0] not in ('unpack', 'unpack_list'): + return '(' + self.traverse(node[1]) + ')' + #return self.traverse(node[1]) raise "Can't find tuple parameter" % name diff --git a/uncompyle2/__init__.py b/uncompyle2/__init__.py index 3bf650d..e223ed0 100644 --- a/uncompyle2/__init__.py +++ b/uncompyle2/__init__.py @@ -115,6 +115,8 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): pass walker.mod_globs = Walker.find_globals(ast, set()) walker.gen_source(ast, customize) + for g in walker.mod_globs: + walker.write('global %s ## Warning: Unused global\n' % g) if walker.ERROR: raise walker.ERROR diff --git a/uncompyle2/verify.py b/uncompyle2/verify.py index d386dcb..da88428 100644 --- a/uncompyle2/verify.py +++ b/uncompyle2/verify.py @@ 
-12,223 +12,224 @@ #--- exceptions --- class VerifyCmpError(Exception): - pass + pass class CmpErrorConsts(VerifyCmpError): - """Exception to be raised when consts differ.""" - def __init__(self, name, index): - self.name = name - self.index = index - - def __str__(self): - return 'Compare Error within Consts of %s at index %i' % \ - (repr(self.name), self.index) - + """Exception to be raised when consts differ.""" + def __init__(self, name, index): + self.name = name + self.index = index + + def __str__(self): + return 'Compare Error within Consts of %s at index %i' % \ + (repr(self.name), self.index) + class CmpErrorConstsType(VerifyCmpError): - """Exception to be raised when consts differ.""" - def __init__(self, name, index): - self.name = name - self.index = index + """Exception to be raised when consts differ.""" + def __init__(self, name, index): + self.name = name + self.index = index - def __str__(self): - return 'Consts type differ in %s at index %i' % \ - (repr(self.name), self.index) + def __str__(self): + return 'Consts type differ in %s at index %i' % \ + (repr(self.name), self.index) class CmpErrorConstsLen(VerifyCmpError): - """Exception to be raised when length of co_consts differs.""" - def __init__(self, name, consts1, consts2): - self.name = name - self.consts = (consts1, consts2) - - def __str__(self): - return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % \ - (repr(self.name), - len(self.consts[0]), `self.consts[0]`, - len(self.consts[1]), `self.consts[1]`) - + """Exception to be raised when length of co_consts differs.""" + def __init__(self, name, consts1, consts2): + self.name = name + self.consts = (consts1, consts2) + + def __str__(self): + return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % \ + (repr(self.name), + len(self.consts[0]), `self.consts[0]`, + len(self.consts[1]), `self.consts[1]`) + class CmpErrorCode(VerifyCmpError): - """Exception to be raised when code differs.""" - def __init__(self, name, index, token1, token2, tokens1, tokens2): - self.name = name - self.index = index - self.token1 = token1 - self.token2 = token2 - self.tokens = [tokens1, tokens2] - - def __str__(self): - s = reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), - map(lambda a,b: (a,b), - self.tokens[0], - self.tokens[1]), - 'Code differs in %s\n' % str(self.name)) - return ('Code differs in %s at offset %i [%s] != [%s]\n\n' % \ - (repr(self.name), self.index, - repr(self.token1), repr(self.token2))) + s + """Exception to be raised when code differs.""" + def __init__(self, name, index, token1, token2, tokens1, tokens2): + self.name = name + self.index = index + self.token1 = token1 + self.token2 = token2 + self.tokens = [tokens1, tokens2] + + def __str__(self): + s = reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), + map(lambda a,b: (a,b), + self.tokens[0], + self.tokens[1]), + 'Code differs in %s\n' % str(self.name)) + return ('Code differs in %s at offset %s [%s] != [%s]\n\n' % \ + (repr(self.name), self.index, + repr(self.token1), repr(self.token2))) + s class CmpErrorCodeLen(VerifyCmpError): - """Exception to be raised when code length differs.""" - def __init__(self, name, tokens1, tokens2): - self.name = name - self.tokens = [tokens1, tokens2] - - def __str__(self): - return reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), - map(lambda a,b: (a,b), - self.tokens[0], - self.tokens[1]), - 'Code len differs in %s\n' % str(self.name)) + """Exception to be raised when code length differs.""" + def __init__(self, name, tokens1, 
tokens2): + self.name = name + self.tokens = [tokens1, tokens2] + + def __str__(self): + return reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), + map(lambda a,b: (a,b), + self.tokens[0], + self.tokens[1]), + 'Code len differs in %s\n' % str(self.name)) class CmpErrorMember(VerifyCmpError): - """Exception to be raised when other members differ.""" - def __init__(self, name, member, data1, data2): - self.name = name - self.member = member - self.data = (data1, data2) + """Exception to be raised when other members differ.""" + def __init__(self, name, member, data1, data2): + self.name = name + self.member = member + self.data = (data1, data2) - def __str__(self): - return 'Member %s differs in %s:\n\t%s\n\t%s\n' % \ - (repr(self.member), repr(self.name), - repr(self.data[0]), repr(self.data[1])) + def __str__(self): + return 'Member %s differs in %s:\n\t%s\n\t%s\n' % \ + (repr(self.member), repr(self.name), + repr(self.data[0]), repr(self.data[1])) #--- compare --- - + # these members are ignored -__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_consts'] +__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_stacksize', 'co_names'] def cmp_code_objects(version, code_obj1, code_obj2, name=''): - """ - Compare two code-objects. - - This is the main part of this module. - """ - #print code_obj1, type(code_obj2) - assert type(code_obj1) == types.CodeType - assert type(code_obj2) == types.CodeType - #print dir(code_obj1) - if isinstance(code_obj1, object): - # new style classes (Python 2.2) - # assume _both_ code objects to be new stle classes - assert dir(code_obj1) == dir(code_obj2) - else: - # old style classes - assert dir(code_obj1) == code_obj1.__members__ - assert dir(code_obj2) == code_obj2.__members__ - assert code_obj1.__members__ == code_obj2.__members__ - - if name == '__main__': - name = code_obj1.co_name - else: - name = '%s.%s' % (name, code_obj1.co_name) - if name == '.?': name = '__main__' - - if isinstance(code_obj1, object) and cmp(code_obj1, code_obj2): - # use the new style code-classes' __cmp__ method, which - # should be faster and more sophisticated - # if this compare fails, we use the old routine to - # find out, what exactly is nor equal - # if this compare succeds, simply return - #return - pass - - if isinstance(code_obj1, object): - members = filter(lambda x: x.startswith('co_'), dir(code_obj1)) - else: - members = dir(code_obj1); - members.sort(); #members.reverse() - - tokens1 = None - for member in members: - if member in __IGNORE_CODE_MEMBERS__: - pass - elif member == 'co_code': - scanner = Scanner.getscanner(version) - scanner.setShowAsm( showasm=0 ) - global JUMP_OPs - JUMP_OPs = scanner.JUMP_OPs - - # use changed Token class - # we (re)set this here to save exception handling, - # which would get 'unubersichtlich' - scanner.setTokenClass(Token) - try: - # disassemble both code-objects - tokens1,customize = scanner.disassemble(code_obj1) - del customize # save memory - tokens2,customize = scanner.disassemble(code_obj2) - del customize # save memory - finally: - scanner.resetTokenClass() # restore Token class - - # compare length - if len(tokens1) != len(tokens2): - raise CmpErrorCodeLen(name, tokens1, tokens2) - # compare contents - #print len(tokens1), type(tokens1), type(tokens2) - for i in xrange(len(tokens1)): - if tokens1[i] != tokens2[i]: - #print '-->', i, type(tokens1[i]), type(tokens2[i]) - raise CmpErrorCode(name, i, tokens1[i], - tokens2[i], tokens1, tokens2) - del tokens1, tokens2 # save 
memory - elif member == 'co_consts': - # compare length - if len(code_obj1.co_consts) != len(code_obj2.co_consts): - raise CmpErrorConstsLen(name, code_obj1.co_consts ,code_obj2.co_consts) - # compare contents - for idx in xrange(len(code_obj1.co_consts)): - const1 = code_obj1.co_consts[idx] - const2 = code_obj2.co_consts[idx] - ## print code_obj1.co_consts[idx], '\t', - ## print code_obj2.co_consts[idx] - # same type? - if type(const1) != type(const2): - raise CmpErrorConstsType(name, idx) - if type(const1) == types.CodeType: - # code object -> recursive compare - cmp_code_objects(version, const1, - const2, name=name) - elif cmp(const1, const2) != 0: - # content differs - raise CmpErrorConsts(name, idx) - else: - # all other members must be equal - if getattr(code_obj1, member) != getattr(code_obj2, member): - raise CmpErrorMember(name, member, - getattr(code_obj1,member), - getattr(code_obj2,member)) + """ + Compare two code-objects. + + This is the main part of this module. + """ + #print code_obj1, type(code_obj2) + assert type(code_obj1) == types.CodeType + assert type(code_obj2) == types.CodeType + #print dir(code_obj1) + if isinstance(code_obj1, object): + # new style classes (Python 2.2) + # assume _both_ code objects to be new stle classes + assert dir(code_obj1) == dir(code_obj2) + else: + # old style classes + assert dir(code_obj1) == code_obj1.__members__ + assert dir(code_obj2) == code_obj2.__members__ + assert code_obj1.__members__ == code_obj2.__members__ + + if name == '__main__': + name = code_obj1.co_name + else: + name = '%s.%s' % (name, code_obj1.co_name) + if name == '.?': name = '__main__' + + if isinstance(code_obj1, object) and cmp(code_obj1, code_obj2): + # use the new style code-classes' __cmp__ method, which + # should be faster and more sophisticated + # if this compare fails, we use the old routine to + # find out, what exactly is nor equal + # if this compare succeds, simply return + #return + pass + + if isinstance(code_obj1, object): + members = filter(lambda x: x.startswith('co_'), dir(code_obj1)) + else: + members = dir(code_obj1); + members.sort(); #members.reverse() + + tokens1 = None + for member in members: + if member in __IGNORE_CODE_MEMBERS__: + pass + elif member == 'co_code': + scanner = Scanner.getscanner(version) + scanner.setShowAsm( showasm=0 ) + global JUMP_OPs + JUMP_OPs = scanner.JUMP_OPs + + # use changed Token class + # we (re)set this here to save exception handling, + # which would get 'unubersichtlich' + scanner.setTokenClass(Token) + try: + # disassemble both code-objects + tokens1,customize = scanner.disassemble(code_obj1) + del customize # save memory + tokens2,customize = scanner.disassemble(code_obj2) + del customize # save memory + finally: + scanner.resetTokenClass() # restore Token class + + tokens1 = [t for t in tokens1 if t.type != 'COME_FROM'] + tokens2 = [t for t in tokens2 if t.type != 'COME_FROM'] + # compare length + if len(tokens1) != len(tokens2): + #continue + raise CmpErrorCodeLen(name, tokens1, tokens2) + # compare contents + #print len(tokens1), type(tokens1), type(tokens2) + for i in xrange(len(tokens1)): + if tokens1[i] != tokens2[i]: + #print '-->', i, type(tokens1[i]), type(tokens2[i]) + raise CmpErrorCode(name, tokens1[i].offset, tokens1[i], + tokens2[i], tokens1, tokens2) + del tokens1, tokens2 # save memory + elif member == 'co_consts': + # partial optimization can make the co_consts look different + # , so we'll just compare the code consts + codes1 = ( c for c in code_obj1.co_consts if type(c) == 
types.CodeType ) + codes2 = ( c for c in code_obj2.co_consts if type(c) == types.CodeType ) + + for c1, c2 in zip(codes1, codes2): + cmp_code_objects(version, c1, c2, name=name) + else: + # all other members must be equal + if getattr(code_obj1, member) != getattr(code_obj2, member): + raise CmpErrorMember(name, member, + getattr(code_obj1,member), + getattr(code_obj2,member)) class Token(Scanner.Token): - """Token class with changed semantics for 'cmp()'.""" - - def __cmp__(self, o): - t = self.type # shortcut - if t in JUMP_OPs: - # ignore offset - return cmp(t, o.type) - else: - return cmp(t, o.type) \ - or cmp(self.pattr, o.pattr) - - def __repr__(self): - return '%s %s (%s)' % (str(self.type), str(self.attr), - repr(self.pattr)) - + """Token class with changed semantics for 'cmp()'.""" + + def __cmp__(self, o): + t = self.type # shortcut +# if t in JUMP_OPs: +# # ignore offset +# return cmp(t, o.type) +# else: + if t == 'LOAD_NAME' and o.type == 'LOAD_CONST': + if self.pattr == 'None' and o.pattr == None: + return 0 + if t == 'BUILD_TUPLE_0' and o.type == 'LOAD_CONST' and o.pattr == (): + return 0 + if t == 'COME_FROM' == o.type: + return 0 + if t == 'PRINT_ITEM_CONT' and o.type == 'PRINT_ITEM': + return 0 + return cmp(t, o.type) or cmp(self.pattr, o.pattr) + + def __repr__(self): + return '%s %s (%s)' % (str(self.type), str(self.attr), + repr(self.pattr)) + + def __str__(self): + return '%s\t%-17s %r' % (self.offset, self.type, self.pattr) def compare_code_with_srcfile(pyc_filename, src_filename): - """Compare a .pyc with a source code file.""" - version, code_obj1 = uncompyle2._load_module(pyc_filename) - code_obj2 = uncompyle2._load_file(src_filename) - cmp_code_objects(version, code_obj1, code_obj2) + """Compare a .pyc with a source code file.""" + version, code_obj1 = uncompyle2._load_module(pyc_filename) + code_obj2 = uncompyle2._load_file(src_filename) + cmp_code_objects(version, code_obj1, code_obj2) def compare_files(pyc_filename1, pyc_filename2): - """Compare two .pyc files.""" - version, code_obj1 = uncompyle2._load_module(pyc_filename1) - version, code_obj2 = uncompyle2._load_module(pyc_filename2) - cmp_code_objects(version, code_obj1, code_obj2) + """Compare two .pyc files.""" + version, code_obj1 = uncompyle2._load_module(pyc_filename1) + version, code_obj2 = uncompyle2._load_module(pyc_filename2) + cmp_code_objects(version, code_obj1, code_obj2) if __name__ == '__main__': - t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52) - t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55) - print `t1` - print `t2` - print cmp(t1, t2), cmp(t1.type, t2.type), cmp(t1.attr, t2.attr) + t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52) + t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55) + print `t1` + print `t2` + print cmp(t1, t2), cmp(t1.type, t2.type), cmp(t1.attr, t2.attr) From 33b22a7ee26731fd7a1a49f18441447b3ebcb397 Mon Sep 17 00:00:00 2001 From: wibiti Date: Mon, 5 Mar 2012 11:24:59 -0600 Subject: [PATCH 08/36] a few parsing fixes; verify fixes Verify, turned on with '--verify', is smarter now. Some equivalent but not identical bytecode sequences will verify ok. 
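A minimal sketch of driving this smarter verification directly, assuming a hypothetical
example.pyc sitting next to its decompiled source example_dis.py; compare_code_with_srcfile
and VerifyCmpError are the entry points defined in uncompyle2/verify.py above:

    # Sketch: verify a decompiled source file against the original .pyc.
    # 'example.pyc' and 'example_dis.py' are hypothetical file names.
    from uncompyle2 import verify

    try:
        # Raises a VerifyCmpError subclass on mismatch; equivalent but not
        # identical sequences (for example RETURN_VALUE vs. RETURN_END_IF)
        # are now accepted as equal.
        verify.compare_code_with_srcfile('example.pyc', 'example_dis.py')
        print 'verify ok'
    except verify.VerifyCmpError, e:
        print 'verify failed:', e
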
--- uncompyle2/Parser.py | 21 ++++--- uncompyle2/Scanner.py | 83 +++++++++++++++---------- uncompyle2/Walker.py | 55 ++++++----------- uncompyle2/verify.py | 140 ++++++++++++++++++++++++++++++++++++------ 4 files changed, 202 insertions(+), 97 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index ef7f84c..ebe3f56 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -261,7 +261,6 @@ def p_grammar(self, args): stmts ::= sstmt sstmt ::= stmt sstmt ::= ifelsestmtr - sstmt ::= return_stmt sstmt ::= return_stmt RETURN_LAST stmts_opt ::= stmts @@ -333,11 +332,20 @@ def p_grammar(self, args): stmt ::= call_stmt call_stmt ::= expr POP_TOP + stmt ::= return_stmt return_stmt ::= expr RETURN_VALUE + return_stmts ::= return_stmt + return_stmts ::= _stmts return_stmt + + return_if_stmts ::= return_if_stmt + return_if_stmts ::= _stmts return_if_stmt + return_if_stmt ::= expr RETURN_END_IF + stmt ::= break_stmt break_stmt ::= BREAK_LOOP + stmt ::= continue_stmt continue_stmt ::= CONTINUE continue_stmt ::= CONTINUE_LOOP continue_stmts ::= _stmts lastl_stmt continue_stmt @@ -421,7 +429,7 @@ def p_grammar(self, args): testfalse ::= expr jmp_false testtrue ::= expr jmp_true - _ifstmts_jump ::= return_stmts + _ifstmts_jump ::= return_if_stmts _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE @@ -432,7 +440,7 @@ def p_grammar(self, args): ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec - ifelsestmtr ::= testexpr return_stmts return_stmts + ifelsestmtr ::= testexpr return_if_stmts return_stmts ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel @@ -532,9 +540,6 @@ def p_grammar(self, args): forelselaststmtl ::= SETUP_LOOP expr _for designator for_block POP_BLOCK else_suitel COME_FROM - return_stmts ::= return_stmt - return_stmts ::= _stmts return_stmt - ''' def p_expr(self, args): @@ -636,9 +641,7 @@ def p_expr(self, args): stmt ::= conditional_lambda2 return_lambda ::= expr RETURN_VALUE LAMBDA_MARKER - conditional_lambda ::= expr POP_JUMP_IF_FALSE return_stmt return_stmt LAMBDA_MARKER - conditional_lambda2 ::= expr POP_JUMP_IF_FALSE expr POP_JUMP_IF_FALSE - return_stmt return_stmt LAMBDA_MARKER + conditional_lambda ::= expr POP_JUMP_IF_FALSE return_if_stmt return_stmt LAMBDA_MARKER cmp ::= cmp_list cmp ::= compare diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 4c39341..8821ce5 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -251,6 +251,9 @@ def unmangle(name): opname = 'LOAD_ASSERT' except AttributeError: pass + elif op == RETURN_VALUE: + if offset in self.return_end_ifs: + opname = 'RETURN_END_IF' if offset not in replace: rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) @@ -487,8 +490,7 @@ def next_except_jump(self, start): except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE) if except_match: jmp = self.prev[self.get_target(except_match)] - if self.code[jmp] not in (JA, JF, RETURN_VALUE): - print '############################', jmp, dis.opname[self.code[jmp]] + self.ignore_if.add(except_match) return jmp count_END_FINALLY = 0 @@ -540,6 +542,7 @@ def detect_structure(self, pos, op=None): origStructCount = len(self.structs) if op == SETUP_LOOP: + #import pdb; pdb.set_trace() start = pos+3 target = self.get_target(pos, op) end = self.restrict_to_parent(target, parent) @@ -549,42 +552,54 @@ def detect_structure(self, pos, op=None): (line_no, next_line_byte) = self.lines[pos] jump_back = 
self.last_instr(start, end, JA, next_line_byte, False) - if not jump_back: - return - - if self.get_target(jump_back) >= next_line_byte: - jump_back = self.last_instr(start, end, JA, - start, False) - - - if end > jump_back+4 and code[end] in (JF, JA): - if code[jump_back+4] in (JA, JF): - if self.get_target(jump_back+4) == self.get_target(end): - self.fixed_jumps[pos] = jump_back+4 - end = jump_back+4 - elif target < pos: - self.fixed_jumps[pos] = jump_back+4 - end = jump_back+4 - - target = self.get_target(jump_back, JA) - - if code[target] in (FOR_ITER, GET_ITER): - loop_type = 'for' + if not jump_back: # loop suite ends in return. wtf right? + jump_back = self.last_instr(start, end, RETURN_VALUE) + 1 + if not jump_back: + return + if code[self.prev[next_line_byte]] not in (PJIF, PJIT): + loop_type = 'for' + else: + loop_type = 'while' + self.ignore_if.add(self.prev[next_line_byte]) + target = next_line_byte + end = jump_back + 3 else: - loop_type = 'while' - test = self.prev[next_line_byte] - self.ignore_if.add(test) - test_target = self.get_target(test) - if test_target > (jump_back+3): - jump_back = test_target + if self.get_target(jump_back) >= next_line_byte: + jump_back = self.last_instr(start, end, JA, + start, False) + + if end > jump_back+4 and code[end] in (JF, JA): + if code[jump_back+4] in (JA, JF): + if self.get_target(jump_back+4) == self.get_target(end): + self.fixed_jumps[pos] = jump_back+4 + end = jump_back+4 + elif target < pos: + self.fixed_jumps[pos] = jump_back+4 + end = jump_back+4 + + target = self.get_target(jump_back, JA) + + if code[target] in (FOR_ITER, GET_ITER): + loop_type = 'for' + else: + loop_type = 'while' + test = self.prev[next_line_byte] + if test == pos: + loop_type = 'while 1' + else: + self.ignore_if.add(test) + test_target = self.get_target(test) + if test_target > (jump_back+3): + jump_back = test_target self.loops.append(target) self.structs.append({'type': loop_type + '-loop', 'start': target, 'end': jump_back}) - self.structs.append({'type': loop_type + '-else', - 'start': jump_back+3, - 'end': end}) + if jump_back+3 != end: + self.structs.append({'type': loop_type + '-else', + 'start': jump_back+3, + 'end': end}) elif op == SETUP_EXCEPT: start = pos+3 target = self.get_target(pos, op) @@ -693,7 +708,7 @@ def detect_structure(self, pos, op=None): if pre[next] == pos: pass elif code[next] in (JF, JA) and target == self.get_target(next): - if code[pre[next]] in (PJIF, PJIT): + if code[pre[next]] in (PJIF, PJIT) and not(code[self.prev[rtarget]] == JF and target > pos): self.fixed_jumps[pos] = pre[next] return elif code[next] == JA and code[target] in (JA, JF) \ @@ -734,6 +749,7 @@ def detect_structure(self, pos, op=None): self.structs.append({'type': 'if-then', 'start': start, 'end': rtarget}) + self.return_end_ifs.add(pre[rtarget]) elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): target = self.get_target(pos, op) @@ -769,6 +785,7 @@ def find_jump_targets(self, code): self.ignore_if = set() self.build_stmt_indices() self.not_continue = set() + self.return_end_ifs = set() targets = {} for i in self.op_range(0, n): diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 10234ac..8149cd4 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -417,7 +417,7 @@ def find_all_globals(node, globs): def find_none(node): for n in node: if isinstance(n, AST): - if not (n == 'return_stmt'): + if not (n == 'return_stmt' or n == 'return_if_stmt'): if find_none(n): return True elif n.type == 'LOAD_CONST' and n.pattr == None: @@ -597,6 
+597,18 @@ def n_return_stmt(self, node): self.print_() self.prune() # stop recursing + def n_return_if_stmt(self, node): + if self.__params['isLambda']: + self.preorder(node[0]) + self.prune() + else: + self.write(self.indent, 'return') + if self.return_none or node != AST('return_if_stmt', [NONE, Token('RETURN_END_IF')]): + self.write(' ') + self.preorder(node[0]) + self.print_() + self.prune() # stop recursing + def n_yield(self, node): self.write('yield') if node != AST('yield', [NONE, Token('YIELD_VALUE')]): @@ -754,8 +766,8 @@ def n_ifelsestmtr(self, node): if len(node[2]) != 2: self.default(node) - if not (node[2][0][0][0] == 'ifstmt' and node[2][0][0][0][1][0] == 'return_stmts') \ - and not (node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_stmts'): + if not (node[2][0][0][0] == 'ifstmt' and node[2][0][0][0][1][0] == 'return_if_stmts') \ + and not (node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_if_stmts'): self.default(node) return @@ -768,13 +780,13 @@ def n_ifelsestmtr(self, node): if_ret_at_end = False if len(node[2][0]) >= 3: - if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_stmts': + if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_if_stmts': if_ret_at_end = True past_else = False prev_stmt_is_if_ret = True for n in node[2][0]: - if (n[0] == 'ifstmt' and n[0][1][0] == 'return_stmts'): + if (n[0] == 'ifstmt' and n[0][1][0] == 'return_if_stmts'): if prev_stmt_is_if_ret: n[0].type = 'elifstmt' prev_stmt_is_if_ret = True @@ -797,7 +809,7 @@ def n_elifelsestmtr(self, node): self.default(node) for n in node[2][0]: - if not (n[0] == 'ifstmt' and n[0][1][0] == 'return_stmts'): + if not (n[0] == 'ifstmt' and n[0][1][0] == 'return_if_stmts'): self.default(node) return @@ -810,7 +822,7 @@ def n_elifelsestmtr(self, node): if_ret_at_end = False if len(node[2][0]) >= 3: - if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_stmts': + if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_if_stmts': if_ret_at_end = True past_else = False @@ -1385,12 +1397,6 @@ def build_ast(self, tokens, customize, isLambda=0, noneInNames=False): self.print_(repr(ast)) return ast -# while(len(tokens) > 2): -# if (tokens[-1] == Token('RETURN_VALUE')) and (tokens[-2] == Token('LOAD_CONST') and (tokens[-3].type != 'END_IF_LINE')): -# del tokens[-2:] -# else: -# break - if len(tokens) > 2 or len(tokens) == 2 and not noneInNames: if tokens[-1] == Token('RETURN_VALUE'): if tokens[-2] == Token('LOAD_CONST'): @@ -1404,28 +1410,7 @@ def build_ast(self, tokens, customize, isLambda=0, noneInNames=False): try: ast = Parser.parse(tokens, customize) except Parser.ParserError, e: - try: - tokens.append(Token('LOAD_CONST')) - tokens.append(Token('RETURN_VALUE')) - ast = Parser.parse(tokens, customize) - except Parser.ParserError, e: - try: - del tokens[-2:] - Parser.p.addRule('stmt ::= continue_stmt', Parser.nop) - ast = Parser.parse(tokens, customize) - except Parser.ParserError, e: - try: - Parser.p.addRule('c_stmts ::= return_stmt', Parser.nop) - ast = Parser.parse(tokens, customize) - except: - try: - Parser.p.addRule('stmt ::= return_stmt', Parser.nop) - ast = Parser.parse(tokens, customize) - except: - raise ParserError(e, tokens) - finally: - Parser.p.cleanup() - Parser.p = Parser.Parser() + raise ParserError(e, tokens) if self.showast: diff --git a/uncompyle2/verify.py b/uncompyle2/verify.py index da88428..44bef99 100644 --- a/uncompyle2/verify.py +++ b/uncompyle2/verify.py @@ -5,8 +5,26 @@ # 
import types +import operator +import dis import uncompyle2, Scanner +BIN_OP_FUNCS = { +'BINARY_POWER': operator.pow, +'BINARY_MULTIPLY': operator.mul, +'BINARY_DIVIDE': operator.div, +'BINARY_FLOOR_DIVIDE': operator.floordiv, +'BINARY_TRUE_DIVIDE': operator.truediv, +'BINARY_MODULO' : operator.mod, +'BINARY_ADD': operator.add, +'BINARY_SUBRACT': operator.sub, +'BINARY_LSHIFT': operator.lshift, +'BINARY_RSHIFT': operator.rshift, +'BINARY_AND': operator.and_, +'BINARY_XOR': operator.xor, +'BINARY_OR': operator.or_, +} + JUMP_OPs = None #--- exceptions --- @@ -144,7 +162,7 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): scanner = Scanner.getscanner(version) scanner.setShowAsm( showasm=0 ) global JUMP_OPs - JUMP_OPs = scanner.JUMP_OPs + JUMP_OPs = scanner.JUMP_OPs + ['JUMP_BACK'] # use changed Token class # we (re)set this here to save exception handling, @@ -159,23 +177,101 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): finally: scanner.resetTokenClass() # restore Token class + targets1 = dis.findlabels(code_obj1.co_code) tokens1 = [t for t in tokens1 if t.type != 'COME_FROM'] tokens2 = [t for t in tokens2 if t.type != 'COME_FROM'] - # compare length - if len(tokens1) != len(tokens2): - #continue - raise CmpErrorCodeLen(name, tokens1, tokens2) - # compare contents - #print len(tokens1), type(tokens1), type(tokens2) - for i in xrange(len(tokens1)): - if tokens1[i] != tokens2[i]: - #print '-->', i, type(tokens1[i]), type(tokens2[i]) - raise CmpErrorCode(name, tokens1[i].offset, tokens1[i], - tokens2[i], tokens1, tokens2) + + i1 = 0; i2 = 0 + offset_map = {}; check_jumps = {} + while i1 < len(tokens1): + if i2 >= len(tokens2): + if len(tokens1) == len(tokens2) + 2 \ + and tokens1[-1].type == 'RETURN_VALUE' \ + and tokens1[-2].type == 'LOAD_CONST' \ + and tokens1[-2].pattr == None \ + and tokens1[-3].type == 'RETURN_VALUE': + break + else: + raise CmpErrorCodeLen(name, tokens1, tokens2) + + offset_map[tokens1[i1].offset] = tokens2[i2].offset + + for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []): + if offset2 != tokens2[i2].offset: + raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1], + tokens2[idx2], tokens1, tokens2) + + if tokens1[i1] != tokens2[i2]: + if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type: + i = 1 + while tokens1[i1+i].type == 'LOAD_CONST': + i += 1 + if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \ + and i == int(tokens1[i1+i].type.split('_')[-1]): + t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ]) + if t != tokens2[i2].pattr: + raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], + tokens2[i2], tokens1, tokens2) + i1 += i + 1 + i2 += 1 + continue + elif i == 2 and tokens1[i1+i].type == 'ROT_TWO' and tokens2[i2+1].type == 'UNPACK_SEQUENCE_2': + i1 += 3 + i2 += 2 + continue + elif i == 2 and tokens1[i1+i].type in BIN_OP_FUNCS: + f = BIN_OP_FUNCS[tokens1[i1+i].type] + if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr: + i1 += 3 + i2 += 1 + continue + elif tokens1[i1].type == 'UNARY_NOT': + if tokens2[i2].type == 'POP_JUMP_IF_TRUE': + if tokens1[i1+1].type == 'POP_JUMP_IF_FALSE': + i1 += 2 + i2 += 1 + continue + elif tokens2[i2].type == 'POP_JUMP_IF_FALSE': + if tokens1[i1+1].type == 'POP_JUMP_IF_TRUE': + i1 += 2 + i2 += 1 + continue + elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \ + and tokens1[i1-1].type == 'RETURN_VALUE' \ + and tokens2[i2-1].type in ('RETURN_VALUE', 'RETURN_END_IF') \ + and int(tokens1[i1].offset) not in targets1: + i1 += 1 + continue + elif 
tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \ + and tokens1[i1+1].type == 'JUMP_BACK' and tokens2[i2+1].type == 'JUMP_BACK' \ + and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3: + if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset): + i1 += 2 + i2 += 2 + continue + + raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], + tokens2[i2], tokens1, tokens2) + elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr: + dest1 = int(tokens1[i1].pattr) + dest2 = int(tokens2[i2].pattr) + if tokens1[i1].type == 'JUMP_BACK': + if offset_map[dest1] != dest2: + raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], + tokens2[i2], tokens1, tokens2) + else: + #import pdb; pdb.set_trace() + if dest1 in check_jumps: + check_jumps[dest1].append((i1,i2,dest2)) + else: + check_jumps[dest1] = [(i1,i2,dest2)] + + i1 += 1 + i2 += 1 del tokens1, tokens2 # save memory elif member == 'co_consts': - # partial optimization can make the co_consts look different - # , so we'll just compare the code consts + # partial optimization can make the co_consts look different, + # so we'll just compare the code consts codes1 = ( c for c in code_obj1.co_consts if type(c) == types.CodeType ) codes2 = ( c for c in code_obj2.co_consts if type(c) == types.CodeType ) @@ -190,14 +286,11 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): class Token(Scanner.Token): """Token class with changed semantics for 'cmp()'.""" - + def __cmp__(self, o): t = self.type # shortcut -# if t in JUMP_OPs: -# # ignore offset -# return cmp(t, o.type) -# else: - if t == 'LOAD_NAME' and o.type == 'LOAD_CONST': + loads = ('LOAD_NAME', 'LOAD_GLOBAL', 'LOAD_CONST') + if t in loads and o.type in loads: if self.pattr == 'None' and o.pattr == None: return 0 if t == 'BUILD_TUPLE_0' and o.type == 'LOAD_CONST' and o.pattr == (): @@ -206,6 +299,13 @@ def __cmp__(self, o): return 0 if t == 'PRINT_ITEM_CONT' and o.type == 'PRINT_ITEM': return 0 + if t == 'RETURN_VALUE' and o.type == 'RETURN_END_IF': + return 0 + if t == 'JUMP_IF_FALSE_OR_POP' and o.type == 'POP_JUMP_IF_FALSE': + return 0 + if t in JUMP_OPs: + # ignore offset + return cmp(t, o.type) return cmp(t, o.type) or cmp(self.pattr, o.pattr) def __repr__(self): From bce3dddb102e2acf1a2f2fea9f47eb2f66ca24c2 Mon Sep 17 00:00:00 2001 From: wibiti Date: Tue, 6 Mar 2012 13:18:47 -0600 Subject: [PATCH 09/36] fixes issue 16, and another fix --- uncompyle2/Scanner.py | 68 +++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 8821ce5..74ec24c 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -481,6 +481,46 @@ def remove_mid_line_ifs(self, ifs): return filtered + def rem_or(self, start, end, instr, target=None, include_beyond_target=False): + """ + Find all in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely. + + Return a list with indexes to them or [] if none found. 
+ """ + + code = self.code + assert(start>=0 and end<=len(code)) + + try: None in instr + except: instr = [instr] + + result = [] + for i in self.op_range(start, end): + op = code[i] + if op in instr: + if target is None: + result.append(i) + else: + t = self.get_target(i, op) + if include_beyond_target and t >= target: + result.append(i) + elif t == target: + result.append(i) + + pjits = self.all_instr(start, end, PJIT) + filtered = [] + for pjit in pjits: + tgt = self.get_target(pjit)-3 + for i in result: + if i < pjit or i >= tgt: + filtered.append(i) + result = filtered + filtered = [] + return result + def next_except_jump(self, start): """ Return the next jump that was generated by an except SomeException: @@ -645,7 +685,6 @@ def detect_structure(self, pos, op=None): elif op in (PJIF, PJIT): - #import pdb; pdb.set_trace() start = pos+3 target = self.get_target(pos, op) rtarget = self.restrict_to_parent(target, parent) @@ -666,8 +705,7 @@ def detect_structure(self, pos, op=None): # is this an if and if op == PJIF: - #import pdb; pdb.set_trace() - match = self.all_instr(start, self.next_stmt[pos], (PJIF, PJIT), target) + match = self.rem_or(start, self.next_stmt[pos], PJIF, target) match = self.remove_mid_line_ifs(match) if match: if code[pre[rtarget]] in (JF, JA) \ @@ -677,22 +715,21 @@ def detect_structure(self, pos, op=None): and self.remove_mid_line_ifs([pos]) \ and target == self.get_target(pre[pre[rtarget]]) \ and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\ - and 1 == len(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ - (PJIF, PJIT), target))): + and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], PJIF, target))): pass elif code[pre[pre[rtarget]]] == RETURN_VALUE \ and self.remove_mid_line_ifs([pos]) \ - and 1 == (len(set(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ - (PJIF, PJIT), target))) \ - | set(self.remove_mid_line_ifs(self.all_instr(start, pre[pre[rtarget]], \ - (PJIF, PJIT, JA), pre[rtarget], True))))): + and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \ + PJIF, target))) \ + | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \ + (PJIF, JA), pre[rtarget], True))))): pass else: fix = None - jump_ifs = self.all_instr(start, self.next_stmt[pos], (PJIF, PJIT)) + jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF) last_jump_good = True for j in jump_ifs: - if code[j] == PJIF and target == self.get_target(j): + if target == self.get_target(j): if self.lines[j].next == j+3 and last_jump_good: fix = j break @@ -703,14 +740,15 @@ def detect_structure(self, pos, op=None): else: self.fixed_jumps[pos] = match[-1] return - else: + else: # op == PJIT next = self.next_stmt[pos] if pre[next] == pos: pass elif code[next] in (JF, JA) and target == self.get_target(next): - if code[pre[next]] in (PJIF, PJIT) and not(code[self.prev[rtarget]] == JF and target > pos): - self.fixed_jumps[pos] = pre[next] - return + if code[pre[next]] == PJIF: + if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE): + self.fixed_jumps[pos] = pre[next] + return elif code[next] == JA and code[target] in (JA, JF) \ and self.get_target(target) == self.get_target(next): self.fixed_jumps[pos] = pre[next] From df8e752c4dcdd6e90248c20711a6f6c2563b1763 Mon Sep 17 00:00:00 2001 From: wibiti Date: Tue, 6 Mar 2012 20:39:53 -0600 Subject: [PATCH 10/36] another attempt at a fix --- uncompyle2/Scanner.py | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 74ec24c..34b30a5 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -515,7 +515,7 @@ def rem_or(self, start, end, instr, target=None, include_beyond_target=False): for pjit in pjits: tgt = self.get_target(pjit)-3 for i in result: - if i < pjit or i >= tgt: + if i <= pjit or i >= tgt: filtered.append(i) result = filtered filtered = [] @@ -715,14 +715,14 @@ def detect_structure(self, pos, op=None): and self.remove_mid_line_ifs([pos]) \ and target == self.get_target(pre[pre[rtarget]]) \ and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\ - and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], PJIF, target))): + and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))): pass elif code[pre[pre[rtarget]]] == RETURN_VALUE \ and self.remove_mid_line_ifs([pos]) \ and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \ - PJIF, target))) \ + (PJIF, PJIT), target))) \ | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \ - (PJIF, JA), pre[rtarget], True))))): + (PJIF, PJIT, JA), pre[rtarget], True))))): pass else: fix = None From 67c6d51849cc496cb57e726836204b921385e13a Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 6 Jun 2012 10:23:32 -0500 Subject: [PATCH 11/36] bug fix --- uncompyle2/Parser.py | 2 ++ uncompyle2/Scanner.py | 8 ++++++++ uncompyle2/Walker.py | 1 + 3 files changed, 11 insertions(+) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index ebe3f56..794588c 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -512,6 +512,8 @@ def p_grammar(self, args): while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK COME_FROM while1stmt ::= SETUP_LOOP return_stmts COME_FROM + while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK else_suite COME_FROM + whileelsestmt ::= SETUP_LOOP testexpr l_stmts_opt JUMP_BACK POP_BLOCK diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 34b30a5..aad9e54 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -592,6 +592,12 @@ def detect_structure(self, pos, op=None): (line_no, next_line_byte) = self.lines[pos] jump_back = self.last_instr(start, end, JA, next_line_byte, False) + + if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (JA, JF): + if code[self.prev[end]] == RETURN_VALUE or \ + (code[self.prev[end]] == POP_BLOCK and code[self.prev[self.prev[end]]] == RETURN_VALUE): + jump_back = None + if not jump_back: # loop suite ends in return. wtf right? 
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1 if not jump_back: @@ -631,6 +637,8 @@ def detect_structure(self, pos, op=None): test_target = self.get_target(test) if test_target > (jump_back+3): jump_back = test_target + + self.not_continue.add(jump_back) self.loops.append(target) self.structs.append({'type': loop_type + '-loop', diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 8149cd4..b6c44e9 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -258,6 +258,7 @@ 'whilestmt': ( '%|while %c:\n%+%c%-\n\n', 1, 2 ), 'while1stmt': ( '%|while 1:\n%+%c%-\n\n', 1 ), + 'while1elsestmt': ( '%|while 1:\n%+%c%-%|else:\n%+%c%-\n\n', 1, 3 ), 'whileelsestmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-\n\n', 1, 2, -2 ), 'whileelselaststmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-', 1, 2, -2 ), 'forstmt': ( '%|for %c in %c:\n%+%c%-\n\n', 3, 1, 4 ), From 5f4ddc23e4c1409be1cb1779a0cfb10d29dab27b Mon Sep 17 00:00:00 2001 From: wibiti Date: Tue, 4 Sep 2012 10:38:33 -0500 Subject: [PATCH 12/36] Bug fix; find_globals takes a set and returns a set, not a dict --- uncompyle2/Walker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index b6c44e9..5e18b99 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -1361,7 +1361,7 @@ def build_class(self, code): #else: # print ast[-1][-1] - for g in find_globals(ast, {}).keys(): + for g in find_globals(ast, set()): self.print_(indent, 'global ', g) self.gen_source(ast, code._customize) From 71e9a4bb6df45acf6c74aa393fc476ae0de3bd5e Mon Sep 17 00:00:00 2001 From: Hartmut Goebel Date: Fri, 16 Nov 2012 19:16:25 +0100 Subject: [PATCH 13/36] Updating original authors email-address. --- PKG-INFO | 2 +- scripts/uncompyle2 | 2 +- setup.cfg | 2 +- setup.py | 2 +- uncompyle2/Parser.py | 2 +- uncompyle2/Scanner.py | 2 +- uncompyle2/Walker.py | 2 +- uncompyle2/__init__.py | 2 +- uncompyle2/verify.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/PKG-INFO b/PKG-INFO index c5e150b..79df902 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -4,7 +4,7 @@ Version: 1.1 Summary: Python byte-code to source-code converter Home-page: http://github.com/sysfrog/uncompyle Author: Hartmut Goebel -Author-email: hartmut@oberon.noris.de +Author-email: h.goebel@crazy-compilers.com License: GPLv3 Description: UNKNOWN Platform: UNKNOWN diff --git a/scripts/uncompyle2 b/scripts/uncompyle2 index 26067d2..1fcf6ea 100755 --- a/scripts/uncompyle2 +++ b/scripts/uncompyle2 @@ -1,7 +1,7 @@ #!/usr/bin/env python2.7 # Mode: -*- python -*- # -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2000-2002 by hartmut Goebel # """ Usage: uncompyle [OPTIONS]... [ FILE | DIR]... 
diff --git a/setup.cfg b/setup.cfg index ef17758..a9bf1b4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [bdist_rpm] release = 1 -packager = Hartmut Goebel +packager = Hartmut Goebel doc_files = README # CHANGES.txt # USAGE.txt diff --git a/setup.py b/setup.py index 47a0531..5570c36 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ version = "1.1", description = "Python byte-code to source-code converter", author = "Hartmut Goebel", - author_email = "hartmut@oberon.noris.de", + author_email = "h.goebel@crazy-compilers.com", url = "http://github.com/sysfrog/uncompyle", packages=['uncompyle2'], scripts=['scripts/uncompyle2'], diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index 794588c..e95c9f5 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -1,5 +1,5 @@ # Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 2005 by Dan Pascu # # See main module for license. diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index aad9e54..003cdfc 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -1,5 +1,5 @@ # Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 2005 by Dan Pascu # # See main module for license. diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 5e18b99..c615947 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -1,5 +1,5 @@ # Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 2005 by Dan Pascu # # See main module for license. diff --git a/uncompyle2/__init__.py b/uncompyle2/__init__.py index e223ed0..7d0e500 100644 --- a/uncompyle2/__init__.py +++ b/uncompyle2/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) 1999 John Aycock -# Copyright (c) 2000 by hartmut Goebel +# Copyright (c) 2000 by hartmut Goebel # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the diff --git a/uncompyle2/verify.py b/uncompyle2/verify.py index 44bef99..8a9239a 100644 --- a/uncompyle2/verify.py +++ b/uncompyle2/verify.py @@ -1,5 +1,5 @@ # -# (C) Copyright 2000-2002 by hartmut Goebel +# (C) Copyright 2000-2002 by hartmut Goebel # # byte-code verifier for uncompyle # From aa817157d4a357aa87bcabce5359dcef62d1de36 Mon Sep 17 00:00:00 2001 From: Hartmut Goebel Date: Fri, 16 Nov 2012 19:23:28 +0100 Subject: [PATCH 14/36] Update original authors URL. 
--- test/compile_tests | 2 +- test/test_applyEquiv.py | 2 +- test/test_augmentedAssign.py | 2 +- test/test_class.py | 2 +- test/test_del.py | 2 +- test/test_docstring.py | 2 +- test/test_exec.py | 2 +- test/test_expressions.py | 2 +- test/test_extendedImport.py | 2 +- test/test_extendedPrint.py | 2 +- test/test_extendedarg.py | 2 +- test/test_functions.py | 2 +- test/test_global.py | 2 +- test/test_globals.py | 2 +- test/test_import.py | 2 +- test/test_import_as.py | 2 +- test/test_integers.py | 2 +- test/test_lambda.py | 2 +- test/test_listComprehensions.py | 2 +- test/test_loops.py | 2 +- test/test_loops2.py | 2 +- test/test_misc.py | 2 +- test/test_nested_elif.py | 2 +- test/test_nested_scopes.py | 2 +- test/test_prettyprint.py | 2 +- test/test_print.py | 2 +- test/test_print_to.py | 2 +- test/test_slices.py | 2 +- test/test_tuple_params.py | 2 +- test/test_tuples.py | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) diff --git a/test/compile_tests b/test/compile_tests index 029ca6a..f37e06f 100644 --- a/test/compile_tests +++ b/test/compile_tests @@ -5,7 +5,7 @@ compile_tests -- compile test patterns for the decompyle test suite This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_applyEquiv.py b/test/test_applyEquiv.py index 02fff19..1c13ee2 100644 --- a/test/test_applyEquiv.py +++ b/test/test_applyEquiv.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information def kwfunc(**kwargs): diff --git a/test/test_augmentedAssign.py b/test/test_augmentedAssign.py index 7c5a3df..2e4b611 100644 --- a/test/test_augmentedAssign.py +++ b/test/test_augmentedAssign.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_class.py b/test/test_class.py index e610eb6..748790e 100644 --- a/test/test_class.py +++ b/test/test_class.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_del.py b/test/test_del.py index 26419f9..cd1fe17 100644 --- a/test/test_del.py +++ b/test/test_del.py @@ -5,7 +5,7 @@ Snippet taken from python libs's test_class.py decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_docstring.py b/test/test_docstring.py index a27b141..4a92005 100644 --- a/test/test_docstring.py +++ b/test/test_docstring.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. 
# # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information ''' diff --git a/test/test_exec.py b/test/test_exec.py index ed44815..9f1502a 100644 --- a/test/test_exec.py +++ b/test/test_exec.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information testcode = 'a = 12' diff --git a/test/test_expressions.py b/test/test_expressions.py index de11ae8..b00dd37 100644 --- a/test/test_expressions.py +++ b/test/test_expressions.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information def _lsbStrToInt(str): diff --git a/test/test_extendedImport.py b/test/test_extendedImport.py index 277d4c7..efd3a3f 100644 --- a/test/test_extendedImport.py +++ b/test/test_extendedImport.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information import os, sys as System, time diff --git a/test/test_extendedPrint.py b/test/test_extendedPrint.py index c0a64d3..b1f2801 100644 --- a/test/test_extendedPrint.py +++ b/test/test_extendedPrint.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information import sys diff --git a/test/test_extendedarg.py b/test/test_extendedarg.py index ee9562d..2f7d98e 100644 --- a/test/test_extendedarg.py +++ b/test/test_extendedarg.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information for i in range(1<<15+1): diff --git a/test/test_functions.py b/test/test_functions.py index 7b793e3..805a526 100644 --- a/test/test_functions.py +++ b/test/test_functions.py @@ -3,7 +3,7 @@ # This source is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information def x0(): diff --git a/test/test_global.py b/test/test_global.py index c789eb8..f4d24f9 100644 --- a/test/test_global.py +++ b/test/test_global.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_globals.py b/test/test_globals.py index 6e9cad0..0ae3c09 100644 --- a/test/test_globals.py +++ b/test/test_globals.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. 
# # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information def f(): diff --git a/test/test_import.py b/test/test_import.py index 73d1b1b..dc7317a 100644 --- a/test/test_import.py +++ b/test/test_import.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_import_as.py b/test/test_import_as.py index 5695c9c..f5a5b43 100644 --- a/test/test_import_as.py +++ b/test/test_import_as.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_integers.py b/test/test_integers.py index 45ac600..5df52ed 100644 --- a/test/test_integers.py +++ b/test/test_integers.py @@ -5,7 +5,7 @@ Snippet taken from python libs's test_class.py decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_lambda.py b/test/test_lambda.py index eb214bf..f85dc33 100644 --- a/test/test_lambda.py +++ b/test/test_lambda.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information palette = map(lambda a: (a,a,a), range(256)) diff --git a/test/test_listComprehensions.py b/test/test_listComprehensions.py index 9e0f11e..0e8aa2b 100644 --- a/test/test_listComprehensions.py +++ b/test/test_listComprehensions.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information XXX = range(4) diff --git a/test/test_loops.py b/test/test_loops.py index c5e11b5..e7f2429 100644 --- a/test/test_loops.py +++ b/test/test_loops.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_loops2.py b/test/test_loops2.py index 50c1188..09da4ee 100644 --- a/test/test_loops2.py +++ b/test/test_loops2.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. 
decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_misc.py b/test/test_misc.py index f7a74c8..03ef6c9 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -4,7 +4,7 @@ # Snippet taken from python libs's test_class.py # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information raise "This program can't be run" diff --git a/test/test_nested_elif.py b/test/test_nested_elif.py index 8aac638..f8a52ed 100644 --- a/test/test_nested_elif.py +++ b/test/test_nested_elif.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information a = None diff --git a/test/test_nested_scopes.py b/test/test_nested_scopes.py index 32646e1..e3d7e04 100644 --- a/test/test_nested_scopes.py +++ b/test/test_nested_scopes.py @@ -3,7 +3,7 @@ # This source is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information from __future__ import nested_scopes diff --git a/test/test_prettyprint.py b/test/test_prettyprint.py index 957d72e..6c326d9 100644 --- a/test/test_prettyprint.py +++ b/test/test_prettyprint.py @@ -5,7 +5,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_print.py b/test/test_print.py index dd30661..f9f9e89 100644 --- a/test/test_print.py +++ b/test/test_print.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information print 1,2,3,4,5 diff --git a/test/test_print_to.py b/test/test_print_to.py index a6901ad..45d62e7 100644 --- a/test/test_print_to.py +++ b/test/test_print_to.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ import sys diff --git a/test/test_slices.py b/test/test_slices.py index 1580e5e..f449c9c 100644 --- a/test/test_slices.py +++ b/test/test_slices.py @@ -5,7 +5,7 @@ Snippet taken from python libs's test_class.py decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_tuple_params.py b/test/test_tuple_params.py index 7cab1b1..469493f 100644 --- a/test/test_tuple_params.py +++ b/test/test_tuple_params.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. 
decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_tuples.py b/test/test_tuples.py index 57362bd..15e973a 100644 --- a/test/test_tuples.py +++ b/test/test_tuples.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ From d182696e494db93197e0194c1381b6c1f6fc5831 Mon Sep 17 00:00:00 2001 From: Hartmut Goebel Date: Fri, 16 Nov 2012 19:28:03 +0100 Subject: [PATCH 15/36] Add credits to the original author. --- README | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README b/README index ac8b6a5..86bf02a 100644 --- a/README +++ b/README @@ -19,13 +19,14 @@ by compiling it and comparing both byte-codes. 'uncompyle2' is based on John Aycock's generic small languages compiler 'spark' (http://www.csr.uvic.ca/~aycock/python/) and his prior work on -'uncompyle'. +a tool called 'decompyle'. This tool has been vastly improved by +Hartmut Goebel `http://www.crazy-compilers.com/`_ Additional note (3 July 2004, Ben Burton): - The original website from which this software was obtained is no longer - available. It has now become a commercial decompilation service, with - no software available for download. + This software is no longer available from the original website. It + has now become a commercial decompilation service, with no + software available for download. Any developers seeking to make alterations or enhancements to this code should therefore consider these debian packages an appropriate starting From a85de1863953031f01b691930fbeffb3ef8f2047 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 11 Feb 2013 22:00:11 -0600 Subject: [PATCH 16/36] partial fix for multiprocessing in windows environment --- scripts/uncompyle2 | 260 +++++++++++++++++++++++---------------------- 1 file changed, 132 insertions(+), 128 deletions(-) diff --git a/scripts/uncompyle2 b/scripts/uncompyle2 index 1fcf6ea..9c043c9 100755 --- a/scripts/uncompyle2 +++ b/scripts/uncompyle2 @@ -24,7 +24,7 @@ Options: -> /tmp/smtplib.dis ... /tmp/lib-tk/FixTk.dis -c attempts a disassembly after compiling -d do not print timestamps - -p use number of processes + -m use multiprocessing -r recurse directories looking for .pyc and .pyo files --verify compare generated source with input byte-code (requires -o) @@ -44,144 +44,148 @@ Usage_short = \ "decomyple [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..." import sys, os, getopt -import os.path from uncompyle2 import main, verify import time +from multiprocessing import Process, Queue, cpu_count +from Queue import Empty -if sys.version[:3] != '2.7': - print >>sys.stderr, 'Error: uncompyle2 requires Python 2.7.' 
- sys.exit(-1) - -showasm = showast = do_verify = numproc = recurse_dirs = 0 -outfile = '-' -out_base = None -codes = [] -timestamp = True -timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" +def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, do_verify): + try: + (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0) + while 1: + f = fq.get() + if f == None: + break + (t, o, f, v) = \ + main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify) + tot_files += t + okay_files += o + failed_files += f + verify_failed_files += v + except (Empty, KeyboardInterrupt): + pass + rq.put((tot_files, okay_files, failed_files, verify_failed_files)) + rq.close() -try: - opts, files = getopt.getopt(sys.argv[1:], 'hatdro:c:p:', - ['help', 'verify', 'showast', 'showasm']) -except getopt.GetoptError, e: - print >>sys.stderr, '%s: %s' % (os.path.basename(sys.argv[0]), e) - sys.exit(-1) +if __name__ == '__main__': ## for Windows multiprocessing -for opt, val in opts: - if opt in ('-h', '--help'): - print __doc__ - sys.exit(0) - elif opt == '--verify': - do_verify = 1 - elif opt in ('--showasm', '-a'): - showasm = 1 - do_verify = 0 - elif opt in ('--showast', '-t'): - showast = 1 - do_verify = 0 - elif opt == '-o': - outfile = val - elif opt == '-d': - timestamp = False - elif opt == '-c': - codes.append(val) - elif opt == '-p': - numproc = int(val) - elif opt == '-r': - recurse_dirs = 1 - else: - print opt - print Usage_short - sys.exit(1) + if sys.version[:3] != '2.7': + print >>sys.stderr, 'Error: uncompyle2 requires Python 2.7.' + sys.exit(-1) + + showasm = showast = do_verify = multi = recurse_dirs = 0 + outfile = '-' + out_base = None + codes = [] + timestamp = True + timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" -# expand directory if specified -if recurse_dirs: - expanded_files = [] - for f in files: - if os.path.isdir(f): - for root, _, dir_files in os.walk(f): - for df in dir_files: - if df.endswith('.pyc') or df.endswith('.pyo'): - expanded_files.append(os.path.join(root, df)) - files = expanded_files + try: + opts, files = getopt.getopt(sys.argv[1:], 'hatdrmo:c:', + ['help', 'verify', 'showast', 'showasm']) + except getopt.GetoptError, e: + print >>sys.stderr, '%s: %s' % (os.path.basename(sys.argv[0]), e) + sys.exit(-1) -# argl, commonprefix works on strings, not on path parts, -# thus we must handle the case with files in 'some/classes' -# and 'some/cmds' -src_base = os.path.commonprefix(files) -if src_base[-1:] != os.sep: - src_base = os.path.dirname(src_base) -if src_base: - sb_len = len( os.path.join(src_base, '') ) - files = map(lambda f: f[sb_len:], files) - del sb_len - -if outfile == '-': - outfile = None # use stdout -elif outfile and os.path.isdir(outfile): - out_base = outfile; outfile = None -elif outfile and len(files) > 1: - out_base = outfile; outfile = None + for opt, val in opts: + if opt in ('-h', '--help'): + print __doc__ + sys.exit(0) + elif opt == '--verify': + do_verify = 1 + elif opt in ('--showasm', '-a'): + showasm = 1 + do_verify = 0 + elif opt in ('--showast', '-t'): + showast = 1 + do_verify = 0 + elif opt == '-o': + outfile = val + elif opt == '-d': + timestamp = False + elif opt == '-c': + codes.append(val) + elif opt == '-m': + multi = 1 + elif opt == '-r': + recurse_dirs = 1 + else: + print opt + print Usage_short + sys.exit(1) -if timestamp: - print time.strftime(timestampfmt) -if numproc <= 1: - try: - result = main(src_base, out_base, files, codes, outfile, showasm, showast, do_verify) - print '# decompiled %i files: %i 
okay, %i failed, %i verify failed' % result - except (KeyboardInterrupt): - pass - except verify.VerifyCmpError: - raise -else: - from multiprocessing import Process, Queue - from Queue import Empty - fqueue = Queue(len(files)+numproc) - for f in files: - fqueue.put(f) - for i in range(numproc): - fqueue.put(None) + # expand directory if specified + if recurse_dirs: + expanded_files = [] + for f in files: + if os.path.isdir(f): + for root, _, dir_files in os.walk(f): + for df in dir_files: + if df.endswith('.pyc') or df.endswith('.pyo'): + expanded_files.append(os.path.join(root, df)) + files = expanded_files + + # argl, commonprefix works on strings, not on path parts, + # thus we must handle the case with files in 'some/classes' + # and 'some/cmds' + src_base = os.path.commonprefix(files) + if src_base[-1:] != os.sep: + src_base = os.path.dirname(src_base) + if src_base: + sb_len = len( os.path.join(src_base, '') ) + files = map(lambda f: f[sb_len:], files) + del sb_len - rqueue = Queue(numproc) - - def process_func(): - try: - (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0) - while 1: - f = fqueue.get() - if f == None: - break - (t, o, f, v) = \ - main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify) - tot_files += t - okay_files += o - failed_files += f - verify_failed_files += v - except (Empty, KeyboardInterrupt): - pass - rqueue.put((tot_files, okay_files, failed_files, verify_failed_files)) - rqueue.close() + if outfile == '-': + outfile = None # use stdout + elif outfile and os.path.isdir(outfile): + out_base = outfile; outfile = None + elif outfile and len(files) > 1: + out_base = outfile; outfile = None - try: - procs = [Process(target=process_func) for i in range(numproc)] - for p in procs: - p.start() - for p in procs: - p.join() + if timestamp: + print time.strftime(timestampfmt) + if not multi: try: - (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0) - while 1: - (t, o, f, v) = rqueue.get(False) - tot_files += t - okay_files += o - failed_files += f - verify_failed_files += v - except Empty: + result = main(src_base, out_base, files, codes, outfile, showasm, showast, do_verify) + print '# decompiled %i files: %i okay, %i failed, %i verify failed' % result + except (KeyboardInterrupt): pass - print '# decompiled %i files: %i okay, %i failed, %i verify failed' % \ - (tot_files, okay_files, failed_files, verify_failed_files) - except (KeyboardInterrupt, OSError): - pass + except verify.VerifyCmpError: + raise + else: + numproc = cpu_count() + fqueue = Queue(len(files)+numproc) + for f in files: + fqueue.put(f) + for i in range(numproc): + fqueue.put(None) + + rqueue = Queue(numproc) + try: + procs = [Process(target=process_func, \ + args=(fqueue, rqueue, src_base, out_base, codes, outfile, showasm, showast, do_verify)) \ + for i in range(numproc)] + for p in procs: + p.start() + for p in procs: + p.join() + try: + (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0) + while 1: + (t, o, f, v) = rqueue.get(False) + tot_files += t + okay_files += o + failed_files += f + verify_failed_files += v + except Empty: + pass + print '# decompiled %i files: %i okay, %i failed, %i verify failed' % \ + (tot_files, okay_files, failed_files, verify_failed_files) + except (KeyboardInterrupt, OSError): + pass + -if timestamp: - print time.strftime(timestampfmt) + if timestamp: + print time.strftime(timestampfmt) From 3630f5038ee3e72e7a9d33851cf02eb7f4deb556 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 12 Feb 
2013 13:26:55 -0600 Subject: [PATCH 17/36] fixed a bug with a conditional within an and/or in a return statement --- uncompyle2/Parser.py | 21 +++++++++++++++++---- uncompyle2/Walker.py | 25 +++++++++++++++++++------ 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index e95c9f5..6f1b807 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -333,13 +333,13 @@ def p_grammar(self, args): call_stmt ::= expr POP_TOP stmt ::= return_stmt - return_stmt ::= expr RETURN_VALUE + return_stmt ::= ret_expr RETURN_VALUE return_stmts ::= return_stmt return_stmts ::= _stmts return_stmt return_if_stmts ::= return_if_stmt return_if_stmts ::= _stmts return_if_stmt - return_if_stmt ::= expr RETURN_END_IF + return_if_stmt ::= ret_expr RETURN_END_IF stmt ::= break_stmt @@ -636,11 +636,24 @@ def p_expr(self, args): conditional ::= expr POP_JUMP_IF_FALSE expr JUMP_FORWARD expr COME_FROM conditional ::= expr POP_JUMP_IF_FALSE expr JUMP_ABSOLUTE expr expr ::= conditionalnot - conditionalnot ::= expr POP_JUMP_IF_TRUE expr _jump expr COME_FROM + conditionalnot ::= expr POP_JUMP_IF_TRUE expr JUMP_FORWARD expr COME_FROM + conditionalnot ::= expr POP_JUMP_IF_TRUE expr JUMP_ABSOLUTE expr + + ret_expr ::= expr + ret_expr ::= ret_and + ret_expr ::= ret_or + + ret_expr_or_cond ::= ret_expr + ret_expr_or_cond ::= ret_cond + ret_expr_or_cond ::= ret_cond_not + + ret_and ::= expr JUMP_IF_FALSE_OR_POP ret_expr_or_cond COME_FROM + ret_or ::= expr JUMP_IF_TRUE_OR_POP ret_expr_or_cond COME_FROM + ret_cond ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF ret_expr_or_cond + ret_cond_not ::= expr POP_JUMP_IF_TRUE expr RETURN_END_IF ret_expr_or_cond stmt ::= return_lambda stmt ::= conditional_lambda - stmt ::= conditional_lambda2 return_lambda ::= expr RETURN_VALUE LAMBDA_MARKER conditional_lambda ::= expr POP_JUMP_IF_FALSE return_if_stmt return_stmt LAMBDA_MARKER diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index c615947..54c783b 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -53,7 +53,7 @@ # the end of functions). 
RETURN_LOCALS = AST('return_stmt', - [ AST('expr', [ Token('LOAD_LOCALS') ]), + [ AST('ret_expr', [AST('expr', [ Token('LOAD_LOCALS') ])]), Token('RETURN_VALUE')]) @@ -198,13 +198,15 @@ # 'dup_topx': ( '%c', 0), 'designList': ( '%c = %c', 0, -1 ), 'and': ( '%c and %c', 0, 2 ), + 'ret_and': ( '%c and %c', 0, 2 ), 'and2': ( '%c', 3 ), 'or': ( '%c or %c', 0, 2 ), + 'ret_or': ( '%c or %c', 0, 2 ), 'conditional': ( '%p if %p else %p', (2,27), (0,27), (4,27)), - 'conditionaland': ( '%p if %p and %p else %p', (4,27), (0,24), (2,24), (6,27)), + 'ret_cond': ( '%p if %p else %p', (2,27), (0,27), (4,27)), 'conditionalnot': ( '%p if not %p else %p', (2,27), (0,22), (4,27)), + 'ret_cond_not': ( '%p if not %p else %p', (2,27), (0,22), (4,27)), 'conditional_lambda': ( '(%c if %c else %c)', 2, 0, 3), - 'conditional_lambda2': ( '(%c if %p and %p else %c)', 4, (0,24), (2,24), 5), 'return_lambda': ('%c', 0), 'compare': ( '%p %[-1]{pattr} %p', (0,19), (1,19) ), 'cmp_list': ( '%p %p', (0,20), (1,19)), @@ -364,12 +366,15 @@ 'unary_not': 22, 'and': 24, + 'ret_and': 24, 'or': 26, + 'ret_or': 26, 'conditional': 28, - 'conditionaland': 28, 'conditionalnot': 28, + 'ret_cond': 28, + 'ret_cond_not': 28, '_mklambda': 30, 'yield': 101 @@ -592,7 +597,7 @@ def n_return_stmt(self, node): self.prune() else: self.write(self.indent, 'return') - if self.return_none or node != AST('return_stmt', [NONE, Token('RETURN_VALUE')]): + if self.return_none or node != AST('return_stmt', [AST('ret_expr', [NONE]), Token('RETURN_VALUE')]): self.write(' ') self.preorder(node[0]) self.print_() @@ -604,7 +609,7 @@ def n_return_if_stmt(self, node): self.prune() else: self.write(self.indent, 'return') - if self.return_none or node != AST('return_if_stmt', [NONE, Token('RETURN_END_IF')]): + if self.return_none or node != AST('return_stmt', [AST('ret_expr', [NONE]), Token('RETURN_END_IF')]): self.write(' ') self.preorder(node[0]) self.print_() @@ -667,6 +672,14 @@ def n_expr(self, node): self.prec = p self.prune() + def n_ret_expr(self, node): + if len(node) == 1 and node[0] == 'expr': + self.n_expr(node[0]) + else: + self.n_expr(node) + + n_ret_expr_or_cond = n_expr + def n_binary_expr(self, node): self.preorder(node[0]) self.write(' ') From 78f72db67b5743cfb3685079aea58dbfde1e11b8 Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 13 Feb 2013 21:56:39 -0600 Subject: [PATCH 18/36] Apply previous fix to lambdas, oops --- uncompyle2/Parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index 6f1b807..0c38225 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -655,7 +655,7 @@ def p_expr(self, args): stmt ::= return_lambda stmt ::= conditional_lambda - return_lambda ::= expr RETURN_VALUE LAMBDA_MARKER + return_lambda ::= ret_expr RETURN_VALUE LAMBDA_MARKER conditional_lambda ::= expr POP_JUMP_IF_FALSE return_if_stmt return_stmt LAMBDA_MARKER cmp ::= cmp_list From 3582eff425131bc95e0b58239f03cb3d124f38b7 Mon Sep 17 00:00:00 2001 From: wibiti Date: Fri, 15 Feb 2013 12:56:56 -0600 Subject: [PATCH 19/36] little bug fixes --- uncompyle2/Parser.py | 12 ++++++------ uncompyle2/Scanner.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index 0c38225..c0a47b7 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -233,9 +233,7 @@ def p_import20(self, args): importlist2 ::= importlist2 import_as importlist2 ::= import_as import_as ::= IMPORT_NAME designator - import_as ::= IMPORT_NAME LOAD_ATTR designator - 
import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR designator - import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR LOAD_ATTR designator + import_as ::= IMPORT_NAME load_attrs designator import_as ::= IMPORT_FROM designator importstmt ::= LOAD_CONST LOAD_CONST import_as @@ -249,10 +247,11 @@ def p_import20(self, args): imports_cont ::= import_cont import_cont ::= LOAD_CONST LOAD_CONST import_as_cont import_as_cont ::= IMPORT_NAME_CONT designator - import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR designator - import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR designator - import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR LOAD_ATTR designator + import_as_cont ::= IMPORT_NAME_CONT load_attrs designator import_as_cont ::= IMPORT_FROM designator + + load_attrs ::= LOAD_ATTR + load_attrs ::= load_attrs LOAD_ATTR ''' def p_grammar(self, args): @@ -472,6 +471,7 @@ def p_grammar(self, args): except_suite ::= c_stmts_opt JUMP_FORWARD except_suite ::= c_stmts_opt jmp_abs except_suite ::= return_stmts + except_suite ::= continue_stmts except_cond1 ::= DUP_TOP expr COMPARE_OP POP_JUMP_IF_FALSE POP_TOP POP_TOP POP_TOP diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 003cdfc..f4ab49d 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -230,7 +230,7 @@ def unmangle(name): # Now all values loaded via LOAD_CLOSURE are packed into # a tuple before calling MAKE_CLOSURE. if op == BUILD_TUPLE and \ - code[offset-3] == LOAD_CLOSURE: + code[self.prev[offset]] == LOAD_CLOSURE: continue else: opname = '%s_%d' % (opname, oparg) From 6e96b6483cb524fe206663a430bf2ec2a5326425 Mon Sep 17 00:00:00 2001 From: wibiti Date: Fri, 15 Feb 2013 14:55:10 -0600 Subject: [PATCH 20/36] fix a parse error due to a pass stmt misidentified as a continue stmt prior to end of a loop --- uncompyle2/Scanner.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index f4ab49d..2807e31 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -767,12 +767,20 @@ def detect_structure(self, pos, op=None): return if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \ - and pre[rtarget] != pos and pre[pre[rtarget]] != pos \ - and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA): - rtarget = pre[rtarget] + and pre[rtarget] != pos and pre[pre[rtarget]] != pos: + if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK: + if code[pre[pre[rtarget]]] != JA: + pass + elif self.get_target(pre[pre[rtarget]]) != target: + pass + else: + rtarget = pre[rtarget] + else: + rtarget = pre[rtarget] #does the if jump just beyond a jump op, then this is probably an if statement if code[pre[rtarget]] in (JA, JF): + #import pdb; pdb.set_trace() if_end = self.get_target(pre[rtarget]) #is this a loop not an if? From 5aaaea5e8464c0071035bf7ce9006ea2f66d7439 Mon Sep 17 00:00:00 2001 From: wibiti Date: Fri, 15 Feb 2013 22:28:08 -0600 Subject: [PATCH 21/36] Fix a bug where a pass stmt at the end of an except stmt suite is mistaken for a continue stmt when inside a loop. 
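
A minimal shape of the code this fixes, for reference (seq, work and item are
placeholder names, not taken from the test suite):

    for item in seq:
        try:
            work(item)
        except ValueError:
            pass    # used to come back out of the decompiler as 'continue'

The end of the except suite compiles to a jump back to the loop header, which
looks just like an explicit 'continue' at the bytecode level; the change below
records that jump offset in not_continue so the walker keeps the trailing
'pass'.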
--- uncompyle2/Scanner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 2807e31..43b783e 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -531,6 +531,7 @@ def next_except_jump(self, start): if except_match: jmp = self.prev[self.get_target(except_match)] self.ignore_if.add(except_match) + self.not_continue.add(jmp) return jmp count_END_FINALLY = 0 @@ -540,6 +541,7 @@ def next_except_jump(self, start): if op == END_FINALLY: if count_END_FINALLY == count_SETUP_: assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE) + self.not_continue.add(self.prev[i]) return self.prev[i] count_END_FINALLY += 1 elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): From 384667831b143cb9ee875602b7e8b63d4c29b929 Mon Sep 17 00:00:00 2001 From: wibiti Date: Fri, 15 Feb 2013 22:29:44 -0600 Subject: [PATCH 22/36] Bug fix for parsing extended assert statements --- uncompyle2/Parser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index c0a47b7..7086a8c 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -404,7 +404,7 @@ def p_grammar(self, args): LOAD_ASSERT RAISE_VARARGS assert2 ::= assert_expr POP_JUMP_IF_TRUE - LOAD_ASSERT expr RAISE_VARARGS + LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS assert_expr ::= expr assert_expr ::= assert_expr_or @@ -471,7 +471,6 @@ def p_grammar(self, args): except_suite ::= c_stmts_opt JUMP_FORWARD except_suite ::= c_stmts_opt jmp_abs except_suite ::= return_stmts - except_suite ::= continue_stmts except_cond1 ::= DUP_TOP expr COMPARE_OP POP_JUMP_IF_FALSE POP_TOP POP_TOP POP_TOP From 1a6aa522652c4eb968b226730e438e72ad081d4f Mon Sep 17 00:00:00 2001 From: wibiti Date: Mon, 18 Feb 2013 01:02:55 -0600 Subject: [PATCH 23/36] Fix bugs -Add parens for tuple unpacked from exception matched in except clause. -Better identification of assert statement bytecode in scanner. -Verify now ignores differences in docstrings because uncompyle2 reformats some docstrings. -Fixes another bug in scanner. --- uncompyle2/Walker.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 54c783b..8a4609c 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -238,8 +238,11 @@ 'call_stmt': ( '%|%p\n', (0,200)), 'break_stmt': ( '%|break\n', ), 'continue_stmt': ( '%|continue\n', ), - 'jcontinue_stmt': ( '%|continue\n', ), - 'raise_stmt': ( '%|raise %[0]C\n', (0,sys.maxint,', ') ), + + 'raise_stmt0': ( '%|raise\n', ), + 'raise_stmt1': ( '%|raise %c\n', 0), + 'raise_stmt2': ( '%|raise %c, %c\n', 0, 1), + 'raise_stmt3': ( '%|raise %c, %c, %c\n', 0, 1, 2), # 'yield': ( 'yield %c', 0), # 'return_stmt': ( '%|return %c\n', 0), @@ -1084,6 +1087,11 @@ def n_assign3(self, node): if n[0] == 'unpack': n[0].type = 'unpack_w_parens' self.default(node) + + def n_except_cond2(self, node): + if node[5][0] == 'unpack': + node[5][0].type = 'unpack_w_parens' + self.default(node) def engine(self, entry, startnode): #self.print_("-----") From 3472f27dac246397e35fd9a2c0ac8e1b45c09b44 Mon Sep 17 00:00:00 2001 From: wibiti Date: Mon, 18 Feb 2013 01:04:07 -0600 Subject: [PATCH 24/36] Fix bugs -Add parens for tuple unpacked from exception matched in except clause. -Better identification of assert statement bytecode in scanner. -Verify now ignores differences in docstrings because uncompyle2 reformats some docstrings. -Fixes another bug in scanner. 
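
For reference, the four Python 2 raise shapes that the new grammar now tells
apart by the RAISE_VARARGS argument count (msg and tb stand for arbitrary
expressions):

    raise                        # raise_stmt0 ::= RAISE_VARARGS_0
    raise ValueError             # raise_stmt1 ::= expr RAISE_VARARGS_1
    raise ValueError, msg        # raise_stmt2 ::= expr expr RAISE_VARARGS_2
    raise ValueError, msg, tb    # raise_stmt3 ::= expr expr expr RAISE_VARARGS_3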
--- uncompyle2/Parser.py | 19 ++++++++++++------- uncompyle2/Scanner.py | 38 +++++++++++++++++++++++++------------- uncompyle2/verify.py | 5 +++++ 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index 7086a8c..2c91926 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -351,9 +351,15 @@ def p_grammar(self, args): continue_stmts ::= lastl_stmt continue_stmt continue_stmts ::= continue_stmt - stmt ::= raise_stmt - raise_stmt ::= exprlist RAISE_VARARGS - raise_stmt ::= nullexprlist RAISE_VARARGS + stmt ::= raise_stmt0 + stmt ::= raise_stmt1 + stmt ::= raise_stmt2 + stmt ::= raise_stmt3 + + raise_stmt0 ::= RAISE_VARARGS_0 + raise_stmt1 ::= expr RAISE_VARARGS_1 + raise_stmt2 ::= expr expr RAISE_VARARGS_2 + raise_stmt3 ::= expr expr expr RAISE_VARARGS_3 stmt ::= exec_stmt exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT @@ -401,10 +407,10 @@ def p_grammar(self, args): classdefdeco2 ::= LOAD_CONST expr mkfunc CALL_FUNCTION_0 BUILD_CLASS assert ::= assert_expr POP_JUMP_IF_TRUE - LOAD_ASSERT RAISE_VARARGS + LOAD_ASSERT RAISE_VARARGS_1 assert2 ::= assert_expr POP_JUMP_IF_TRUE - LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS + LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 assert_expr ::= expr assert_expr ::= assert_expr_or @@ -550,7 +556,6 @@ def p_expr(self, args): expr ::= LOAD_FAST expr ::= LOAD_NAME expr ::= LOAD_CONST - expr ::= LOAD_ASSERT expr ::= LOAD_GLOBAL expr ::= LOAD_DEREF expr ::= LOAD_LOCALS @@ -749,7 +754,7 @@ def parse(tokens, customize): rule = 'unpack ::= ' + k + ' designator'*v elif op == 'UNPACK_LIST': rule = 'unpack_list ::= ' + k + ' designator'*v - elif op == 'DUP_TOPX': + elif op in ('DUP_TOPX', 'RAISE_VARARGS'): # no need to add a rule continue #rule = 'dup_topx ::= ' + 'expr '*v + k diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index 43b783e..d148b90 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -105,7 +105,6 @@ def disassemble(self, co, classname=None): n = len(code) self.prev = [0] for i in self.op_range(0, n): - c = code[i] op = code[i] self.prev.append(i) if op >= HAVE_ARGUMENT: @@ -128,8 +127,6 @@ def disassemble(self, co, classname=None): self.lines.append(linetuple(prev_line_no, n)) j+=1 - cf = self.find_jump_targets(code) - if classname: classname = '_' + classname.lstrip('_') + '__' def unmangle(name): @@ -145,6 +142,14 @@ def unmangle(name): names = co.co_names varnames = co.co_varnames + self.load_asserts = set() + for i in self.op_range(0, n): + if code[i] == PJIT and code[i+3] == LOAD_GLOBAL: + if names[code[i+4] + 256*code[i+5]] == 'AssertionError': + self.load_asserts.add(i+3) + + cf = self.find_jump_targets(code) + last_stmt = self.next_stmt[0] i = self.next_stmt[last_stmt] replace = {} @@ -224,7 +229,7 @@ def unmangle(name): UNPACK_SEQUENCE, MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, CALL_FUNCTION_VAR, CALL_FUNCTION_KW, - CALL_FUNCTION_VAR_KW, DUP_TOPX, + CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS ): # CE - Hack for >= 2.5 # Now all values loaded via LOAD_CLOSURE are packed into @@ -246,11 +251,8 @@ def unmangle(name): opname = 'JUMP_BACK' elif op == LOAD_GLOBAL: - try: - if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE': - opname = 'LOAD_ASSERT' - except AttributeError: - pass + if offset in self.load_asserts: + opname = 'LOAD_ASSERT' elif op == RETURN_VALUE: if offset in self.return_end_ifs: opname = 'RETURN_END_IF' @@ -751,6 +753,11 @@ def detect_structure(self, pos, op=None): self.fixed_jumps[pos] = match[-1] return else: # op 
== PJIT + if (pos+3) in self.load_asserts: + if code[pre[rtarget]] == RAISE_VARARGS: + return + self.load_asserts.remove(pos+3) + next = self.next_stmt[pos] if pre[next] == pos: pass @@ -759,10 +766,15 @@ def detect_structure(self, pos, op=None): if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE): self.fixed_jumps[pos] = pre[next] return - elif code[next] == JA and code[target] in (JA, JF) \ - and self.get_target(target) == self.get_target(next): - self.fixed_jumps[pos] = pre[next] - return + elif code[next] == JA and code[target] in (JA, JF): + next_target = self.get_target(next) + if self.get_target(target) == next_target: + self.fixed_jumps[pos] = pre[next] + return + elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target): + self.fixed_jumps[pos] = pre[next] + return + #don't add a struct for a while test, it's already taken care of if pos in self.ignore_if: diff --git a/uncompyle2/verify.py b/uncompyle2/verify.py index 8a9239a..6b2403a 100644 --- a/uncompyle2/verify.py +++ b/uncompyle2/verify.py @@ -225,6 +225,11 @@ def cmp_code_objects(version, code_obj1, code_obj2, name=''): i1 += 3 i2 += 1 continue + elif i == 1 and tokens1[i1+i].type == 'STORE_NAME' == tokens2[i2+i].type \ + and tokens1[i1+i].pattr == '__doc__' == tokens2[i2+i].pattr: + i1 += 2 + i2 += 2 + continue elif tokens1[i1].type == 'UNARY_NOT': if tokens2[i2].type == 'POP_JUMP_IF_TRUE': if tokens1[i1+1].type == 'POP_JUMP_IF_FALSE': From 8e2f11eefdcbe2ca688a90d12b24024e4428fdaa Mon Sep 17 00:00:00 2001 From: wibiti Date: Tue, 19 Feb 2013 00:36:34 -0600 Subject: [PATCH 25/36] Change command line options. directories will be recursed by default now. --norecur to disable --py will give decompiled files a .py file extension. source paths will be preserved into the destination path now, unless -s option is used --- scripts/uncompyle2 | 68 +++++++++++++++++++++++++----------------- uncompyle2/__init__.py | 8 +++-- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/scripts/uncompyle2 b/scripts/uncompyle2 index 9c043c9..a142d85 100755 --- a/scripts/uncompyle2 +++ b/scripts/uncompyle2 @@ -4,12 +4,12 @@ # Copyright (c) 2000-2002 by hartmut Goebel # """ -Usage: uncompyle [OPTIONS]... [ FILE | DIR]... +Usage: uncompyle2 [OPTIONS]... [ FILE | DIR]... Examples: - uncompyle foo.pyc bar.pyc # uncompyle foo.pyc, bar.pyc to stdout - uncompyle -o . foo.pyc bar.pyc # uncompyle to ./foo.dis and ./bar.dis - uncompyle -o /tmp /usr/lib/python1.5 # uncompyle whole library + uncompyle2 foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout + uncompyle2 -o . foo.pyc bar.pyc # decompile to ./foo.dis and ./bar.dis + uncompyle2 -o /tmp /usr/lib/python1.5 # decompile whole library Options: -o output decompiled files to this path: @@ -20,12 +20,16 @@ Options: -> /tmp/fasel.dis, /tmp/foo.dis uncompyle -o /tmp bla/fasel.pyc bar/foo.pyc -> /tmp/bla/fasel.dis, /tmp/bar/foo.dis + -s if multiple input files are decompiled, the common prefix + is stripped from these names and the remainder appended to + uncompyle -o /tmp /usr/lib/python1.5 -> /tmp/smtplib.dis ... 
/tmp/lib-tk/FixTk.dis -c attempts a disassembly after compiling -d do not print timestamps -m use multiprocessing - -r recurse directories looking for .pyc and .pyo files + --py use '.py' extension for generated files + --norecur don't recurse directories looking for .pyc and .pyo files --verify compare generated source with input byte-code (requires -o) --help show this message @@ -35,13 +39,14 @@ Debugging Options: --showast -t include AST (abstract syntax tree) (disables --verify) Extensions of generated files: - '.dis' successfully decompiled (and verified if --verify) - '.dis_unverified' successfully decompile but --verify failed - '.nodis' uncompyle failed (contact author for enhancement) + '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) + '.py' with --py option + + '_unverified' successfully decompile but --verify failed + + '_failed' uncompyle failed (contact author for enhancement) """ Usage_short = \ -"decomyple [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..." +"uncompyle2 [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..." import sys, os, getopt from uncompyle2 import main, verify @@ -49,7 +54,7 @@ import time from multiprocessing import Process, Queue, cpu_count from Queue import Empty -def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, do_verify): +def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, do_verify, py): try: (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0) while 1: @@ -57,7 +62,7 @@ def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, d if f == None: break (t, o, f, v) = \ - main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify) + main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify, py) tot_files += t okay_files += o failed_files += f @@ -73,7 +78,7 @@ if __name__ == '__main__': ## for Windows multiprocessing print >>sys.stderr, 'Error: uncompyle2 requires Python 2.7.' 
sys.exit(-1) - showasm = showast = do_verify = multi = recurse_dirs = 0 + showasm = showast = do_verify = multi = norecur = strip_common_path = py = 0 outfile = '-' out_base = None codes = [] @@ -81,8 +86,8 @@ if __name__ == '__main__': ## for Windows multiprocessing timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" try: - opts, files = getopt.getopt(sys.argv[1:], 'hatdrmo:c:', - ['help', 'verify', 'showast', 'showasm']) + opts, files = getopt.getopt(sys.argv[1:], 'hatdrmso:c:', + ['help', 'verify', 'showast', 'showasm', 'norecur', 'py']) except getopt.GetoptError, e: print >>sys.stderr, '%s: %s' % (os.path.basename(sys.argv[0]), e) sys.exit(-1) @@ -107,15 +112,19 @@ if __name__ == '__main__': ## for Windows multiprocessing codes.append(val) elif opt == '-m': multi = 1 - elif opt == '-r': - recurse_dirs = 1 + elif opt == '--norecur': + norecur = 1 + elif opt == '-s': + strip_common_path = 1 + elif opt == '--py': + name_modification_function = lambda x: os.path.splitext(x)[0]+'.py' else: print opt print Usage_short sys.exit(1) # expand directory if specified - if recurse_dirs: + if not norecur: expanded_files = [] for f in files: if os.path.isdir(f): @@ -128,13 +137,16 @@ if __name__ == '__main__': ## for Windows multiprocessing # argl, commonprefix works on strings, not on path parts, # thus we must handle the case with files in 'some/classes' # and 'some/cmds' - src_base = os.path.commonprefix(files) - if src_base[-1:] != os.sep: - src_base = os.path.dirname(src_base) - if src_base: - sb_len = len( os.path.join(src_base, '') ) - files = map(lambda f: f[sb_len:], files) - del sb_len + if strip_common_path: + src_base = os.path.commonprefix(files) + if src_base[-1:] != os.sep: + src_base = os.path.dirname(src_base) + if src_base: + sb_len = len( os.path.join(src_base, '') ) + files = map(lambda f: f[sb_len:], files) + del sb_len + else: + src_base = '' if outfile == '-': outfile = None # use stdout @@ -147,7 +159,8 @@ if __name__ == '__main__': ## for Windows multiprocessing print time.strftime(timestampfmt) if not multi: try: - result = main(src_base, out_base, files, codes, outfile, showasm, showast, do_verify) + result = main(src_base, out_base, files, codes, outfile, + showasm, showast, do_verify, py) print '# decompiled %i files: %i okay, %i failed, %i verify failed' % result except (KeyboardInterrupt): pass @@ -164,8 +177,9 @@ if __name__ == '__main__': ## for Windows multiprocessing rqueue = Queue(numproc) try: - procs = [Process(target=process_func, \ - args=(fqueue, rqueue, src_base, out_base, codes, outfile, showasm, showast, do_verify)) \ + procs = [Process(target=process_func, + args=(fqueue, rqueue, src_base, out_base, codes, outfile, + showasm, showast, do_verify, py)) for i in range(numproc)] for p in procs: p.start() diff --git a/uncompyle2/__init__.py b/uncompyle2/__init__.py index 7d0e500..7178f3a 100644 --- a/uncompyle2/__init__.py +++ b/uncompyle2/__init__.py @@ -141,7 +141,7 @@ def __memUsage(): return '' def main(in_base, out_base, files, codes, outfile=None, - showasm=0, showast=0, do_verify=0): + showasm=0, showast=0, do_verify=0, py=0): """ in_base base directory for input files out_base base directory for output files (ignored when @@ -182,7 +182,11 @@ def _get_outstream(outfile): elif out_base is None: outstream = sys.stdout else: - outfile = os.path.join(out_base, file) + '_dis' + outfile = os.path.join(out_base, file) + if py: + outfile = outfile[:-1] + else: + outfile += '_dis' outstream = _get_outstream(outfile) #print >>sys.stderr, outfile From 
25091312bbdcd887840427cd6bedac94d72bc6fa Mon Sep 17 00:00:00 2001 From: wibiti Date: Tue, 19 Feb 2013 00:54:39 -0600 Subject: [PATCH 26/36] Fix --py option --- scripts/uncompyle2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/uncompyle2 b/scripts/uncompyle2 index a142d85..a3cffae 100755 --- a/scripts/uncompyle2 +++ b/scripts/uncompyle2 @@ -117,7 +117,7 @@ if __name__ == '__main__': ## for Windows multiprocessing elif opt == '-s': strip_common_path = 1 elif opt == '--py': - name_modification_function = lambda x: os.path.splitext(x)[0]+'.py' + py = 1 else: print opt print Usage_short From fded61952d1cabf051bd9d656c4e6470173b47b9 Mon Sep 17 00:00:00 2001 From: wibiti Date: Tue, 19 Feb 2013 13:16:13 -0600 Subject: [PATCH 27/36] back out a bugfix that was not fully working --- uncompyle2/Scanner.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index d148b90..a8ced6d 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -821,12 +821,12 @@ def detect_structure(self, pos, op=None): elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): target = self.get_target(pos, op) - if target > pos: - unop_target = self.last_instr(pos, target, JF, target) - if unop_target and code[unop_target+3] != ROT_TWO: - self.fixed_jumps[pos] = unop_target - else: - self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) +# if target > pos: +# unop_target = self.last_instr(pos, target, JF, target) +# if unop_target and code[unop_target+3] != ROT_TWO: +# self.fixed_jumps[pos] = unop_target +# else: + self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) From f5bb85b33ba30394a74529f96a7a525f55b720fb Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 27 Feb 2013 13:22:35 -0600 Subject: [PATCH 28/36] Fix parse for alternate byte code format of extended assert statement --- uncompyle2/Parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index 2c91926..967f2c8 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -412,6 +412,9 @@ def p_grammar(self, args): assert2 ::= assert_expr POP_JUMP_IF_TRUE LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 + assert2 ::= assert_expr POP_JUMP_IF_TRUE + LOAD_ASSERT expr RAISE_VARARGS_2 + assert_expr ::= expr assert_expr ::= assert_expr_or assert_expr ::= assert_expr_and From 0a51259ad8aec44f7144abc6ef4a7f29e0f37ed1 Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 27 Feb 2013 13:50:26 -0600 Subject: [PATCH 29/36] Fix file name command line arguments --- scripts/uncompyle2 | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/uncompyle2 b/scripts/uncompyle2 index a3cffae..1db9521 100755 --- a/scripts/uncompyle2 +++ b/scripts/uncompyle2 @@ -49,6 +49,9 @@ Usage_short = \ "uncompyle2 [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..." import sys, os, getopt +if sys.version[:3] != '2.7': + print >>sys.stderr, 'Error: uncompyle2 requires Python 2.7.' + sys.exit(-1) from uncompyle2 import main, verify import time from multiprocessing import Process, Queue, cpu_count @@ -73,10 +76,6 @@ def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, d rq.close() if __name__ == '__main__': ## for Windows multiprocessing - - if sys.version[:3] != '2.7': - print >>sys.stderr, 'Error: uncompyle2 requires Python 2.7.' 
- sys.exit(-1) showasm = showast = do_verify = multi = norecur = strip_common_path = py = 0 outfile = '-' @@ -132,6 +131,8 @@ if __name__ == '__main__': ## for Windows multiprocessing for df in dir_files: if df.endswith('.pyc') or df.endswith('.pyo'): expanded_files.append(os.path.join(root, df)) + else: + expanded_files.append(f) files = expanded_files # argl, commonprefix works on strings, not on path parts, From 697fde1448e8aa46fdd4f58ab3b96ef8771e8460 Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 27 Feb 2013 23:33:24 -0600 Subject: [PATCH 30/36] Add a newline to end of decompiled files --- uncompyle2/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uncompyle2/__init__.py b/uncompyle2/__init__.py index 7178f3a..a06fa1b 100644 --- a/uncompyle2/__init__.py +++ b/uncompyle2/__init__.py @@ -117,6 +117,8 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): walker.gen_source(ast, customize) for g in walker.mod_globs: walker.write('global %s ## Warning: Unused global\n' % g) + if walker.pending_newlines: + print >>__real_out if walker.ERROR: raise walker.ERROR From d4a2a49632e4355956e056a2f4a247923445fbad Mon Sep 17 00:00:00 2001 From: wibiti Date: Tue, 5 Mar 2013 23:31:58 -0600 Subject: [PATCH 31/36] Fix decompile for some bytecode strings with unusual opcodes at the end --- uncompyle2/Scanner.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index a8ced6d..df18a9f 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -102,7 +102,14 @@ def disassemble(self, co, classname=None): customize = {} Token = self.Token # shortcut self.code = code = array('B', co.co_code) + n = len(code) + for i in self.op_range(0, len(code)): + if code[i] in (RETURN_VALUE, END_FINALLY): + n = i + 1 + + self.code = code = array('B', co.co_code[:n]) + self.prev = [0] for i in self.op_range(0, n): op = code[i] From be336ddcf66a13f2599712775b1c5428b1199b5a Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 13 Mar 2013 14:14:36 -0500 Subject: [PATCH 32/36] add --deob Adds a --deob flag to partially deobfuscate some code objects seen. Don't use --deob for normal code as it will rename local variables. 
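
The pass can also be driven from Python rather than through the --deob flag
of scripts/uncompyle2 (file names below are only examples):

    from uncompyle2 import uncompyle_file

    # deob=1 enables the new NOP-stripping / local-renaming pass
    with open('obfuscated.pyc_dis', 'w') as out:
        uncompyle_file('obfuscated.pyc', out, deob=1)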
--- scripts/uncompyle2 | 14 ++++---- uncompyle2/Scanner.py | 76 ++++++++++++++++++++++++++++++++++++------ uncompyle2/__init__.py | 14 ++++---- 3 files changed, 81 insertions(+), 23 deletions(-) diff --git a/scripts/uncompyle2 b/scripts/uncompyle2 index 1db9521..0f57f83 100755 --- a/scripts/uncompyle2 +++ b/scripts/uncompyle2 @@ -57,7 +57,7 @@ import time from multiprocessing import Process, Queue, cpu_count from Queue import Empty -def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, do_verify, py): +def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, do_verify, py, deob): try: (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0) while 1: @@ -65,7 +65,7 @@ def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, d if f == None: break (t, o, f, v) = \ - main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify, py) + main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify, py, deob) tot_files += t okay_files += o failed_files += f @@ -77,7 +77,7 @@ def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, d if __name__ == '__main__': ## for Windows multiprocessing - showasm = showast = do_verify = multi = norecur = strip_common_path = py = 0 + showasm = showast = do_verify = multi = norecur = strip_common_path = py = deob = 0 outfile = '-' out_base = None codes = [] @@ -86,7 +86,7 @@ if __name__ == '__main__': ## for Windows multiprocessing try: opts, files = getopt.getopt(sys.argv[1:], 'hatdrmso:c:', - ['help', 'verify', 'showast', 'showasm', 'norecur', 'py']) + ['help', 'verify', 'showast', 'showasm', 'norecur', 'py', 'deob']) except getopt.GetoptError, e: print >>sys.stderr, '%s: %s' % (os.path.basename(sys.argv[0]), e) sys.exit(-1) @@ -117,6 +117,8 @@ if __name__ == '__main__': ## for Windows multiprocessing strip_common_path = 1 elif opt == '--py': py = 1 + elif opt == '--deob': + deob = 1 else: print opt print Usage_short @@ -161,7 +163,7 @@ if __name__ == '__main__': ## for Windows multiprocessing if not multi: try: result = main(src_base, out_base, files, codes, outfile, - showasm, showast, do_verify, py) + showasm, showast, do_verify, py, deob) print '# decompiled %i files: %i okay, %i failed, %i verify failed' % result except (KeyboardInterrupt): pass @@ -180,7 +182,7 @@ if __name__ == '__main__': ## for Windows multiprocessing try: procs = [Process(target=process_func, args=(fqueue, rqueue, src_base, out_base, codes, outfile, - showasm, showast, do_verify, py)) + showasm, showast, do_verify, py, deob)) for i in range(numproc)] for p in procs: p.start() diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index df18a9f..c9ee50c 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -90,25 +90,82 @@ def setTokenClass(self, tokenClass): def resetTokenClass(self): self.setTokenClass(Token) + + def deobfuscate(self, co, linestarts, varnames): + n = 0 + code = self.code + for i in self.op_range(0, len(code)): + if code[i] in (RETURN_VALUE, END_FINALLY): + n = i + 1 + + fixed_code = array('B') + linestartoffsets = {a:b for (a, b) in linestarts[1:]} + newlinestarts = linestarts[0:1] + old_to_new = {} + new_to_old = {} + m = 0 + for i in self.op_range(0, n): + old_to_new[i] = m + new_to_old[m] = i + if i in linestartoffsets: + newlinestarts.append( (m, linestartoffsets[i]) ) + if code[i] != NOP: + fixed_code.append(code[i]) + m += 1 + if code[i] >= HAVE_ARGUMENT: + fixed_code.append(code[i+1]) + fixed_code.append(code[i+2]) + 
m += 2 + + self.code = code = fixed_code + for i in self.op_range(0, m): + if code[i] in dis.hasjrel: + #import pdb; pdb.set_trace() + old_jump = code[i+1] + code[i+2]*256 + old_target = new_to_old[i] + 3 + old_jump + new_target = old_to_new[old_target] + new_jump = new_target - i - 3 + code[i+1] = new_jump % 256 + code[i+2] = new_jump // 256 + if code[i] in dis.hasjabs: + old_target = code[i+1] + code[i+2]*256 + new_target = old_to_new[old_target] + code[i+1] = new_target % 256 + code[i+2] = new_target // 256 + + for i in range(len(varnames)): + varnames[i] = 'varnames_%s' % i + + for i in self.op_range(0, m): + if code[i] == IMPORT_NAME and code[i+3] == STORE_FAST: + varname_index = code[i+4] + code[i+5]*256 + name_index = code[i+1] + code[i+2]*256 + varnames[varname_index] = co.co_names[name_index] + - def disassemble(self, co, classname=None): + return newlinestarts + + + def disassemble(self, co, classname=None, deob=0): """ Disassemble a code object, returning a list of 'Token'. The main part of this procedure is modelled after dis.disassemble(). """ + #import pdb; pdb.set_trace() rv = [] customize = {} Token = self.Token # shortcut - self.code = code = array('B', co.co_code) + self.code = array('B', co.co_code) + linestarts = list(dis.findlinestarts(co)) + varnames = list(co.co_varnames) + if deob: + linestarts = self.deobfuscate(co, linestarts, varnames) + + code = self.code n = len(code) - for i in self.op_range(0, len(code)): - if code[i] in (RETURN_VALUE, END_FINALLY): - n = i + 1 - - self.code = code = array('B', co.co_code[:n]) self.prev = [0] for i in self.op_range(0, n): @@ -121,7 +178,7 @@ def disassemble(self, co, classname=None): self.lines = [] linetuple = namedtuple('linetuple', ['l_no', 'next']) j = 0 - linestarts = list(dis.findlinestarts(co)) + linestartoffsets = {a for (a, _) in linestarts} (prev_start_byte, prev_line_no) = linestarts[0] for (start_byte, line_no) in linestarts[1:]: @@ -143,11 +200,10 @@ def unmangle(name): free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] names = [ unmangle(name) for name in co.co_names ] - varnames = [ unmangle(name) for name in co.co_varnames ] + varnames = [ unmangle(name) for name in varnames ] else: free = co.co_cellvars + co.co_freevars names = co.co_names - varnames = co.co_varnames self.load_asserts = set() for i in self.op_range(0, n): diff --git a/uncompyle2/__init__.py b/uncompyle2/__init__.py index a06fa1b..e6f8cc3 100644 --- a/uncompyle2/__init__.py +++ b/uncompyle2/__init__.py @@ -78,7 +78,7 @@ def _load_module(filename): fp.close() return version, co -def uncompyle(version, co, out=None, showasm=0, showast=0): +def uncompyle(version, co, out=None, showasm=0, showast=0, deob=0): """ diassembles a given code block 'co' """ @@ -90,7 +90,7 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): print >>__real_out, '#Embedded file name: %s' % co.co_filename scanner = Scanner.getscanner(version) scanner.setShowAsm(showasm, out) - tokens, customize = scanner.disassemble(co) + tokens, customize = scanner.disassemble(co, deob=deob) # Build AST from disassembly. 
walker = Walker.Walker(out, scanner, showast=showast) @@ -122,12 +122,12 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): if walker.ERROR: raise walker.ERROR -def uncompyle_file(filename, outstream=None, showasm=0, showast=0): +def uncompyle_file(filename, outstream=None, showasm=0, showast=0, deob=0): """ decompile Python byte-code file (.pyc) """ version, co = _load_module(filename) - uncompyle(version, co, outstream, showasm, showast) + uncompyle(version, co, outstream, showasm, showast, deob) co = None #---- main ------- @@ -143,7 +143,7 @@ def __memUsage(): return '' def main(in_base, out_base, files, codes, outfile=None, - showasm=0, showast=0, do_verify=0, py=0): + showasm=0, showast=0, do_verify=0, py=0, deob=0): """ in_base base directory for input files out_base base directory for output files (ignored when @@ -173,7 +173,7 @@ def _get_outstream(outfile): version = sys.version[:3] # "2.5" with open(code, "r") as f: co = compile(f.read(), "", "exec") - uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast) + uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast, deob=deob) for file in files: infile = os.path.join(in_base, file) @@ -194,7 +194,7 @@ def _get_outstream(outfile): # try to decomyple the input file try: - uncompyle_file(infile, outstream, showasm, showast) + uncompyle_file(infile, outstream, showasm, showast, deob) tot_files += 1 except KeyboardInterrupt: if outfile: From 9873ce674c8aebb93f4edc67dab97007bce8a720 Mon Sep 17 00:00:00 2001 From: wibiti Date: Fri, 15 Mar 2013 16:50:12 -0500 Subject: [PATCH 33/36] Fix parse error in multiline exception expression matching not a perfect fix, but good enough --- uncompyle2/Scanner.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/uncompyle2/Scanner.py b/uncompyle2/Scanner.py index c9ee50c..c34d32e 100644 --- a/uncompyle2/Scanner.py +++ b/uncompyle2/Scanner.py @@ -592,12 +592,13 @@ def next_except_jump(self, start): construct in a try...except...else clause or None if not found. """ - except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE) - if except_match: - jmp = self.prev[self.get_target(except_match)] - self.ignore_if.add(except_match) - self.not_continue.add(jmp) - return jmp + if self.code[start] == DUP_TOP: + except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE) + if except_match: + jmp = self.prev[self.get_target(except_match)] + self.ignore_if.add(except_match) + self.not_continue.add(jmp) + return jmp count_END_FINALLY = 0 count_SETUP_ = 0 @@ -857,7 +858,6 @@ def detect_structure(self, pos, op=None): #does the if jump just beyond a jump op, then this is probably an if statement if code[pre[rtarget]] in (JA, JF): - #import pdb; pdb.set_trace() if_end = self.get_target(pre[rtarget]) #is this a loop not an if? From c443db784d8a7c8345e1b7cc94edd11a534708ac Mon Sep 17 00:00:00 2001 From: wibiti Date: Tue, 19 Mar 2013 00:35:10 -0500 Subject: [PATCH 34/36] Fix code generation of function calls issue Function calls inside generator expressions, set comprehensions, and dict comprehensions were sometimes mangled due to not invoking _customize on the associated anonymous function. 
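
A Python 2.7 snippet of the affected shape (fmt, x and values are made-up
names): a function call inside a generator expression, whose body is compiled
into its own anonymous code object:

    report = ', '.join(fmt(x, precision=2) for x in values)

comprehension_walk builds an AST from that inner code object, so the inner
object's customizations (e.g. the CALL_FUNCTION_* entries) have to be
installed as well; the one-line change below does exactly that.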
--- uncompyle2/Walker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 8a4609c..0920002 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -916,6 +916,7 @@ def comprehension_walk(self, node, iter_index): #assert isinstance(code, Code) ast = self.build_ast(code._tokens, code._customize) + self.customize(code._customize) ast = ast[0][0][0] n = ast[iter_index] From dc856f127ae27ede4d040f636163c4880974462d Mon Sep 17 00:00:00 2001 From: wibiti Date: Wed, 4 Jun 2014 15:31:48 -0500 Subject: [PATCH 35/36] Ugly partial fix for huge lists/tuples This somewhat improves cpu/memory consumption for huge (>10000) list/tuple decompilation. Such lists/tuples are pretty unusual but apparently they do exist in the real world. --- uncompyle2/Parser.py | 5 ++++- uncompyle2/Walker.py | 36 +++++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/uncompyle2/Parser.py b/uncompyle2/Parser.py index 967f2c8..aa62a49 100644 --- a/uncompyle2/Parser.py +++ b/uncompyle2/Parser.py @@ -693,6 +693,9 @@ def p_expr(self, args): exprlist ::= expr nullexprlist ::= + + expr32 ::= expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr + expr1024 ::= expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 ''' def nonterminal(self, nt, args): @@ -752,7 +755,7 @@ def parse(tokens, customize): #nop = lambda self, args: None op = k[:string.rfind(k, '_')] if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'): - rule = 'build_list ::= ' + 'expr '*v + k + rule = 'build_list ::= ' + 'expr1024 '*(v/1024) + 'expr32 '*((v/32)%32) + 'expr '*(v%32) + k elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): rule = 'unpack ::= ' + k + ' designator'*v elif op == 'UNPACK_LIST': diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 0920002..8a356f2 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -1036,33 +1036,51 @@ def n_build_list(self, node): """ p = self.prec self.prec = 100 - lastnode = node.pop().type - if lastnode.startswith('BUILD_LIST'): + lastnode = node.pop() + lastnodetype = lastnode.type + if lastnodetype.startswith('BUILD_LIST'): self.write('['); endchar = ']' - elif lastnode.startswith('BUILD_TUPLE'): + elif lastnodetype.startswith('BUILD_TUPLE'): self.write('('); endchar = ')' - elif lastnode.startswith('BUILD_SET'): + elif lastnodetype.startswith('BUILD_SET'): self.write('{'); endchar = '}' - elif lastnode.startswith('ROT_TWO'): + elif lastnodetype.startswith('ROT_TWO'): self.write('('); endchar = ')' else: raise 'Internal Error: n_build_list expects list or tuple' self.indentMore(INDENT_PER_LEVEL) - if len(node) > 3: + if lastnode.attr > 3: line_separator = ',\n' + self.indent else: line_separator = ', ' sep = INDENT_PER_LEVEL[:-1] for elem in node: - if (elem == 'ROT_THREE'): + if elem == 'ROT_THREE': + continue + if elem == 'expr1024': + for subelem in elem: + if subelem == 'expr32': + for subsubelem in subelem: + value = self.traverse(subsubelem) + self.write(sep, value) + sep = line_separator + continue + value = self.traverse(subelem) + self.write(sep, value) + sep = line_separator + continue + if elem == 'expr32': + for subelem in elem: + value = self.traverse(subelem) + self.write(sep, value) + sep = line_separator continue - 
assert elem == 'expr' value = self.traverse(elem) self.write(sep, value) sep = line_separator - if len(node) == 1 and lastnode.startswith('BUILD_TUPLE'): + if lastnode.attr == 1 and lastnodetype.startswith('BUILD_TUPLE'): self.write(',') self.write(endchar) self.indentLess(INDENT_PER_LEVEL) From dffbdc49ef54fa821d83dc7f7b64cbdda886a51e Mon Sep 17 00:00:00 2001 From: wibiti Date: Thu, 5 Jun 2014 10:56:42 -0500 Subject: [PATCH 36/36] Cleanup the last little fix --- uncompyle2/Walker.py | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/uncompyle2/Walker.py b/uncompyle2/Walker.py index 8a356f2..9ae9205 100644 --- a/uncompyle2/Walker.py +++ b/uncompyle2/Walker.py @@ -1049,33 +1049,28 @@ def n_build_list(self, node): else: raise 'Internal Error: n_build_list expects list or tuple' + flat_elems = [] + for elem in node: + if elem == 'expr1024': + for subelem in elem: + for subsubelem in subelem: + flat_elems.append(subsubelem) + elif elem == 'expr32': + for subelem in elem: + flat_elems.append(subelem) + else: + flat_elems.append(elem) + self.indentMore(INDENT_PER_LEVEL) if lastnode.attr > 3: line_separator = ',\n' + self.indent else: line_separator = ', ' sep = INDENT_PER_LEVEL[:-1] - for elem in node: + + for elem in flat_elems: if elem == 'ROT_THREE': continue - if elem == 'expr1024': - for subelem in elem: - if subelem == 'expr32': - for subsubelem in subelem: - value = self.traverse(subsubelem) - self.write(sep, value) - sep = line_separator - continue - value = self.traverse(subelem) - self.write(sep, value) - sep = line_separator - continue - if elem == 'expr32': - for subelem in elem: - value = self.traverse(subelem) - self.write(sep, value) - sep = line_separator - continue assert elem == 'expr' value = self.traverse(elem) self.write(sep, value)
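
As a quick sanity check on the grouping used by the new build_list rule in
PATCH 35/36 (the element count 10000 is only an example):

    def chunk_counts(v):
        # groups of 1024, groups of 32, then single exprs -- mirrors
        # 'expr1024 '*(v/1024) + 'expr32 '*((v/32)%32) + 'expr '*(v%32)
        return v // 1024, (v // 32) % 32, v % 32

    n1024, n32, n1 = chunk_counts(10000)            # -> 9, 24, 16
    assert 1024 * n1024 + 32 * n32 + n1 == 10000

Every BUILD_LIST/BUILD_TUPLE/BUILD_SET of v elements is covered by whole
expr1024 and expr32 groups plus at most 31 loose exprs, which is how the
patch improves cpu/memory consumption for >10000-element literals.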