diff --git a/PKG-INFO b/PKG-INFO index c5e150b..79df902 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -4,7 +4,7 @@ Version: 1.1 Summary: Python byte-code to source-code converter Home-page: http://github.com/sysfrog/uncompyle Author: Hartmut Goebel -Author-email: hartmut@oberon.noris.de +Author-email: h.goebel@crazy-compilers.com License: GPLv3 Description: UNKNOWN Platform: UNKNOWN diff --git a/README b/README index 9c4ec78..9c62e0d 100644 --- a/README +++ b/README @@ -1,13 +1,15 @@ - uncompyle -- A Python 2.7 byte-code decompiler - 0.12 - 2012-1-23 + uncompyle + A Python 2.7 byte-code decompiler, written in Python 2.7 + 0.13 + 2012-2-22 Introduction ------------ 'uncompyle' converts Python byte-code back into equivalent Python -source. It accepts byte-code from Python version 2.7 only. +source. It accepts byte-code from Python version 2.7 only. Additionally, +it will only run on Python 2.7. The generated source is very readable: docstrings, lists, tuples and hashes get pretty-printed. @@ -17,13 +19,14 @@ by compiling it and comparing both byte-codes. 'uncompyle' is based on John Aycock's generic small languages compiler 'spark' (http://www.csr.uvic.ca/~aycock/python/) and his prior work on -'decompyle'. +a tool called 'decompyle'. This tool has been vastly improved by +Hartmut Goebel `http://www.crazy-compilers.com/`_ Additional note (3 July 2004, Ben Burton): - The original website from which this software was obtained is no longer - available. It has now become a commercial decompilation service, with - no software available for download. + This software is no longer available from the original website. It + has now become a commercial decompilation service, with no + software available for download. Any developers seeking to make alterations or enhancements to this code should therefore consider these debian packages an appropriate starting @@ -51,7 +54,7 @@ Features Requirements ------------ -'decompile' requires Python 2.2 or later. +uncompyle requires Python 2.7 Installation diff --git a/scripts/uncompyler.py b/scripts/uncompyler.py index b464f8e..158e848 100755 --- a/scripts/uncompyler.py +++ b/scripts/uncompyler.py @@ -1,31 +1,35 @@ #!/usr/bin/env python2.7 # Mode: -*- python -*- # -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2000-2002 by hartmut Goebel # """ Usage: uncompyler [OPTIONS]... [ FILE | DIR]... Examples: - uncompyler foo.pyc bar.pyc # uncompyle foo.pyc, bar.pyc to stdout - uncompyler -o . foo.pyc bar.pyc # uncompyle to ./foo.dis and ./bar.dis - uncompyler -o /tmp /usr/lib/python1.5 # uncompyle whole library + uncompyler foo.pyc bar.pyc # decompile foo.pyc, bar.pyc to stdout + uncompyler -o . foo.pyc bar.pyc # decompile to ./foo.dis and ./bar.dis + uncompyler -o /tmp /usr/lib/python1.5 # decompile whole library Options: -o output decompiled files to this path: if multiple input files are decompiled, the common prefix is stripped from these names and the remainder appended to - uncompyler -o /tmp bla/fasel.pyc bla/foo.pyc + uncompyle -o /tmp bla/fasel.pyc bla/foo.pyc -> /tmp/fasel.dis, /tmp/foo.dis - uncompyler -o /tmp bla/fasel.pyc bar/foo.pyc + uncompyle -o /tmp bla/fasel.pyc bar/foo.pyc -> /tmp/bla/fasel.dis, /tmp/bar/foo.dis - uncompyler -o /tmp /usr/lib/python1.5 + -s if multiple input files are decompiled, the common prefix + is stripped from these names and the remainder appended to + + uncompyle -o /tmp /usr/lib/python1.5 -> /tmp/smtplib.dis ... /tmp/lib-tk/FixTk.dis -c attempts a disassembly after compiling -d do not print timestamps - -p use number of processes - -r recurse directories looking for .pyc and .pyo files + -m use multiprocessing + --py use '.py' extension for generated files + --norecur don't recurse directories looking for .pyc and .pyo files --verify compare generated source with input byte-code (requires -o) --help show this message @@ -35,42 +39,45 @@ --showast -t include AST (abstract syntax tree) (disables --verify) Extensions of generated files: - '.dis' successfully decompiled (and verified if --verify) - '.dis_unverified' successfully decompile but --verify failed - '.nodis' uncompyle failed (contact author for enhancement) + '.pyc_dis' '.pyo_dis' successfully decompiled (and verified if --verify) + '.py' with --py option + + '_unverified' successfully decompile but --verify failed + + '_failed' uncompyle failed (contact author for enhancement) """ -from threading import Thread -from multiprocessing import Process, Queue -from Queue import Empty -from uncompyle import main, verify -def process_func(src_base, out_base, codes, outfile, showasm, showast, do_verify, fqueue, rqueue): - try: - (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0) - while 1: - f = fqueue.get() - if f == None: - break - (t, o, f, v) = \ - main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify) - tot_files += t - okay_files += o - failed_files += f - verify_failed_files += v - except (Empty, KeyboardInterrupt, OSError): - pass - rqueue.put((tot_files, okay_files, failed_files, verify_failed_files)) - rqueue.close() +Usage_short = \ +"uncompyler [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..." -if __name__ == '__main__': - Usage_short = \ - "decomyple [--help] [--verify] [--showasm] [--showast] [-o ] FILE|DIR..." +import sys, os, getopt +if sys.version[:3] != '2.7': + print >>sys.stderr, 'Error: uncompyler requires Python 2.7.' + sys.exit(-1) +from uncompyler import main, verify +import time +from multiprocessing import Process, Queue, cpu_count +from Queue import Empty - import sys, os, getopt - import os.path - import time +def process_func(fq, rq, src_base, out_base, codes, outfile, showasm, showast, do_verify, py, deob): + try: + (tot_files, okay_files, failed_files, verify_failed_files) = (0,0,0,0) + while 1: + f = fq.get() + if f == None: + break + (t, o, f, v) = \ + main(src_base, out_base, [f], codes, outfile, showasm, showast, do_verify, py, deob) + tot_files += t + okay_files += o + failed_files += f + verify_failed_files += v + except (Empty, KeyboardInterrupt): + pass + rq.put((tot_files, okay_files, failed_files, verify_failed_files)) + rq.close() - showasm = showast = do_verify = numproc = recurse_dirs = 0 +if __name__ == '__main__': ## for Windows multiprocessing + + showasm = showast = do_verify = multi = norecur = strip_common_path = py = deob = 0 outfile = '-' out_base = None codes = [] @@ -78,8 +85,8 @@ def process_func(src_base, out_base, codes, outfile, showasm, showast, do_verify timestampfmt = "# %Y.%m.%d %H:%M:%S %Z" try: - opts, files = getopt.getopt(sys.argv[1:], 'hatdro:c:p:', - ['help', 'verify', 'showast', 'showasm']) + opts, files = getopt.getopt(sys.argv[1:], 'hatdrmso:c:', + ['help', 'verify', 'showast', 'showasm', 'norecur', 'py', 'deob']) except getopt.GetoptError, e: print >>sys.stderr, '%s: %s' % (os.path.basename(sys.argv[0]), e) sys.exit(-1) @@ -102,17 +109,23 @@ def process_func(src_base, out_base, codes, outfile, showasm, showast, do_verify timestamp = False elif opt == '-c': codes.append(val) - elif opt == '-p': - numproc = int(val) - elif opt == '-r': - recurse_dirs = 1 + elif opt == '-m': + multi = 1 + elif opt == '--norecur': + norecur = 1 + elif opt == '-s': + strip_common_path = 1 + elif opt == '--py': + py = 1 + elif opt == '--deob': + deob = 1 else: print opt print Usage_short sys.exit(1) # expand directory if specified - if recurse_dirs: + if not norecur: expanded_files = [] for f in files: if os.path.isdir(f): @@ -120,18 +133,23 @@ def process_func(src_base, out_base, codes, outfile, showasm, showast, do_verify for df in dir_files: if df.endswith('.pyc') or df.endswith('.pyo'): expanded_files.append(os.path.join(root, df)) + else: + expanded_files.append(f) files = expanded_files # argl, commonprefix works on strings, not on path parts, # thus we must handle the case with files in 'some/classes' # and 'some/cmds' - src_base = os.path.commonprefix(files) - if src_base[-1:] != os.sep: - src_base = os.path.dirname(src_base) - if src_base: - sb_len = len( os.path.join(src_base, '') ) - files = map(lambda f: f[sb_len:], files) - del sb_len + if strip_common_path: + src_base = os.path.commonprefix(files) + if src_base[-1:] != os.sep: + src_base = os.path.dirname(src_base) + if src_base: + sb_len = len( os.path.join(src_base, '') ) + files = map(lambda f: f[sb_len:], files) + del sb_len + else: + src_base = '' if outfile == '-': outfile = None # use stdout @@ -142,21 +160,17 @@ def process_func(src_base, out_base, codes, outfile, showasm, showast, do_verify if timestamp: print time.strftime(timestampfmt) - if numproc <= 1: + if not multi: try: - result = main(src_base, out_base, files, codes, outfile, showasm, showast, do_verify) + result = main(src_base, out_base, files, codes, outfile, + showasm, showast, do_verify, py, deob) print '# decompiled %i files: %i okay, %i failed, %i verify failed' % result - except (KeyboardInterrupt, OSError): + except (KeyboardInterrupt): pass except verify.VerifyCmpError: raise else: - # create directories beforehand - for f in files: - try: - os.makedirs(os.path.join(out_base, os.path.dirname(f))) - except OSError: - pass + numproc = cpu_count() fqueue = Queue(len(files)+numproc) for f in files: fqueue.put(f) @@ -166,7 +180,10 @@ def process_func(src_base, out_base, codes, outfile, showasm, showast, do_verify rqueue = Queue(numproc) try: - procs = [Process(target=process_func, args=(src_base, out_base, codes, outfile, showasm, showast, do_verify, fqueue, rqueue)) for i in range(numproc)] + procs = [Process(target=process_func, + args=(fqueue, rqueue, src_base, out_base, codes, outfile, + showasm, showast, do_verify, py, deob)) + for i in range(numproc)] for p in procs: p.start() for p in procs: diff --git a/setup.cfg b/setup.cfg index ef17758..a9bf1b4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [bdist_rpm] release = 1 -packager = Hartmut Goebel +packager = Hartmut Goebel doc_files = README # CHANGES.txt # USAGE.txt diff --git a/setup.py b/setup.py index 00feac2..39690b3 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ version = "1.1", description = "Python byte-code to source-code converter", author = "Hartmut Goebel", - author_email = "hartmut@oberon.noris.de", + author_email = "h.goebel@crazy-compilers.com", url = "http://github.com/gstarnberger/uncompyle", packages=['uncompyle'], scripts=['scripts/uncompyler.py'], diff --git a/test/compile_tests b/test/compile_tests index 029ca6a..f37e06f 100644 --- a/test/compile_tests +++ b/test/compile_tests @@ -5,7 +5,7 @@ compile_tests -- compile test patterns for the decompyle test suite This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_applyEquiv.py b/test/test_applyEquiv.py index 02fff19..1c13ee2 100644 --- a/test/test_applyEquiv.py +++ b/test/test_applyEquiv.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information def kwfunc(**kwargs): diff --git a/test/test_augmentedAssign.py b/test/test_augmentedAssign.py index 7c5a3df..2e4b611 100644 --- a/test/test_augmentedAssign.py +++ b/test/test_augmentedAssign.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_class.py b/test/test_class.py index e610eb6..748790e 100644 --- a/test/test_class.py +++ b/test/test_class.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_del.py b/test/test_del.py index 26419f9..cd1fe17 100644 --- a/test/test_del.py +++ b/test/test_del.py @@ -5,7 +5,7 @@ Snippet taken from python libs's test_class.py decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_docstring.py b/test/test_docstring.py index a27b141..4a92005 100644 --- a/test/test_docstring.py +++ b/test/test_docstring.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information ''' diff --git a/test/test_exec.py b/test/test_exec.py index ed44815..9f1502a 100644 --- a/test/test_exec.py +++ b/test/test_exec.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information testcode = 'a = 12' diff --git a/test/test_expressions.py b/test/test_expressions.py index de11ae8..b00dd37 100644 --- a/test/test_expressions.py +++ b/test/test_expressions.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information def _lsbStrToInt(str): diff --git a/test/test_extendedImport.py b/test/test_extendedImport.py index 277d4c7..efd3a3f 100644 --- a/test/test_extendedImport.py +++ b/test/test_extendedImport.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information import os, sys as System, time diff --git a/test/test_extendedPrint.py b/test/test_extendedPrint.py index c0a64d3..b1f2801 100644 --- a/test/test_extendedPrint.py +++ b/test/test_extendedPrint.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information import sys diff --git a/test/test_extendedarg.py b/test/test_extendedarg.py index ee9562d..2f7d98e 100644 --- a/test/test_extendedarg.py +++ b/test/test_extendedarg.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information for i in range(1<<15+1): diff --git a/test/test_functions.py b/test/test_functions.py index 7b793e3..805a526 100644 --- a/test/test_functions.py +++ b/test/test_functions.py @@ -3,7 +3,7 @@ # This source is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information def x0(): diff --git a/test/test_global.py b/test/test_global.py index c789eb8..f4d24f9 100644 --- a/test/test_global.py +++ b/test/test_global.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_globals.py b/test/test_globals.py index 6e9cad0..0ae3c09 100644 --- a/test/test_globals.py +++ b/test/test_globals.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information def f(): diff --git a/test/test_import.py b/test/test_import.py index 73d1b1b..dc7317a 100644 --- a/test/test_import.py +++ b/test/test_import.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_import_as.py b/test/test_import_as.py index 5695c9c..f5a5b43 100644 --- a/test/test_import_as.py +++ b/test/test_import_as.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_integers.py b/test/test_integers.py index 45ac600..5df52ed 100644 --- a/test/test_integers.py +++ b/test/test_integers.py @@ -5,7 +5,7 @@ Snippet taken from python libs's test_class.py decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_lambda.py b/test/test_lambda.py index eb214bf..f85dc33 100644 --- a/test/test_lambda.py +++ b/test/test_lambda.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information palette = map(lambda a: (a,a,a), range(256)) diff --git a/test/test_listComprehensions.py b/test/test_listComprehensions.py index 9e0f11e..0e8aa2b 100644 --- a/test/test_listComprehensions.py +++ b/test/test_listComprehensions.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information XXX = range(4) diff --git a/test/test_loops.py b/test/test_loops.py index c5e11b5..e7f2429 100644 --- a/test/test_loops.py +++ b/test/test_loops.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_loops2.py b/test/test_loops2.py index 50c1188..09da4ee 100644 --- a/test/test_loops2.py +++ b/test/test_loops2.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_misc.py b/test/test_misc.py index f7a74c8..03ef6c9 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -4,7 +4,7 @@ # Snippet taken from python libs's test_class.py # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information raise "This program can't be run" diff --git a/test/test_nested_elif.py b/test/test_nested_elif.py index 8aac638..f8a52ed 100644 --- a/test/test_nested_elif.py +++ b/test/test_nested_elif.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information a = None diff --git a/test/test_nested_scopes.py b/test/test_nested_scopes.py index 32646e1..e3d7e04 100644 --- a/test/test_nested_scopes.py +++ b/test/test_nested_scopes.py @@ -3,7 +3,7 @@ # This source is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information from __future__ import nested_scopes diff --git a/test/test_prettyprint.py b/test/test_prettyprint.py index 957d72e..6c326d9 100644 --- a/test/test_prettyprint.py +++ b/test/test_prettyprint.py @@ -5,7 +5,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_print.py b/test/test_print.py index dd30661..f9f9e89 100644 --- a/test/test_print.py +++ b/test/test_print.py @@ -3,7 +3,7 @@ # This simple program is part of the decompyle test suite. # # decompyle is a Python byte-code decompiler -# See http://www.goebel-consult.de/decompyle/ for download and +# See http://www.crazy-compilers.com/decompyle/ for # for further information print 1,2,3,4,5 diff --git a/test/test_print_to.py b/test/test_print_to.py index a6901ad..45d62e7 100644 --- a/test/test_print_to.py +++ b/test/test_print_to.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ import sys diff --git a/test/test_slices.py b/test/test_slices.py index 1580e5e..f449c9c 100644 --- a/test/test_slices.py +++ b/test/test_slices.py @@ -5,7 +5,7 @@ Snippet taken from python libs's test_class.py decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_tuple_params.py b/test/test_tuple_params.py index 7cab1b1..469493f 100644 --- a/test/test_tuple_params.py +++ b/test/test_tuple_params.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/test/test_tuples.py b/test/test_tuples.py index 57362bd..15e973a 100644 --- a/test/test_tuples.py +++ b/test/test_tuples.py @@ -4,7 +4,7 @@ This source is part of the decompyle test suite. decompyle is a Python byte-code decompiler -See http://www.goebel-consult.de/decompyle/ for download and +See http://www.crazy-compilers.com/decompyle/ for for further information """ diff --git a/uncompyle/Parser.py b/uncompyle/Parser.py index 2f1b0e0..aa62a49 100644 --- a/uncompyle/Parser.py +++ b/uncompyle/Parser.py @@ -1,5 +1,5 @@ # Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 2005 by Dan Pascu # # See main module for license. @@ -188,20 +188,26 @@ def p_assign(self, args): assign ::= expr DUP_TOP designList assign ::= expr designator - stmt ::= _25_assign2 - stmt ::= _25_assign3 - _25_assign2 ::= expr expr ROT_TWO designator designator - _25_assign3 ::= expr expr expr ROT_THREE ROT_TWO designator designator designator + stmt ::= assign2 + stmt ::= assign3 + assign2 ::= expr expr ROT_TWO designator designator + assign3 ::= expr expr expr ROT_THREE ROT_TWO designator designator designator ''' def p_print(self, args): ''' - stmt ::= print_stmt - stmt ::= print_stmt_nl - stmt ::= print_nl_stmt - print_stmt ::= expr PRINT_ITEM - print_nl_stmt ::= PRINT_NEWLINE - print_stmt_nl ::= print_stmt print_nl_stmt + stmt ::= print_items_stmt + stmt ::= print_nl + stmt ::= print_items_nl_stmt + + print_items_stmt ::= expr PRINT_ITEM print_items_opt + print_items_nl_stmt ::= expr PRINT_ITEM print_items_opt PRINT_NEWLINE_CONT + print_items_opt ::= print_items + print_items_opt ::= + print_items ::= print_items print_item + print_items ::= print_item + print_item ::= expr PRINT_ITEM_CONT + print_nl ::= PRINT_NEWLINE ''' def p_print_to(self, args): @@ -216,47 +222,36 @@ def p_print_to(self, args): print_to_items ::= print_to_item print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO ''' - # expr print_to* POP_TOP - # expr { print_to* } PRINT_NEWLINE_TO - def p_import15(self, args): - ''' + def p_import20(self, args): + ''' stmt ::= importstmt stmt ::= importfrom + stmt ::= importstar + stmt ::= importmultiple - importstmt ::= IMPORT_NAME STORE_FAST - importstmt ::= IMPORT_NAME STORE_NAME - - importfrom ::= IMPORT_NAME importlist POP_TOP - importlist ::= importlist IMPORT_FROM - importlist ::= IMPORT_FROM - ''' - - def p_import20(self, args): - ''' - stmt ::= importstmt2 - stmt ::= importfrom2 - stmt ::= importstar2 - - stmt ::= _25_importstmt - stmt ::= _25_importfrom - stmt ::= _25_importstar - - importstmt2 ::= LOAD_CONST import_as - importstar2 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR - - importfrom2 ::= LOAD_CONST IMPORT_NAME importlist2 POP_TOP importlist2 ::= importlist2 import_as importlist2 ::= import_as import_as ::= IMPORT_NAME designator - import_as ::= IMPORT_NAME LOAD_ATTR designator - import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR designator - import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR LOAD_ATTR designator + import_as ::= IMPORT_NAME load_attrs designator import_as ::= IMPORT_FROM designator - _25_importstmt ::= LOAD_CONST LOAD_CONST import_as - _25_importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR - _25_importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP + importstmt ::= LOAD_CONST LOAD_CONST import_as + importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR + importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP + importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR + importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP + importmultiple ::= LOAD_CONST LOAD_CONST import_as imports_cont + + imports_cont ::= imports_cont import_cont + imports_cont ::= import_cont + import_cont ::= LOAD_CONST LOAD_CONST import_as_cont + import_as_cont ::= IMPORT_NAME_CONT designator + import_as_cont ::= IMPORT_NAME_CONT load_attrs designator + import_as_cont ::= IMPORT_FROM designator + + load_attrs ::= LOAD_ATTR + load_attrs ::= load_attrs LOAD_ATTR ''' def p_grammar(self, args): @@ -264,7 +259,6 @@ def p_grammar(self, args): stmts ::= stmts sstmt stmts ::= sstmt sstmt ::= stmt - sstmt ::= return_stmt sstmt ::= ifelsestmtr sstmt ::= return_stmt RETURN_LAST @@ -278,30 +272,43 @@ def p_grammar(self, args): c_stmts ::= _stmts c_stmts ::= _stmts lastc_stmt c_stmts ::= lastc_stmt - c_stmts ::= _stmts lastl_stmt continue_stmt - c_stmts ::= lastl_stmt continue_stmt - c_stmts ::= continue_stmt + c_stmts ::= continue_stmts lastc_stmt ::= iflaststmt lastc_stmt ::= whileelselaststmt lastc_stmt ::= forelselaststmt lastc_stmt ::= ifelsestmtr - lastc_stmt ::= c_trystmt + lastc_stmt ::= ifelsestmtc + lastc_stmt ::= tryelsestmtc c_stmts_opt ::= c_stmts c_stmts_opt ::= passstmt l_stmts ::= _stmts l_stmts ::= return_stmts + l_stmts ::= continue_stmts l_stmts ::= _stmts lastl_stmt l_stmts ::= lastl_stmt lastl_stmt ::= iflaststmtl lastl_stmt ::= ifelsestmtl - lastl_stmt ::= c_trystmt - + lastl_stmt ::= forelselaststmtl + lastl_stmt ::= tryelsestmtl + l_stmts_opt ::= l_stmts l_stmts_opt ::= passstmt + + suite_stmts ::= _stmts + suite_stmts ::= return_stmts + suite_stmts ::= continue_stmts + + suite_stmts_opt ::= suite_stmts + suite_stmts_opt ::= passstmt + + else_suite ::= suite_stmts + else_suitel ::= l_stmts + else_suitec ::= c_stmts + else_suitec ::= return_stmts designList ::= designator designator designList ::= designator DUP_TOP designList @@ -324,17 +331,35 @@ def p_grammar(self, args): stmt ::= call_stmt call_stmt ::= expr POP_TOP - return_stmt ::= expr RETURN_VALUE + stmt ::= return_stmt + return_stmt ::= ret_expr RETURN_VALUE + return_stmts ::= return_stmt + return_stmts ::= _stmts return_stmt + + return_if_stmts ::= return_if_stmt + return_if_stmts ::= _stmts return_if_stmt + return_if_stmt ::= ret_expr RETURN_END_IF + stmt ::= break_stmt break_stmt ::= BREAK_LOOP - continue_stmt ::= JUMP_BACK + stmt ::= continue_stmt + continue_stmt ::= CONTINUE continue_stmt ::= CONTINUE_LOOP + continue_stmts ::= _stmts lastl_stmt continue_stmt + continue_stmts ::= lastl_stmt continue_stmt + continue_stmts ::= continue_stmt - stmt ::= raise_stmt - raise_stmt ::= exprlist RAISE_VARARGS - raise_stmt ::= nullexprlist RAISE_VARARGS + stmt ::= raise_stmt0 + stmt ::= raise_stmt1 + stmt ::= raise_stmt2 + stmt ::= raise_stmt3 + + raise_stmt0 ::= RAISE_VARARGS_0 + raise_stmt1 ::= expr RAISE_VARARGS_1 + raise_stmt2 ::= expr expr RAISE_VARARGS_2 + raise_stmt3 ::= expr expr expr RAISE_VARARGS_3 stmt ::= exec_stmt exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT @@ -353,6 +378,7 @@ def p_grammar(self, args): stmt ::= forstmt stmt ::= forelsestmt stmt ::= trystmt + stmt ::= tryelsestmt stmt ::= tryfinallystmt stmt ::= withstmt stmt ::= withasstmt @@ -381,10 +407,13 @@ def p_grammar(self, args): classdefdeco2 ::= LOAD_CONST expr mkfunc CALL_FUNCTION_0 BUILD_CLASS assert ::= assert_expr POP_JUMP_IF_TRUE - LOAD_ASSERT RAISE_VARARGS + LOAD_ASSERT RAISE_VARARGS_1 assert2 ::= assert_expr POP_JUMP_IF_TRUE - LOAD_ASSERT expr RAISE_VARARGS + LOAD_ASSERT expr CALL_FUNCTION_1 RAISE_VARARGS_1 + + assert2 ::= assert_expr POP_JUMP_IF_TRUE + LOAD_ASSERT expr RAISE_VARARGS_2 assert_expr ::= expr assert_expr ::= assert_expr_or @@ -396,7 +425,6 @@ def p_grammar(self, args): _jump ::= JUMP_ABSOLUTE _jump ::= JUMP_FORWARD _jump ::= JUMP_BACK - _jump ::= JUMP_BACK JUMP_BACK_ELSE jmp_false ::= POP_JUMP_IF_FALSE jmp_true ::= POP_JUMP_IF_TRUE @@ -409,206 +437,119 @@ def p_grammar(self, args): testfalse ::= expr jmp_false testtrue ::= expr jmp_true - _ifstmts_jump ::= return_stmts + _ifstmts_jump ::= return_if_stmts _ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE - iflaststmtl ::= testexpr l_stmts_opt JUMP_ABSOLUTE - iflaststmtl ::= testexpr l_stmts_opt JUMP_BACK + iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK + + ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM + + ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec - ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD c_stmts COME_FROM - ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD return_stmts COME_FROM - ifelsestmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE c_stmts - ifelsestmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE return_stmts - ifelsestmtr ::= testexpr return_stmts return_stmts + ifelsestmtr ::= testexpr return_if_stmts return_stmts - ifelsestmtl ::= testexpr l_stmts_opt JUMP_ABSOLUTE l_stmts - ifelsestmtl ::= testexpr l_stmts_opt _jump_back_jump_back_else l_stmts - _jump_back_jump_back_else ::= JUMP_BACK JUMP_BACK_ELSE + ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel - trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK JUMP_FORWARD - COME_FROM except_stmts + trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + try_middle COME_FROM - trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK - COME_FROM JUMP_FORWARD except_stmts + tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + try_middle else_suite COME_FROM - except_stmts ::= except_cond1 except_sub_stmts - except_stmts ::= except_cond2 except_sub_stmts - except_stmts ::= except JUMP_FORWARD try_end COME_FROM - except_stmts ::= except2 END_FINALLY COME_FROM - except_stmts ::= END_FINALLY COME_FROM + tryelsestmtc ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + try_middle else_suitec COME_FROM - except_stmts_a ::= except_cond1 except_sub_stmts_a - except_stmts_a ::= except_cond2 except_sub_stmts_a - except_stmts_a ::= except JUMP_FORWARD try_end COME_FROM - except_stmts_a ::= except2 try_end - except_stmts_a ::= try_end + tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK + try_middle else_suitel COME_FROM - except_sub_stmts ::= c_stmts_opt JUMP_FORWARD except_stmts_a COME_FROM - except_sub_stmts ::= return_stmts except_stmts - except_sub_stmts ::= continue_stmts jmp_back except_stmts - - except_sub_stmts_a ::= c_stmts_opt JUMP_FORWARD except_stmts_a COME_FROM - except_sub_stmts_a ::= return_stmts except_stmts_a - except_sub_stmts_a ::= continue_stmts jmp_back except_stmts_a - - jmp_back ::= JUMP_BACK - jmp_back ::= JUMP_BACK JUMP_BACK_ELSE - continue_stmts ::= continue_stmt - continue_stmts ::=_stmts continue_stmt + try_middle ::= jmp_abs COME_FROM except_stmts + END_FINALLY + try_middle ::= JUMP_FORWARD COME_FROM except_stmts + END_FINALLY COME_FROM + + except_stmts ::= except_stmts except_stmt + except_stmts ::= except_stmt - try_end ::= END_FINALLY COME_FROM - try_end ::= except_else - except_else ::= END_FINALLY COME_FROM stmts + except_stmt ::= except_cond1 except_suite + except_stmt ::= except_cond2 except_suite + except_stmt ::= except + except_suite ::= c_stmts_opt JUMP_FORWARD + except_suite ::= c_stmts_opt jmp_abs + except_suite ::= return_stmts + except_cond1 ::= DUP_TOP expr COMPARE_OP - POP_JUMP_IF_FALSE POP_TOP POP_TOP POP_TOP - + POP_JUMP_IF_FALSE POP_TOP POP_TOP POP_TOP except_cond2 ::= DUP_TOP expr COMPARE_OP POP_JUMP_IF_FALSE POP_TOP designator POP_TOP - except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt - - except2 ::= POP_TOP POP_TOP POP_TOP return_stmts - - - c_trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK JUMP_FORWARD - COME_FROM c_except_stmts - - c_trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK - COME_FROM JUMP_FORWARD c_except_stmts - - c_trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK jmp_abs - COME_FROM c_except_stmts2 - - c_trystmt ::= SETUP_EXCEPT stmts_opt - POP_BLOCK - COME_FROM jmp_abs c_except_stmts2 - - c_except_stmts ::= except_cond1 c_except_sub_stmts - c_except_stmts ::= except_cond2 c_except_sub_stmts - c_except_stmts ::= except jmp_abs try_end3 - c_except_stmts ::= except2 END_FINALLY COME_FROM - c_except_stmts ::= END_FINALLY COME_FROM - - c_except_stmts_a ::= except_cond1 c_except_sub_stmts_a - c_except_stmts_a ::= except_cond2 c_except_sub_stmts_a - c_except_stmts_a ::= except jmp_abs try_end3 - c_except_stmts_a ::= except2 try_end3 - c_except_stmts_a ::= try_end3 - - try_end3 ::= END_FINALLY COME_FROM - try_end3 ::= except_else3 - except_else3 ::= END_FINALLY COME_FROM c_stmts - except_else3 ::= END_FINALLY COME_FROM l_stmts - - c_except_sub_stmts ::= c_stmts_opt jmp_abs c_except_stmts_a - c_except_sub_stmts ::= return_stmts c_except_stmts - - c_except_sub_stmts_a ::= c_stmts_opt jmp_abs c_except_stmts_a - c_except_sub_stmts_a ::= return_stmts c_except_stmts_a - - c_except_stmts2 ::= except_cond1 c_except_sub_stmts2 - c_except_stmts2 ::= except_cond2 c_except_sub_stmts2 - c_except_stmts2 ::= except jmp_abs try_end2 - c_except_stmts2 ::= except2 END_FINALLY - c_except_stmts2 ::= END_FINALLY - - c_except_stmts2_a ::= except_cond1 c_except_sub_stmts2_a - c_except_stmts2_a ::= except_cond2 c_except_sub_stmts2_a - c_except_stmts2_a ::= except jmp_abs try_end2 - c_except_stmts2_a ::= except2 try_end2 - c_except_stmts2_a ::= try_end2 - - c_except_sub_stmts2 ::= c_stmts_opt jmp_abs c_except_stmts2_a - c_except_sub_stmts2 ::= return_stmts c_except_stmts2 - - c_except_sub_stmts2_a ::= c_stmts_opt jmp_abs c_except_stmts2_a - c_except_sub_stmts2_a ::= return_stmts c_except_stmts2_a - - try_end2 ::= END_FINALLY - try_end2 ::= except_else2 - except_else2 ::= END_FINALLY c_stmts - except_else2 ::= END_FINALLY l_stmts - + except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt JUMP_FORWARD + except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt jmp_abs + except ::= POP_TOP POP_TOP POP_TOP return_stmts + jmp_abs ::= JUMP_ABSOLUTE jmp_abs ::= JUMP_BACK - jmp_abs ::= JUMP_BACK JUMP_BACK_ELSE - tryfinallystmt ::= SETUP_FINALLY stmts + tryfinallystmt ::= SETUP_FINALLY suite_stmts POP_BLOCK LOAD_CONST - COME_FROM stmts_opt END_FINALLY + COME_FROM suite_stmts_opt END_FINALLY - withstmt ::= expr SETUP_WITH POP_TOP stmts_opt + withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY - withasstmt ::= expr SETUP_WITH designator stmts_opt + withasstmt ::= expr SETUP_WITH designator suite_stmts_opt POP_BLOCK LOAD_CONST COME_FROM WITH_CLEANUP END_FINALLY whilestmt ::= SETUP_LOOP testexpr - l_stmts_opt _jump_back + l_stmts_opt JUMP_BACK POP_BLOCK COME_FROM - _jump_back ::= JUMP_BACK - _jump_back ::= JUMP_BACK JUMP_BACK_ELSE - whilestmt ::= SETUP_LOOP testexpr return_stmts POP_BLOCK COME_FROM - while1stmt ::= SETUP_LOOP l_stmts _jump_back COME_FROM + while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK COME_FROM while1stmt ::= SETUP_LOOP return_stmts COME_FROM + while1elsestmt ::= SETUP_LOOP l_stmts JUMP_BACK else_suite COME_FROM + whileelsestmt ::= SETUP_LOOP testexpr - l_stmts_opt _jump_back + l_stmts_opt JUMP_BACK POP_BLOCK - stmts COME_FROM + else_suite COME_FROM whileelselaststmt ::= SETUP_LOOP testexpr - l_stmts_opt _jump_back + l_stmts_opt JUMP_BACK POP_BLOCK - c_stmts COME_FROM + else_suitec COME_FROM _for ::= GET_ITER FOR_ITER _for ::= LOAD_CONST FOR_LOOP + for_block ::= l_stmts_opt JUMP_BACK + for_block ::= return_stmts _come_from + forstmt ::= SETUP_LOOP expr _for designator - l_stmts_opt _jump_back - POP_BLOCK COME_FROM - forstmt ::= SETUP_LOOP expr _for designator - return_stmts - POP_BLOCK COME_FROM + for_block POP_BLOCK COME_FROM forelsestmt ::= SETUP_LOOP expr _for designator - l_stmts_opt _jump_back - POP_BLOCK stmts COME_FROM - forelsestmt ::= SETUP_LOOP expr _for designator - return_stmts _come_from - POP_BLOCK stmts COME_FROM + for_block POP_BLOCK else_suite COME_FROM forelselaststmt ::= SETUP_LOOP expr _for designator - l_stmts_opt _jump_back - POP_BLOCK c_stmts COME_FROM - forelselaststmt ::= SETUP_LOOP expr _for designator - return_stmts _come_from - POP_BLOCK c_stmts COME_FROM + for_block POP_BLOCK else_suitec COME_FROM + + forelselaststmtl ::= SETUP_LOOP expr _for designator + for_block POP_BLOCK else_suitel COME_FROM - return_stmts ::= return_stmt - return_stmts ::= _stmts return_stmt - ''' def p_expr(self, args): @@ -618,7 +559,6 @@ def p_expr(self, args): expr ::= LOAD_FAST expr ::= LOAD_NAME expr ::= LOAD_CONST - expr ::= LOAD_ASSERT expr ::= LOAD_GLOBAL expr ::= LOAD_DEREF expr ::= LOAD_LOCALS @@ -703,16 +643,27 @@ def p_expr(self, args): conditional ::= expr POP_JUMP_IF_FALSE expr JUMP_FORWARD expr COME_FROM conditional ::= expr POP_JUMP_IF_FALSE expr JUMP_ABSOLUTE expr expr ::= conditionalnot - conditionalnot ::= expr POP_JUMP_IF_TRUE expr _jump expr COME_FROM + conditionalnot ::= expr POP_JUMP_IF_TRUE expr JUMP_FORWARD expr COME_FROM + conditionalnot ::= expr POP_JUMP_IF_TRUE expr JUMP_ABSOLUTE expr + + ret_expr ::= expr + ret_expr ::= ret_and + ret_expr ::= ret_or + + ret_expr_or_cond ::= ret_expr + ret_expr_or_cond ::= ret_cond + ret_expr_or_cond ::= ret_cond_not + + ret_and ::= expr JUMP_IF_FALSE_OR_POP ret_expr_or_cond COME_FROM + ret_or ::= expr JUMP_IF_TRUE_OR_POP ret_expr_or_cond COME_FROM + ret_cond ::= expr POP_JUMP_IF_FALSE expr RETURN_END_IF ret_expr_or_cond + ret_cond_not ::= expr POP_JUMP_IF_TRUE expr RETURN_END_IF ret_expr_or_cond stmt ::= return_lambda stmt ::= conditional_lambda - stmt ::= conditional_lambda2 - return_lambda ::= expr RETURN_VALUE LAMBDA_MARKER - conditional_lambda ::= expr POP_JUMP_IF_FALSE return_stmt return_stmt LAMBDA_MARKER - conditional_lambda2 ::= expr POP_JUMP_IF_FALSE expr POP_JUMP_IF_FALSE - return_stmt return_stmt LAMBDA_MARKER + return_lambda ::= ret_expr RETURN_VALUE LAMBDA_MARKER + conditional_lambda ::= expr POP_JUMP_IF_FALSE return_if_stmt return_stmt LAMBDA_MARKER cmp ::= cmp_list cmp ::= compare @@ -742,10 +693,13 @@ def p_expr(self, args): exprlist ::= expr nullexprlist ::= + + expr32 ::= expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr expr + expr1024 ::= expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 expr32 ''' def nonterminal(self, nt, args): - collect = ('stmts', 'exprlist', 'kvlist', '_stmts') + collect = ('stmts', 'exprlist', 'kvlist', '_stmts', 'print_items') if nt in collect and len(args) > 1: # @@ -801,12 +755,12 @@ def parse(tokens, customize): #nop = lambda self, args: None op = k[:string.rfind(k, '_')] if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'): - rule = 'build_list ::= ' + 'expr '*v + k + rule = 'build_list ::= ' + 'expr1024 '*(v/1024) + 'expr32 '*((v/32)%32) + 'expr '*(v%32) + k elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): rule = 'unpack ::= ' + k + ' designator'*v elif op == 'UNPACK_LIST': rule = 'unpack_list ::= ' + k + ' designator'*v - elif op == 'DUP_TOPX': + elif op in ('DUP_TOPX', 'RAISE_VARARGS'): # no need to add a rule continue #rule = 'dup_topx ::= ' + 'expr '*v + k diff --git a/uncompyle/Scanner.py b/uncompyle/Scanner.py index d228b31..c34d32e 100644 --- a/uncompyle/Scanner.py +++ b/uncompyle/Scanner.py @@ -1,5 +1,5 @@ # Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 2005 by Dan Pascu # # See main module for license. @@ -9,9 +9,18 @@ import types import dis +from collections import namedtuple +from array import array +from operator import itemgetter -globals().update(dis.opmap) +HAVE_ARGUMENT = dis.HAVE_ARGUMENT +globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()}) + +PJIF = POP_JUMP_IF_FALSE +PJIT = POP_JUMP_IF_TRUE +JA = JUMP_ABSOLUTE +JF = JUMP_FORWARD class Token: """ @@ -20,11 +29,12 @@ class Token: A byte-code token is equivalent to the contents of one line as output by dis.dis(). """ - def __init__(self, type, attr=None, pattr=None, offset=-1): - self.type = intern(type) + def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False): + self.type = intern(type_) self.attr = attr self.pattr = pattr self.offset = offset + self.linestart = linestart def __cmp__(self, o): if isinstance(o, Token): @@ -35,8 +45,12 @@ def __cmp__(self, o): def __repr__(self): return str(self.type) def __str__(self): - pattr = self.pattr or '' - return '%s\t%-17s %r' % (self.offset, self.type, pattr) + pattr = self.pattr + if self.linestart: + return '\n%s\t%-17s %r' % (self.offset, self.type, pattr) + else: + return '%s\t%-17s %r' % (self.offset, self.type, pattr) + def __hash__(self): return hash(self.type) def __getitem__(self, i): raise IndexError @@ -56,21 +70,15 @@ def __init__(self, co, scanner, classname=None): class Scanner: def __init__(self, version): - self.__version = version + self.version = version from sys import version_info - self.__pyversion = float('%d.%d' % version_info[0:2]) + self.pyversion = float('%d.%d' % version_info[0:2]) self.resetTokenClass() self.JUMP_OPs = map(lambda op: dis.opname[op], - dis.hasjrel + dis.hasjabs) - - copmap = {} - for i in range(len(dis.cmp_op)): - copmap[dis.cmp_op[i]] = i - dis.copmap = copmap - + dis.hasjrel + dis.hasjabs) def setShowAsm(self, showasm, out=None): self.showasm = showasm @@ -82,55 +90,107 @@ def setTokenClass(self, tokenClass): def resetTokenClass(self): self.setTokenClass(Token) + + def deobfuscate(self, co, linestarts, varnames): + n = 0 + code = self.code + for i in self.op_range(0, len(code)): + if code[i] in (RETURN_VALUE, END_FINALLY): + n = i + 1 + + fixed_code = array('B') + linestartoffsets = {a:b for (a, b) in linestarts[1:]} + newlinestarts = linestarts[0:1] + old_to_new = {} + new_to_old = {} + m = 0 + for i in self.op_range(0, n): + old_to_new[i] = m + new_to_old[m] = i + if i in linestartoffsets: + newlinestarts.append( (m, linestartoffsets[i]) ) + if code[i] != NOP: + fixed_code.append(code[i]) + m += 1 + if code[i] >= HAVE_ARGUMENT: + fixed_code.append(code[i+1]) + fixed_code.append(code[i+2]) + m += 2 + + self.code = code = fixed_code + for i in self.op_range(0, m): + if code[i] in dis.hasjrel: + #import pdb; pdb.set_trace() + old_jump = code[i+1] + code[i+2]*256 + old_target = new_to_old[i] + 3 + old_jump + new_target = old_to_new[old_target] + new_jump = new_target - i - 3 + code[i+1] = new_jump % 256 + code[i+2] = new_jump // 256 + if code[i] in dis.hasjabs: + old_target = code[i+1] + code[i+2]*256 + new_target = old_to_new[old_target] + code[i+1] = new_target % 256 + code[i+2] = new_target // 256 + + for i in range(len(varnames)): + varnames[i] = 'varnames_%s' % i + + for i in self.op_range(0, m): + if code[i] == IMPORT_NAME and code[i+3] == STORE_FAST: + varname_index = code[i+4] + code[i+5]*256 + name_index = code[i+1] + code[i+2]*256 + varnames[varname_index] = co.co_names[name_index] + - def disassemble(self, co, classname=None): + return newlinestarts + + + def disassemble(self, co, classname=None, deob=0): """ Disassemble a code object, returning a list of 'Token'. The main part of this procedure is modelled after dis.disassemble(). """ + #import pdb; pdb.set_trace() rv = [] customize = {} Token = self.Token # shortcut - code = co.co_code + self.code = array('B', co.co_code) + + linestarts = list(dis.findlinestarts(co)) + varnames = list(co.co_varnames) + if deob: + linestarts = self.deobfuscate(co, linestarts, varnames) + + code = self.code n = len(code) + self.prev = [0] - i=0 - while i < n: - c = code[i] - op = ord(code[i]) - if op >= dis.HAVE_ARGUMENT: - self.prev.append(i) - self.prev.append(i) + for i in self.op_range(0, n): + op = code[i] + self.prev.append(i) + if op >= HAVE_ARGUMENT: self.prev.append(i) - i = i + 3 - else: self.prev.append(i) - i = i + 1 self.lines = [] - self.if_lines = {} + linetuple = namedtuple('linetuple', ['l_no', 'next']) j = 0 - linestarts = list(dis.findlinestarts(co)) + + linestartoffsets = {a for (a, _) in linestarts} (prev_start_byte, prev_line_no) = linestarts[0] for (start_byte, line_no) in linestarts[1:]: while j < start_byte: - self.lines.append((prev_line_no, start_byte)) + self.lines.append(linetuple(prev_line_no, start_byte)) j += 1 - last_op = ord(code[self.prev[start_byte]]) - if last_op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): - self.if_lines[prev_line_no] = True - else: - self.if_lines[prev_line_no] = False + last_op = code[self.prev[start_byte]] (prev_start_byte, prev_line_no) = (start_byte, line_no) while j < n: - self.lines.append((prev_line_no, n)) + self.lines.append(linetuple(prev_line_no, n)) j+=1 - self.if_lines[prev_line_no] = False - cf = self.find_jump_targets(code) - if classname: classname = '_' + classname.lstrip('_') + '__' def unmangle(name): @@ -140,34 +200,60 @@ def unmangle(name): free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ] names = [ unmangle(name) for name in co.co_names ] - varnames = [ unmangle(name) for name in co.co_varnames ] + varnames = [ unmangle(name) for name in varnames ] else: free = co.co_cellvars + co.co_freevars names = co.co_names - varnames = co.co_varnames - i = 0 + self.load_asserts = set() + for i in self.op_range(0, n): + if code[i] == PJIT and code[i+3] == LOAD_GLOBAL: + if names[code[i+4] + 256*code[i+5]] == 'AssertionError': + self.load_asserts.add(i+3) + + cf = self.find_jump_targets(code) + + last_stmt = self.next_stmt[0] + i = self.next_stmt[last_stmt] + replace = {} + while i < n-1: + if self.lines[last_stmt].next > i: + if code[last_stmt] == PRINT_ITEM: + if code[i] == PRINT_ITEM: + replace[i] = 'PRINT_ITEM_CONT' + elif code[i] == PRINT_NEWLINE: + replace[i] = 'PRINT_NEWLINE_CONT' + last_stmt = i + i = self.next_stmt[i] + + imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR)) + if len(imports) > 1: + last_import = imports[0] + for i in imports[1:]: + if self.lines[last_import].next > i: + if code[last_import] == IMPORT_NAME == code[i]: + replace[i] = 'IMPORT_NAME_CONT' + last_import = i + extended_arg = 0 - while i < n: - offset = i - k = 0 - if cf.has_key(offset): + for offset in self.op_range(0, n): + + if offset in cf: + k = 0 for j in cf[offset]: rv.append(Token('COME_FROM', None, repr(j), offset="%s_%d" % (offset, k) )) k += 1 - c = code[i] - op = ord(c) + op = code[offset] opname = dis.opname[op] - i += 1 oparg = None; pattr = None - if op >= dis.HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1]) * 256 + extended_arg + if op >= HAVE_ARGUMENT: + oparg = code[offset+1] + code[offset+2] * 256 + extended_arg extended_arg = 0 - i += 2 if op == dis.EXTENDED_ARG: extended_arg = oparg * 65536L + continue if op in dis.hasconst: const = co.co_consts[oparg] if type(const) == types.CodeType: @@ -192,7 +278,7 @@ def unmangle(name): elif op in dis.hasname: pattr = names[oparg] elif op in dis.hasjrel: - pattr = repr(i + oparg) + pattr = repr(offset + 3 + oparg) elif op in dis.hasjabs: pattr = repr(oparg) elif op in dis.haslocal: @@ -202,43 +288,42 @@ def unmangle(name): elif op in dis.hasfree: pattr = free[oparg] - if opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE', - 'UNPACK_LIST', 'UNPACK_TUPLE', 'UNPACK_SEQUENCE', - 'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE', - 'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW', - 'CALL_FUNCTION_VAR_KW', 'DUP_TOPX', + if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE, + UNPACK_SEQUENCE, + MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE, + CALL_FUNCTION_VAR, CALL_FUNCTION_KW, + CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS ): # CE - Hack for >= 2.5 # Now all values loaded via LOAD_CLOSURE are packed into # a tuple before calling MAKE_CLOSURE. - if opname == 'BUILD_TUPLE' and \ - dis.opname[ord(code[offset-3])] == 'LOAD_CLOSURE': + if op == BUILD_TUPLE and \ + code[self.prev[offset]] == LOAD_CLOSURE: continue else: opname = '%s_%d' % (opname, oparg) - if opname not in ('BUILD_SLICE_2', 'BUILD_SLICE_3'): + if op != BUILD_SLICE: customize[opname] = oparg - elif opname == 'JUMP_ABSOLUTE': - target = self.__get_target(code, offset) + elif op == JA: + target = self.get_target(offset) if target < offset: - opname = 'JUMP_BACK' - - elif opname == 'LOAD_GLOBAL': - try: - if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE': - opname = 'LOAD_ASSERT' - except AttributeError: - pass - - elif opname == 'IMPORT_NAME': - if pattr == '': - pattr = '.' + if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \ + and offset not in self.not_continue: + opname = 'CONTINUE' + else: + opname = 'JUMP_BACK' - rv.append(Token(opname, oparg, pattr, offset)) + elif op == LOAD_GLOBAL: + if offset in self.load_asserts: + opname = 'LOAD_ASSERT' + elif op == RETURN_VALUE: + if offset in self.return_end_ifs: + opname = 'RETURN_END_IF' - if self.__jump_back_else.get(offset, False): - rv.append(Token('JUMP_BACK_ELSE', None, None, - offset="%s_" % offset )) + if offset not in replace: + rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets)) + else: + rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets)) if self.showasm: out = self.out # shortcut @@ -248,15 +333,15 @@ def unmangle(name): return rv, customize - def __get_target(self, code, pos, op=None): + def get_target(self, pos, op=None): if op is None: - op = ord(code[pos]) - target = ord(code[pos+1]) + ord(code[pos+2]) * 256 + op = self.code[pos] + target = self.code[pos+1] + self.code[pos+2] * 256 if op in dis.hasjrel: target += pos + 3 return target - def __first_instr(self, code, start, end, instr, target=None, exact=True): + def first_instr(self, start, end, instr, target=None, exact=True): """ Find the first in the block from start to end. is any python bytecode instruction or a list of opcodes @@ -267,23 +352,20 @@ def __first_instr(self, code, start, end, instr, target=None, exact=True): Return index to it or None if not found. """ - + code = self.code assert(start>=0 and end<=len(code)) - HAVE_ARGUMENT = dis.HAVE_ARGUMENT - - try: instr[0] + try: None in instr except: instr = [instr] pos = None distance = len(code) - i = start - while i < end: - op = ord(code[i]) + for i in self.op_range(start, end): + op = code[i] if op in instr: if target is None: return i - dest = self.__get_target(code, i, op) + dest = self.get_target(i, op) if dest == target: return i elif not exact: @@ -291,13 +373,9 @@ def __first_instr(self, code, start, end, instr, target=None, exact=True): if _distance < distance: distance = _distance pos = i - if op < HAVE_ARGUMENT: - i += 1 - else: - i += 3 return pos - def __last_instr(self, code, start, end, instr, target=None, exact=True): + def last_instr(self, start, end, instr, target=None, exact=True): """ Find the last in the block from start to end. is any python bytecode instruction or a list of opcodes @@ -309,24 +387,22 @@ def __last_instr(self, code, start, end, instr, target=None, exact=True): Return index to it or None if not found. """ + code = self.code if not (start>=0 and end<=len(code)): return None - HAVE_ARGUMENT = dis.HAVE_ARGUMENT - - try: instr[0] + try: None in instr except: instr = [instr] pos = None distance = len(code) - i = start - while i < end: - op = ord(code[i]) + for i in self.op_range(start, end): + op = code[i] if op in instr: if target is None: pos = i else: - dest = self.__get_target(code, i, op) + dest = self.get_target(i, op) if dest == target: distance = 0 pos = i @@ -335,13 +411,9 @@ def __last_instr(self, code, start, end, instr, target=None, exact=True): if _distance <= distance: distance = _distance pos = i - if op < HAVE_ARGUMENT: - i += 1 - else: - i += 3 return pos - def __all_instr(self, code, start, end, instr, target=None): + def all_instr(self, start, end, instr, target=None, include_beyond_target=False): """ Find all in the block from start to end. is any python bytecode instruction or a list of opcodes @@ -350,85 +422,206 @@ def __all_instr(self, code, start, end, instr, target=None): Return a list with indexes to them or [] if none found. """ - + + code = self.code assert(start>=0 and end<=len(code)) - HAVE_ARGUMENT = dis.HAVE_ARGUMENT - - try: instr[0] + try: None in instr except: instr = [instr] result = [] - i = start - while i < end: - op = ord(code[i]) + for i in self.op_range(start, end): + op = code[i] if op in instr: if target is None: result.append(i) - elif target == self.__get_target(code, i, op): + else: + t = self.get_target(i, op) + if include_beyond_target and t >= target: + result.append(i) + elif t == target: + result.append(i) + return result + + def op_size(self, op): + if op < HAVE_ARGUMENT: + return 1 + else: + return 3 + + def op_range(self, start, end): + while start < end: + yield start + start += self.op_size(self.code[start]) + + def build_stmt_indices(self): + code = self.code + start = 0; + end = len(code) + + stmt_opcodes = { + SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP, + SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH, + POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF, + STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME, + STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR, + RETURN_VALUE, RAISE_VARARGS, POP_TOP, + PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO, + STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, + DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3, + JUMP_ABSOLUTE, EXEC_STMT, + } + + stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)] + + designator_ops = { + STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR, + STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3, + STORE_SUBSCR, UNPACK_SEQUENCE, JA + } + + prelim = self.all_instr(start, end, stmt_opcodes) + + stmts = self.stmts = set(prelim) + + pass_stmts = set() + for seq in stmt_opcode_seqs: + for i in self.op_range(start, end-(len(seq)+1)): + match = True + for elem in seq: + if elem != code[i]: + match = False + break + i += self.op_size(code[i]) + + if match: + i = self.prev[i] + stmts.add(i) + pass_stmts.add(i) + + if pass_stmts: + stmt_list = list(stmts) + stmt_list.sort() + else: + stmt_list = prelim + last_stmt = -1 + self.next_stmt = [] + slist = self.next_stmt = [] + i = 0 + for s in stmt_list: + if code[s] == JA and s not in pass_stmts: + target = self.get_target(s) + if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no: + stmts.remove(s) + continue + j = self.prev[s] + while code[j] == JA: + j = self.prev[j] + if code[j] == LIST_APPEND: #list comprehension + stmts.remove(s) + continue + elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO: + stmts.remove(s) + continue + elif code[s] in designator_ops: + j = self.prev[s] + while code[j] in designator_ops: + j = self.prev[j] + if code[j] == FOR_ITER: + stmts.remove(s) + continue + last_stmt = s + slist += [s] * (s-i) + i = s + slist += [len(code)] * (len(code)-len(slist)) + + + def remove_mid_line_ifs(self, ifs): + filtered = [] + for i in ifs: + if self.lines[i].l_no == self.lines[i+3].l_no: + if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF): + continue + filtered.append(i) + return filtered + + + def rem_or(self, start, end, instr, target=None, include_beyond_target=False): + """ + Find all in the block from start to end. + is any python bytecode instruction or a list of opcodes + If is an opcode with a target (like a jump), a target + destination can be specified which must match precisely. + + Return a list with indexes to them or [] if none found. + """ + + code = self.code + assert(start>=0 and end<=len(code)) + + try: None in instr + except: instr = [instr] + + result = [] + for i in self.op_range(start, end): + op = code[i] + if op in instr: + if target is None: result.append(i) - if op < HAVE_ARGUMENT: - i += 1 - else: - i += 3 + else: + t = self.get_target(i, op) + if include_beyond_target and t >= target: + result.append(i) + elif t == target: + result.append(i) + + pjits = self.all_instr(start, end, PJIT) + filtered = [] + for pjit in pjits: + tgt = self.get_target(pjit)-3 + for i in result: + if i <= pjit or i >= tgt: + filtered.append(i) + result = filtered + filtered = [] return result - def __next_except_jump(self, code, start, end, target): + def next_except_jump(self, start): """ Return the next jump that was generated by an except SomeException: construct in a try...except...else clause or None if not found. """ - HAVE_ARGUMENT = dis.HAVE_ARGUMENT - lookup = [JUMP_ABSOLUTE, JUMP_FORWARD] - while start < end: - jmp = self.__first_instr(code, start, end, lookup, target) - if jmp is None: - return None - if jmp == end-3: - return jmp - after = jmp + 3 - ops = [None, None, None, None] - opp = [0, 0, 0, 0] - pos = 0 - x = jmp+3 - while x <= end and pos < 4: - op = ord(code[x]) - if op >= HAVE_ARGUMENT: - break - ops[pos] = op - opp[pos] = x - pos += 1 - x += 1 - if ops[0] == END_FINALLY and opp[0] == end: - return jmp - if ops[0] == DUP_TOP: - return jmp - if ops[0] == ops[1] == ops[2] == POP_TOP: + if self.code[start] == DUP_TOP: + except_match = self.first_instr(start, len(self.code), POP_JUMP_IF_FALSE) + if except_match: + jmp = self.prev[self.get_target(except_match)] + self.ignore_if.add(except_match) + self.not_continue.add(jmp) return jmp - start = jmp + 3 - return None - - def __fix_parent(self, code, target, parent): - """Fix parent boundaries if needed""" - start = parent['start'] - end = parent['end'] - - if target >= start or end-start < 3 or target not in self.__loops: - return - if ord(code[end-3])==JUMP_ABSOLUTE: - cont_target = self.__get_target(code, end-3, JUMP_ABSOLUTE) - if target == cont_target: - parent['end'] = end-3 - - def __restrict_to_parent(self, target, parent): + + count_END_FINALLY = 0 + count_SETUP_ = 0 + for i in self.op_range(start, len(self.code)): + op = self.code[i] + if op == END_FINALLY: + if count_END_FINALLY == count_SETUP_: + assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE) + self.not_continue.add(self.prev[i]) + return self.prev[i] + count_END_FINALLY += 1 + elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY): + count_SETUP_ += 1 + + + def restrict_to_parent(self, target, parent): """Restrict pos to parent boundaries.""" if not (parent['start'] < target < parent['end']): target = parent['end'] return target - def __detect_structure(self, code, pos, op=None): + def detect_structure(self, pos, op=None): """ Detect structures and their boundaries to fix optimizied jumps in python2.3+ @@ -436,15 +629,16 @@ def __detect_structure(self, code, pos, op=None): # TODO: check the struct boundaries more precisely -Dan + code = self.code # Ev remove this test and make op a mandatory argument -Dan if op is None: - op = ord(code[pos]) + op = code[pos] ## Detect parent structure - parent = self.__structs[0] + parent = self.structs[0] start = parent['start'] end = parent['end'] - for s in self.__structs: + for s in self.structs: _start = s['start'] _end = s['end'] if (_start <= pos < _end) and (_start >= start and _end <= end): @@ -453,297 +647,249 @@ def __detect_structure(self, code, pos, op=None): parent = s ## We need to know how many new structures were added in this run - origStructCount = len(self.__structs) + origStructCount = len(self.structs) if op == SETUP_LOOP: + #import pdb; pdb.set_trace() start = pos+3 - target = self.__get_target(code, pos, op) - end = self.__restrict_to_parent(target, parent) + target = self.get_target(pos, op) + end = self.restrict_to_parent(target, parent) if target != end: - self.__fixed_jumps[pos] = end + self.fixed_jumps[pos] = end (line_no, next_line_byte) = self.lines[pos] - jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE, + jump_back = self.last_instr(start, end, JA, next_line_byte, False) - if not jump_back: - return - - if self.__get_target(code, jump_back) >= next_line_byte: - jump_back = self.__last_instr(code, start, end, JUMP_ABSOLUTE, - start, False) - - - if end > jump_back+4 and ord(code[end]) in (JUMP_FORWARD, JUMP_ABSOLUTE): - if ord(code[jump_back+4]) in (JUMP_ABSOLUTE,): - if self.__get_target(code, jump_back+4) == self.__get_target(code, end): - self.__fixed_jumps[pos] = jump_back+4 - end = jump_back+4 - elif target < pos: - self.__fixed_jumps[pos] = jump_back+4 - end = jump_back+4 - - target = self.__get_target(code, jump_back, JUMP_ABSOLUTE) - - if ord(code[target]) in (FOR_ITER, GET_ITER): - loop_type = 'for' + + if jump_back and jump_back != self.prev[end] and code[jump_back+3] in (JA, JF): + if code[self.prev[end]] == RETURN_VALUE or \ + (code[self.prev[end]] == POP_BLOCK and code[self.prev[self.prev[end]]] == RETURN_VALUE): + jump_back = None + + if not jump_back: # loop suite ends in return. wtf right? + jump_back = self.last_instr(start, end, RETURN_VALUE) + 1 + if not jump_back: + return + if code[self.prev[next_line_byte]] not in (PJIF, PJIT): + loop_type = 'for' + else: + loop_type = 'while' + self.ignore_if.add(self.prev[next_line_byte]) + target = next_line_byte + end = jump_back + 3 else: - loop_type = 'while' - (line_no, next_line_byte) = self.lines[pos] - test = self.prev[next_line_byte] - assert(test is not None) - test_target = self.__get_target(code, test) - if test_target > (jump_back+3): - jump_back = test_target + if self.get_target(jump_back) >= next_line_byte: + jump_back = self.last_instr(start, end, JA, + start, False) + + if end > jump_back+4 and code[end] in (JF, JA): + if code[jump_back+4] in (JA, JF): + if self.get_target(jump_back+4) == self.get_target(end): + self.fixed_jumps[pos] = jump_back+4 + end = jump_back+4 + elif target < pos: + self.fixed_jumps[pos] = jump_back+4 + end = jump_back+4 - self.__loops.append(target) - self.__structs.append({'type': loop_type + '-loop', + target = self.get_target(jump_back, JA) + + if code[target] in (FOR_ITER, GET_ITER): + loop_type = 'for' + else: + loop_type = 'while' + test = self.prev[next_line_byte] + if test == pos: + loop_type = 'while 1' + else: + self.ignore_if.add(test) + test_target = self.get_target(test) + if test_target > (jump_back+3): + jump_back = test_target + + self.not_continue.add(jump_back) + + self.loops.append(target) + self.structs.append({'type': loop_type + '-loop', 'start': target, 'end': jump_back}) - self.__structs.append({'type': loop_type + '-else', - 'start': jump_back+3, - 'end': end}) + if jump_back+3 != end: + self.structs.append({'type': loop_type + '-else', + 'start': jump_back+3, + 'end': end}) elif op == SETUP_EXCEPT: start = pos+3 - target = self.__get_target(code, pos, op) - end = self.__restrict_to_parent(target, parent) + target = self.get_target(pos, op) + end = self.restrict_to_parent(target, parent) if target != end: - self.__fixed_jumps[pos] = end + self.fixed_jumps[pos] = end #print target, end, parent ## Add the try block - self.__structs.append({'type': 'try', + self.structs.append({'type': 'try', 'start': start, 'end': end-4}) ## Now isolate the except and else blocks - start = end - target = self.__get_target(code, self.prev[start]) - self.__fix_parent(code, target, parent) - end = self.__restrict_to_parent(target, parent) - #if target != end: - # self.__fixed_jumps[self.prev[start]] = end - - end_finally = self.__last_instr(code, start, end, END_FINALLY) - if end_finally is None: - return - lookup = [JUMP_FORWARD] - jump_end = self.__last_instr(code, start, end, lookup) - if jump_end: - target = self.__get_target(code, jump_end) - end = self.__restrict_to_parent(target, parent) - # if target != end: - # self.__fixed_jumps[jump_end] = end - ## Add the try-else block - self.__structs.append({'type': 'try-else', - 'start': end_finally+1, - 'end': end}) + end_else = start_else = self.get_target(self.prev[end]) + ## Add the except blocks - i = start - while i < end_finally: - jmp = self.__next_except_jump(code, i, end_finally, target) - if jmp is None: - break - self.__structs.append({'type': 'except', - 'start': i, - 'end': jmp}) - # if target != end: - # self.__fixed_jumps[jmp] = end - i = jmp+3 - - elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE): - start = pos+3 - target = self.__get_target(code, pos, op) - rtarget = self.__restrict_to_parent(target, parent) + i = end + while self.code[i] != END_FINALLY: + jmp = self.next_except_jump(i) + if self.code[jmp] == RETURN_VALUE: + self.structs.append({'type': 'except', + 'start': i, + 'end': jmp+1}) + i = jmp + 1 + else: + if self.get_target(jmp) != start_else: + end_else = self.get_target(jmp) + if self.code[jmp] == JF: + self.fixed_jumps[jmp] = -1 + self.structs.append({'type': 'except', + 'start': i, + 'end': jmp}) + i = jmp + 3 + + ## Add the try-else block + if end_else != start_else: + r_end_else = self.restrict_to_parent(end_else, parent) + self.structs.append({'type': 'try-else', + 'start': i+1, + 'end': r_end_else}) + self.fixed_jumps[i] = r_end_else + else: + self.fixed_jumps[i] = i+1 - (line_no, next_line_byte) = self.lines[pos] + + elif op in (PJIF, PJIT): + start = pos+3 + target = self.get_target(pos, op) + rtarget = self.restrict_to_parent(target, parent) + pre = self.prev - if target == rtarget: - prev_target = self.prev[target] - prev_target_op = ord(code[prev_target]) - target_op = ord(code[target]) - if prev_target_op == JUMP_ABSOLUTE and target_op != POP_BLOCK: - if self.__get_target(code, prev_target) < pos: - self.__jump_back_else[prev_target] = True - - #is this part of a larger expression - if (ord(code[self.prev[target]]) in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, - POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) and (target > pos): - self.__fixed_jumps[pos] = self.prev[target] + if target != rtarget and parent['type'] == 'and/or': + self.fixed_jumps[pos] = rtarget return - - #is this not at the end of a line - if line_no == self.lines[start][0]: - #is this a one line if with multiple tests - good_op = False - prev = self.prev[next_line_byte] - p_op = ord(code[prev]) - if op == POP_JUMP_IF_FALSE: - if target == next_line_byte: - if p_op == JUMP_FORWARD: - if self.__get_target(code, prev) == target: - good_op = True - if p_op == RETURN_VALUE: - good_op = True - else: - if start < target < next_line_byte: - if ord(code[self.prev[target]]) in (JUMP_ABSOLUTE, JUMP_FORWARD, RETURN_VALUE): - good_op = True - while p_op in (JUMP_ABSOLUTE, JUMP_FORWARD, POP_BLOCK): - if p_op in (JUMP_ABSOLUTE, JUMP_FORWARD): - if self.__get_target(code, prev) == target: - good_op = True - break - prev = self.prev[prev] - p_op = ord(code[prev]) - if good_op: - last = self.__last_instr(code, start, next_line_byte, - (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target) - if last: - self.__fixed_jumps[pos] = last - return - else: - while p_op in (JUMP_ABSOLUTE, JUMP_FORWARD, POP_BLOCK): - if p_op in (JUMP_ABSOLUTE, JUMP_FORWARD): - if self.__get_target(code, prev) == target: - last = self.__last_instr(code, start, next_line_byte, - (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE)) - if last: - self.__fixed_jumps[pos] = last - return - break - prev = self.prev[prev] - p_op = ord(code[prev]) - - #if ifline - if self.if_lines.get(line_no, False): - if (target >= next_line_byte) or (target < pos): - if not (line_no == self.lines[target][0]): - self.__fixed_jumps[pos] = self.prev[next_line_byte] - return - if self.if_lines.get(line_no+1, False): - next_if = self.prev[self.lines[next_line_byte][1]] - if target == self.__get_target(code, next_if): - self.__fixed_jumps[pos] = next_if - elif (op == POP_JUMP_IF_TRUE) and (ord(code[next_if+3]) == JUMP_ABSOLUTE) and (target == self.__get_target(code, next_if+3)) and (target < pos): - self.__fixed_jumps[pos] = next_if - return - else: - if self.lines[target][0] > line_no: - next = self.__first_instr(code, start, target, POP_JUMP_IF_FALSE, target) - j = self.__first_instr(code, start, target, JUMP_ABSOLUTE, target) - if next and not j: - self.__fixed_jumps[pos] = next - return + #does this jump to right after another cond jump? + # if so, it's part of a larger conditional + if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP, + PJIF, PJIT)) and (target > pos): + self.fixed_jumps[pos] = pre[target] + self.structs.append({'type': 'and/or', + 'start': start, + 'end': pre[target]}) return - - if op == POP_JUMP_IF_FALSE: - i = self.lines[next_line_byte][0] - k = j = next_line_byte - num_pj = 1 - while ((self.if_lines.get(i, False) - and ((self.__get_target(code, self.lines[j][1]-3) == target) - or ((ord(code[self.lines[j][1]-3]) == POP_JUMP_IF_TRUE) - and (ord(code[self.__get_target(code, self.lines[j][1]-3)-3]) == POP_JUMP_IF_FALSE) - and (self.__get_target(code, self.__get_target(code, self.lines[j][1]-3)-3) == target)))) - or (ord(code[self.prev[self.lines[j][1]]]) in (LOAD_ATTR, LOAD_FAST, JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP))): - if (self.if_lines.get(i, False) and (self.__get_target(code, self.lines[j][1]-3) == target)): - num_pj += 1 - j = self.lines[j][1] - i = self.lines[j][0] - if (ord(code[self.prev[j]]) not in (LOAD_ATTR, LOAD_FAST, JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP)): - k = j - if k > next_line_byte: - if num_pj > 1 and target > pos: - prev_end = self.prev[rtarget] - num_pj += len({ self.lines[a][0] for a in self.__all_instr(code, k, prev_end, (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)}) - num_pr = len({ self.lines[a][0] for a in self.__all_instr(code, k, prev_end, (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), rtarget)}) - num_jumps = 0 - while ord(code[prev_end]) in (JUMP_FORWARD, JUMP_ABSOLUTE) and self.__get_target(code, prev_end) == target: - num_pr += len({ self.lines[a][0] for a in self.__all_instr(code, k, prev_end, (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), prev_end)}) - num_jumps += 1 - prev_end = self.prev[prev_end] - if ord(code[prev_end]) == RETURN_VALUE: - num_jumps += 1 - num_pj += num_pr - num_pj += len(self.__all_instr(code, k, prev_end, (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE), target)) - if num_pj > num_jumps: - self.__fixed_jumps[pos] = k-3 + + # is this an if and + if op == PJIF: + match = self.rem_or(start, self.next_stmt[pos], PJIF, target) + match = self.remove_mid_line_ifs(match) + if match: + if code[pre[rtarget]] in (JF, JA) \ + and pre[rtarget] not in self.stmts \ + and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget: + if code[pre[pre[rtarget]]] == JA \ + and self.remove_mid_line_ifs([pos]) \ + and target == self.get_target(pre[pre[rtarget]]) \ + and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\ + and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))): + pass + elif code[pre[pre[rtarget]]] == RETURN_VALUE \ + and self.remove_mid_line_ifs([pos]) \ + and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \ + (PJIF, PJIT), target))) \ + | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \ + (PJIF, PJIT, JA), pre[rtarget], True))))): + pass + else: + fix = None + jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF) + last_jump_good = True + for j in jump_ifs: + if target == self.get_target(j): + if self.lines[j].next == j+3 and last_jump_good: + fix = j + break + else: + last_jump_good = False + self.fixed_jumps[pos] = fix or match[-1] return else: - self.__fixed_jumps[pos] = k-3 + self.fixed_jumps[pos] = match[-1] return - -# elif op == POP_JUMP_IF_TRUE and target > pos: -# i = self.lines[next_line_byte][0] -# j = next_line_byte -# while (self.if_lines.get(i, False) -# and ((self.__get_target(code, self.lines[j][1]-3) == target) -# and (ord(code[self.lines[j][1]-3]) == POP_JUMP_IF_TRUE))): -# j = self.lines[j][1] -# i = self.lines[j][0] -# if j > next_line_byte: -# self.__fixed_jumps[pos] = j-3 -# return - elif op == POP_JUMP_IF_TRUE: - def equaljumps(jump1, jump2): - jump_ops = (JUMP_ABSOLUTE, JUMP_FORWARD) - while ord(code[jump1]) in jump_ops: - jump1 = self.__get_target(code, jump1) - while ord(code[jump2]) in jump_ops: - jump2 = self.__get_target(code, jump2) - return jump1 == jump2 - i = self.lines[next_line_byte][0] - j = next_line_byte - while self.if_lines.get(i, False): - j = self.lines[j][1] - i = self.lines[j][0] - if j > next_line_byte: - if ord(code[j]) == JUMP_ABSOLUTE and equaljumps(j, target): - self.__fixed_jumps[pos] = j-3 + else: # op == PJIT + if (pos+3) in self.load_asserts: + if code[pre[rtarget]] == RAISE_VARARGS: return - - if (target < pos) and ((ord(code[target]) == FOR_ITER) or (ord(code[self.prev[target]]) == SETUP_LOOP)): -# self.__end_if_line[start] = 0 - - if ord(code[self.prev[end]]) == JUMP_ABSOLUTE: - if self.__get_target(code, self.prev[end]) == target: - self.__structs.append({'type': 'if-then', - 'start': pos, - 'end': self.prev[end]}) -# print self.__structs[-1] + self.load_asserts.remove(pos+3) + + next = self.next_stmt[pos] + if pre[next] == pos: + pass + elif code[next] in (JF, JA) and target == self.get_target(next): + if code[pre[next]] == PJIF: + if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE): + self.fixed_jumps[pos] = pre[next] + return + elif code[next] == JA and code[target] in (JA, JF): + next_target = self.get_target(next) + if self.get_target(target) == next_target: + self.fixed_jumps[pos] = pre[next] + return + elif code[next_target] in (JA, JF) and self.get_target(next_target) == self.get_target(target): + self.fixed_jumps[pos] = pre[next] + return + + + #don't add a struct for a while test, it's already taken care of + if pos in self.ignore_if: return - + + if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \ + and pre[rtarget] != pos and pre[pre[rtarget]] != pos: + if code[rtarget] == JA and code[rtarget+3] == POP_BLOCK: + if code[pre[pre[rtarget]]] != JA: + pass + elif self.get_target(pre[pre[rtarget]]) != target: + pass + else: + rtarget = pre[rtarget] + else: + rtarget = pre[rtarget] + #does the if jump just beyond a jump op, then this is probably an if statement - if ord(code[self.prev[rtarget]]) in (JUMP_ABSOLUTE, JUMP_FORWARD): - if_end = self.__get_target(code, self.prev[rtarget]) - - if (if_end < self.prev[rtarget]) and (ord(code[self.prev[if_end]]) == SETUP_LOOP): - loopjump = self.__last_instr(code, start, end, JUMP_ABSOLUTE, if_end) + if code[pre[rtarget]] in (JA, JF): + if_end = self.get_target(pre[rtarget]) + + #is this a loop not an if? + if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP): if(if_end > start): return - end = self.__restrict_to_parent(if_end, parent) + end = self.restrict_to_parent(if_end, parent) -# self.__end_if_line[start] = rtarget - - self.__structs.append({'type': 'if-then', + self.structs.append({'type': 'if-then', 'start': start, - 'end': self.prev[rtarget]}) - + 'end': pre[rtarget]}) + self.not_continue.add(pre[rtarget]) + if rtarget < end: - self.__structs.append({'type': 'if-else', + self.structs.append({'type': 'if-else', 'start': rtarget, 'end': end}) - elif ord(code[self.prev[rtarget]]) == RETURN_VALUE: -# self.__end_if_line[start] = rtarget - # self.__fixed_jumps[pos] = rtarget - self.__structs.append({'type': 'if-then', + elif code[pre[rtarget]] == RETURN_VALUE: + self.structs.append({'type': 'if-then', 'start': start, 'end': rtarget}) + self.return_end_ifs.add(pre[rtarget]) + elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): - target = self.__get_target(code, pos, op) - if target > pos: - unop_target = self.__last_instr(code, pos, target, JUMP_FORWARD, target) - if unop_target and ord(code[unop_target+3]) != ROT_TWO: - self.__fixed_jumps[pos] = unop_target + target = self.get_target(pos, op) +# if target > pos: +# unop_target = self.last_instr(pos, target, JF, target) +# if unop_target and code[unop_target+3] != ROT_TWO: +# self.fixed_jumps[pos] = unop_target +# else: + self.fixed_jumps[pos] = self.restrict_to_parent(target, parent) @@ -757,48 +903,46 @@ def find_jump_targets(self, code): This procedure is modelled after dis.findlables(), but here for each target the number of jumps are counted. """ - HAVE_ARGUMENT = dis.HAVE_ARGUMENT hasjrel = dis.hasjrel hasjabs = dis.hasjabs - needFixing = (self.__pyversion >= 2.3) - n = len(code) - self.__structs = [{'type': 'root', + self.structs = [{'type': 'root', 'start': 0, 'end': n-1}] - self.__loops = [] ## All loop entry points - self.__fixed_jumps = {} ## Map fixed jumps to their real destination - self.__jump_back_else = {} + self.loops = [] ## All loop entry points + self.fixed_jumps = {} ## Map fixed jumps to their real destination + self.ignore_if = set() + self.build_stmt_indices() + self.not_continue = set() + self.return_end_ifs = set() targets = {} - i = 0 - while i < n: - op = ord(code[i]) + for i in self.op_range(0, n): + op = code[i] - if needFixing: - ## Determine structures and fix jumps for 2.3+ - self.__detect_structure(code, i, op) + ## Determine structures and fix jumps for 2.3+ + self.detect_structure(i, op) if op >= HAVE_ARGUMENT: - label = self.__fixed_jumps.get(i) - oparg = ord(code[i+1]) + ord(code[i+2]) * 256 + label = self.fixed_jumps.get(i) + oparg = code[i+1] + code[i+2] * 256 if label is None: if op in hasjrel and op != FOR_ITER: label = i + 3 + oparg elif op in hasjabs: - if op in [JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP]: + if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP): if (oparg > i): label = oparg - if label is not None: + if label is not None and label != -1: targets[label] = targets.get(label, []) + [i] - i += 3 - else: - i += 1 + elif op == END_FINALLY and i in self.fixed_jumps: + label = self.fixed_jumps[i] + targets[label] = targets.get(label, []) + [i] return targets diff --git a/uncompyle/Walker.py b/uncompyle/Walker.py index a293fa5..9ae9205 100644 --- a/uncompyle/Walker.py +++ b/uncompyle/Walker.py @@ -1,5 +1,5 @@ # Copyright (c) 1999 John Aycock -# Copyright (c) 2000-2002 by hartmut Goebel +# Copyright (c) 2000-2002 by hartmut Goebel # Copyright (c) 2005 by Dan Pascu # # See main module for license. @@ -53,7 +53,7 @@ # the end of functions). RETURN_LOCALS = AST('return_stmt', - [ AST('expr', [ Token('LOAD_LOCALS') ]), + [ AST('ret_expr', [AST('expr', [ Token('LOAD_LOCALS') ])]), Token('RETURN_VALUE')]) @@ -94,9 +94,9 @@ 'STORE_ATTR': ( '%c.%[1]{pattr}', 0), # 'STORE_SUBSCR': ( '%c[%c]', 0, 1 ), 'STORE_SLICE+0': ( '%c[:]', 0 ), - 'STORE_SLICE+1': ( '%c[%c:]', 0, 1 ), - 'STORE_SLICE+2': ( '%c[:%c]', 0, 1 ), - 'STORE_SLICE+3': ( '%c[%c:%c]', 0, 1, 2 ), + 'STORE_SLICE+1': ( '%c[%p:]', 0, (1,100) ), + 'STORE_SLICE+2': ( '%c[:%p]', 0, (1,100) ), + 'STORE_SLICE+3': ( '%c[%p:%p]', 0, (1,100), (2,100) ), 'DELETE_SLICE+0': ( '%|del %c[:]\n', 0 ), 'DELETE_SLICE+1': ( '%|del %c[%c:]\n', 0, 1 ), 'DELETE_SLICE+2': ( '%|del %c[:%c]\n', 0, 1 ), @@ -147,9 +147,9 @@ 'unary_convert': ( '`%c`', 0 ), 'get_iter': ( 'iter(%c)', 0 ), 'slice0': ( '%c[:]', 0 ), - 'slice1': ( '%c[%c:]', 0, 1 ), - 'slice2': ( '%c[:%c]', 0, 1 ), - 'slice3': ( '%c[%c:%c]', 0, 1, 2 ), + 'slice1': ( '%c[%p:]', 0, (1,100) ), + 'slice2': ( '%c[:%p]', 0, (1,100) ), + 'slice3': ( '%c[%p:%p]', 0, (1,100), (2,100) ), 'IMPORT_FROM': ( '%{pattr}', ), 'load_attr': ( '%c.%[1]{pattr}', 0), @@ -164,14 +164,15 @@ 'DELETE_NAME': ( '%|del %{pattr}\n', ), 'DELETE_GLOBAL': ( '%|del %{pattr}\n', ), 'delete_subscr': ( '%|del %c[%c]\n', 0, 1,), - 'binary_subscr': ( '%c[%c]', 0, 1), - 'binary_subscr2': ( '%c[%c]', 0, 1), + 'binary_subscr': ( '%c[%p]', 0, (1,100)), + 'binary_subscr2': ( '%c[%p]', 0, (1,100)), 'store_subscr': ( '%c[%c]', 0, 1), 'STORE_FAST': ( '%{pattr}', ), 'STORE_NAME': ( '%{pattr}', ), 'STORE_GLOBAL': ( '%{pattr}', ), 'STORE_DEREF': ( '%{pattr}', ), - 'unpack': ( '(%C,)', (1, sys.maxint, ', ') ), + 'unpack': ( '%C%,', (1, sys.maxint, ', ') ), + 'unpack_w_parens': ( '(%C%,)', (1, sys.maxint, ', ') ), 'unpack_list': ( '[%C]', (1, sys.maxint, ', ') ), 'build_tuple2': ( '%P', (0,-1,', ', 100) ), @@ -197,41 +198,38 @@ # 'dup_topx': ( '%c', 0), 'designList': ( '%c = %c', 0, -1 ), 'and': ( '%c and %c', 0, 2 ), + 'ret_and': ( '%c and %c', 0, 2 ), 'and2': ( '%c', 3 ), 'or': ( '%c or %c', 0, 2 ), - 'conditional': ( '%p if %p else %p', (2,100), (0,100), (4,100)), - 'conditionaland': ( '%p if %p and %p else %p', (4,100), (0,24), (2,24), (6,100)), - 'conditionalnot': ( '%p if not %p else %p', (2,100), (0,22), (4,100)), + 'ret_or': ( '%c or %c', 0, 2 ), + 'conditional': ( '%p if %p else %p', (2,27), (0,27), (4,27)), + 'ret_cond': ( '%p if %p else %p', (2,27), (0,27), (4,27)), + 'conditionalnot': ( '%p if not %p else %p', (2,27), (0,22), (4,27)), + 'ret_cond_not': ( '%p if not %p else %p', (2,27), (0,22), (4,27)), 'conditional_lambda': ( '(%c if %c else %c)', 2, 0, 3), - 'conditional_lambda2': ( '(%c if %p and %p else %c)', 4, (0,24), (2,24), 5), 'return_lambda': ('%c', 0), - 'compare': ( '%c %[-1]{pattr} %c', 0, 1 ), - 'cmp_list': ( '%c %c', 0, 1), - 'cmp_list1': ( '%[3]{pattr} %c %c', 0, -2), - 'cmp_list2': ( '%[1]{pattr} %c', 0), + 'compare': ( '%p %[-1]{pattr} %p', (0,19), (1,19) ), + 'cmp_list': ( '%p %p', (0,20), (1,19)), + 'cmp_list1': ( '%[3]{pattr} %p %p', (0,19), (-2,19)), + 'cmp_list2': ( '%[1]{pattr} %p', (0,19)), # 'classdef': (), # handled by n_classdef() - 'funcdef': ( '\n%|def %c\n', -2), # -2 to handle closures - 'funcdefdeco': ( '%c', 0), - 'mkfuncdeco': ( '\n%|@%c%c', 0, 1), - 'mkfuncdeco0': ( '\n%|def %c\n', 0), + 'funcdef': ( '\n\n%|def %c\n', -2), # -2 to handle closures + 'funcdefdeco': ( '\n\n%c', 0), + 'mkfuncdeco': ( '%|@%c\n%c', 0, 1), + 'mkfuncdeco0': ( '%|def %c\n', 0), 'classdefdeco': ( '%c', 0), - 'classdefdeco1': ( '\n%|@%c%c', 0, 1), + 'classdefdeco1': ( '\n\n%|@%c%c', 0, 1), 'kwarg': ( '%[0]{pattr}=%c', 1), - 'importstmt': ( '%|import %[0]{pattr}\n', ), - 'importfrom': ( '%|from %[0]{pattr} import %c\n', 1 ), - 'importlist': ( '%C', (0, sys.maxint, ', ') ), - 'importstmt2': ( '%|import %c\n', 1), - 'importstar2': ( '%|from %[1]{pattr} import *\n', ), - 'importfrom2': ( '%|from %[1]{pattr} import %c\n', 2 ), 'importlist2': ( '%C', (0, sys.maxint, ', ') ), 'assert': ( '%|assert %c\n' , 0 ), 'assert2': ( '%|assert %c, %c\n' , 0, 3 ), 'assert_expr_or': ( '%c or %c', 0, 2 ), 'assert_expr_and': ( '%c and %c', 0, 2 ), - 'print_stmt': ( '%|print %c,\n', 0 ), - 'print_stmt_nl': ( '%|print %[0]C\n', (0,1, None) ), - 'print_nl_stmt': ( '%|print\n', ), + 'print_items_stmt': ( '%|print %c%c,\n', 0, 2), + 'print_items_nl_stmt': ( '%|print %c%c\n', 0, 2), + 'print_item': ( ', %c', 0), + 'print_nl': ( '%|print\n', ), 'print_to': ( '%|print >> %c, %c,\n', 0, 1 ), 'print_to_nl': ( '%|print >> %c, %c\n', 0, 1 ), 'print_nl_to': ( '%|print >> %c\n', 0 ), @@ -240,8 +238,11 @@ 'call_stmt': ( '%|%p\n', (0,200)), 'break_stmt': ( '%|break\n', ), 'continue_stmt': ( '%|continue\n', ), - 'jcontinue_stmt': ( '%|continue\n', ), - 'raise_stmt': ( '%|raise %[0]C\n', (0,sys.maxint,', ') ), + + 'raise_stmt0': ( '%|raise\n', ), + 'raise_stmt1': ( '%|raise %c\n', 0), + 'raise_stmt2': ( '%|raise %c, %c\n', 0, 1), + 'raise_stmt3': ( '%|raise %c, %c, %c\n', 0, 1, 2), # 'yield': ( 'yield %c', 0), # 'return_stmt': ( '%|return %c\n', 0), @@ -251,40 +252,38 @@ 'testtrue': ( 'not %p', (0,22) ), 'ifelsestmt': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), + 'ifelsestmtc': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelsestmtl': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelifstmt': ( '%|if %c:\n%+%c%-%c', 0, 1, 3 ), 'elifelifstmt': ( '%|elif %c:\n%+%c%-%c', 0, 1, 3 ), 'elifstmt': ( '%|elif %c:\n%+%c%-', 0, 1 ), 'elifelsestmt': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 3 ), 'ifelsestmtr': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 1, 2 ), - - 'whilestmt': ( '%|while %c:\n%+%c%-\n', 1, 2 ), - 'while1stmt': ( '%|while 1:\n%+%c%-\n', 1 ), - 'whileelsestmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-\n', 1, 2, -2 ), - 'whileelselaststmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-\n', 1, 2, -2 ), - 'forstmt': ( '%|for %c in %c:\n%+%c%-\n', 3, 1, 4 ), + 'elifelsestmtr': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-\n\n', 0, 1, 2 ), + + 'whilestmt': ( '%|while %c:\n%+%c%-\n\n', 1, 2 ), + 'while1stmt': ( '%|while 1:\n%+%c%-\n\n', 1 ), + 'while1elsestmt': ( '%|while 1:\n%+%c%-%|else:\n%+%c%-\n\n', 1, 3 ), + 'whileelsestmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-\n\n', 1, 2, -2 ), + 'whileelselaststmt': ( '%|while %c:\n%+%c%-%|else:\n%+%c%-', 1, 2, -2 ), + 'forstmt': ( '%|for %c in %c:\n%+%c%-\n\n', 3, 1, 4 ), 'forelsestmt': ( - '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n', 3, 1, 4, -2), + '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, 1, 4, -2), 'forelselaststmt': ( '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-', 3, 1, 4, -2), - 'trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ), - 'c_trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ), - 'tf_trystmt': ( '%c%-%c%+', 1, 5 ), + 'forelselaststmtl': ( + '%|for %c in %c:\n%+%c%-%|else:\n%+%c%-\n\n', 3, 1, 4, -2), + 'trystmt': ( '%|try:\n%+%c%-%c\n\n', 1, 3 ), + 'tryelsestmt': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-\n\n', 1, 3, 4 ), + 'tryelsestmtc': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), + 'tryelsestmtl': ( '%|try:\n%+%c%-%c%|else:\n%+%c%-', 1, 3, 4 ), + 'tf_trystmt': ( '%c%-%c%+', 1, 3 ), + 'tf_tryelsestmt': ( '%c%-%c%|else:\n%+%c', 1, 3, 4 ), 'except': ( '%|except:\n%+%c%-', 3 ), - 'except2': ( '%|except:\n%+%c%-', 3 ), 'except_cond1': ( '%|except %c:\n', 1 ), 'except_cond2': ( '%|except %c as %c:\n', 1, 5 ), - 'except_sub_stmts': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'except_sub_stmts_a': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'c_except_sub_stmts': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'c_except_sub_stmts2': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'c_except_sub_stmts_a': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'c_except_sub_stmts2_a': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), - 'except_cond_cont': ( '%c%+%|continue\n%-', 0), - 'except_else': ( '%|else:\n%+%c%-', 2 ), - 'except_else2': ( '%|else:\n%+%c%-', 1 ), - 'except_else3': ( '%|else:\n%+%c%-', 2 ), - 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-', 1, 5 ), + 'except_suite': ( '%+%c%-%C', 0, (1, sys.maxint, '') ), + 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-\n\n', 1, 5 ), 'withstmt': ( '%|with %c:\n%+%c%-', 0, 3), 'withasstmt': ( '%|with %c as %c:\n%+%c%-', 0, 2, 3), 'passstmt': ( '%|pass\n', ), @@ -298,13 +297,15 @@ ## # Import style for 2.5 - '_25_importstmt': ( '%|import %c\n', 2), - '_25_importstar': ( '%|from %[2]{pattr} import *\n', ), - '_25_importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), + 'importstmt': ( '%|import %c\n', 2), + 'importstar': ( '%|from %[2]{pattr} import *\n', ), + 'importfrom': ( '%|from %[2]{pattr} import %c\n', 3 ), + 'importmultiple': ( '%|import %c%c\n', 2, 3), + 'import_cont' : ( ', %c', 2), # CE - Fixes for tuples - '_25_assign2': ( '%|(%c, %c,) = (%c, %c)\n', 3, 4, 0, 1 ), - '_25_assign3': ( '%|(%c, %c, %c,) = (%c, %c, %c)\n', 5, 6, 7, 0, 1, 2 ), + 'assign2': ( '%|%c, %c = %c, %c\n', 3, 4, 0, 1 ), + 'assign3': ( '%|%c, %c, %c = %c, %c, %c\n', 5, 6, 7, 0, 1, 2 ), } @@ -368,12 +369,15 @@ 'unary_not': 22, 'and': 24, + 'ret_and': 24, 'or': 26, + 'ret_or': 26, 'conditional': 28, - 'conditionaland': 28, 'conditionalnot': 28, + 'ret_cond': 28, + 'ret_cond_not': 28, '_mklambda': 30, 'yield': 101 @@ -422,7 +426,7 @@ def find_all_globals(node, globs): def find_none(node): for n in node: if isinstance(n, AST): - if not (n == 'return_stmt'): + if not (n == 'return_stmt' or n == 'return_if_stmt'): if find_none(n): return True elif n.type == 'LOAD_CONST' and n.pattr == None: @@ -447,6 +451,7 @@ def __init__(self, out, scanner, showast=0): self.return_none = False self.mod_globs = set() self.currentclass = None + self.pending_newlines = 0 f = property(lambda s: s.__params['f'], lambda s, x: s.__params.__setitem__('f', x), @@ -476,6 +481,8 @@ def indentLess(self, indent=TAB): def traverse(self, node, indent=None, isLambda=0): self.__param_stack.append(self.__params) if indent is None: indent = self.indent + p = self.pending_newlines + self.pending_newlines = 0 self.__params = { '_globals': {}, 'f': cStringIO.StringIO(), @@ -483,21 +490,49 @@ def traverse(self, node, indent=None, isLambda=0): 'isLambda': isLambda, } self.preorder(node) + self.f.write('\n'*self.pending_newlines) result = self.f.getvalue() self.__params = self.__param_stack.pop() + self.pending_newlines = p return result def write(self, *data): - if type(data) == ListType: - self.f.writelines(data) - elif type(data) == TupleType: - self.f.writelines(list(data)) - else: - self.f.write(data) + if (len(data) == 0) or (len(data) == 1 and data[0] == ''): + return +# import pdb; pdb.set_trace() + out = ''.join((str(j) for j in data)) + n = 0 + for i in out: + if i == '\n': + n += 1 + if n == len(out): + self.pending_newlines = max(self.pending_newlines, n) + return + elif n: + self.pending_newlines = max(self.pending_newlines, n) + out = out[n:] + break + else: + break + + if self.pending_newlines > 0: + self.f.write('\n'*self.pending_newlines) + self.pending_newlines = 0 + + for i in out[::-1]: + if i == '\n': + self.pending_newlines += 1 + else: + break + + if self.pending_newlines: + out = out[:-self.pending_newlines] + self.f.write(out) def print_(self, *data): - self.write(*data) - print >> self.f + if data and not(len(data) == 1 and data[0] ==''): + self.write(*data) + self.pending_newlines = max(self.pending_newlines, 1) def print_docstring(self, indent, docstring): quote = '"""' @@ -539,7 +574,9 @@ def print_docstring(self, indent, docstring): calculate_indent = sys.maxint for line in lines[1:]: stripped = line.lstrip() - calculate_indent = min(calculate_indent, len(line) - len(stripped)) + if len(stripped) > 0: + calculate_indent = min(calculate_indent, len(line) - len(stripped)) + calculate_indent = min(calculate_indent, len(lines[-1]) - len(lines[-1].lstrip())) # Remove indentation (first line is special): trimmed = [lines[0]] if calculate_indent < sys.maxint: @@ -562,8 +599,21 @@ def n_return_stmt(self, node): self.preorder(node[0]) self.prune() else: - self.write(self.indent, 'return ') - if self.return_none or node != AST('return_stmt', [NONE, Token('RETURN_VALUE')]): + self.write(self.indent, 'return') + if self.return_none or node != AST('return_stmt', [AST('ret_expr', [NONE]), Token('RETURN_VALUE')]): + self.write(' ') + self.preorder(node[0]) + self.print_() + self.prune() # stop recursing + + def n_return_if_stmt(self, node): + if self.__params['isLambda']: + self.preorder(node[0]) + self.prune() + else: + self.write(self.indent, 'return') + if self.return_none or node != AST('return_stmt', [AST('ret_expr', [NONE]), Token('RETURN_END_IF')]): + self.write(' ') self.preorder(node[0]) self.print_() self.prune() # stop recursing @@ -576,6 +626,8 @@ def n_yield(self, node): self.prune() # stop recursing def n_buildslice3(self, node): + p = self.prec + self.prec = 100 if node[0] != NONE: self.preorder(node[0]) self.write(':') @@ -584,14 +636,18 @@ def n_buildslice3(self, node): self.write(':') if node[2] != NONE: self.preorder(node[2]) + self.prec = p self.prune() # stop recursing def n_buildslice2(self, node): + p = self.prec + self.prec = 100 if node[0] != NONE: self.preorder(node[0]) self.write(':') if node[1] != NONE: self.preorder(node[1]) + self.prec = p self.prune() # stop recursing # def n_l_stmts(self, node): @@ -608,6 +664,8 @@ def n_expr(self, node): else: n = node[0] self.prec = PRECEDENCE.get(n,-2) + if n == 'LOAD_CONST' and repr(n.pattr)[0] == '-': + self.prec = 6 if p < self.prec: self.write('(') self.preorder(node[0]) @@ -617,6 +675,14 @@ def n_expr(self, node): self.prec = p self.prune() + def n_ret_expr(self, node): + if len(node) == 1 and node[0] == 'expr': + self.n_expr(node[0]) + else: + self.n_expr(node) + + n_ret_expr_or_cond = n_expr + def n_binary_expr(self, node): self.preorder(node[0]) self.write(' ') @@ -662,12 +728,11 @@ def n_delete_subscr(self, node): # 'tryfinallystmt': ( '%|try:\n%+%c%-%|finally:\n%+%c%-', 1, 5 ), def n_tryfinallystmt(self, node): - if node[1] == 'stmts' and \ - len(node[1]) == 1 and \ - node[1][0] == 'sstmt' and \ - node[1][0][0] == 'stmt' and \ - node[1][0][0][0] == 'trystmt' or node[1][0][0] == 'c_trystmt': - node[1][0][0][0].type = 'tf_trystmt' + if len(node[1][0]) == 1 and node[1][0][0] == 'stmt': + if node[1][0][0][0] == 'trystmt': + node[1][0][0][0].type = 'tf_trystmt' + if node[1][0][0][0] == 'tryelsestmt': + node[1][0][0][0].type = 'tf_tryelsestmt' self.default(node) def n_exec_stmt(self, node): @@ -686,40 +751,108 @@ def n_exec_stmt(self, node): self.prune() # stop recursing def n_ifelsestmt(self, node, preprocess=0): - if len(node[3]) == 1 and not node[3][0] == 'continue_stmt': - ifnode = node[3][0][0][0] - if node[3][0] == 'lastc_stmt' and node[3][0][0] == 'iflaststmt': - node.type = 'ifelifstmt' - node[3][0][0].type = 'elifstmt' - elif ifnode == 'ifelsestmt': - node.type = 'ifelifstmt' - self.n_ifelsestmt(ifnode, preprocess=1) - if ifnode == 'ifelifstmt': - ifnode.type = 'elifelifstmt' - elif ifnode == 'ifelsestmt': - ifnode.type = 'elifelsestmt' - elif ifnode == 'ifstmt': - node.type = 'ifelifstmt' - ifnode.type = 'elifstmt' + n = node[3][0] + if len(n) == 1 == len(n[0]) and n[0] == '_stmts': + n = n[0][0][0] + elif n[0].type in ('lastc_stmt', 'lastl_stmt'): + n = n[0][0] + else: + if not preprocess: + self.default(node) + return + + if n.type in ('ifstmt', 'iflaststmt', 'iflaststmtl'): + node.type = 'ifelifstmt' + n.type = 'elifstmt' + elif n.type in ('ifelsestmtr',): + node.type = 'ifelifstmt' + n.type = 'elifelsestmtr' + elif n.type in ('ifelsestmt', 'ifelsestmtc', 'ifelsestmtl'): + node.type = 'ifelifstmt' + self.n_ifelsestmt(n, preprocess=1) + if n == 'ifelifstmt': + n.type = 'elifelifstmt' + elif n.type in ('ifelsestmt', 'ifelsestmtc', 'ifelsestmtl'): + n.type = 'elifelsestmt' if not preprocess: self.default(node) - def n_ifelsestmtl(self, node, preprocess=0): - if len(node[3]) == 1 and node[3][0] == 'lastl_stmt': - ifnode = node[3][0][0] - if ifnode == 'ifelsestmtl': - node.type = 'ifelifstmt' - self.n_ifelsestmtl(ifnode, preprocess=1) - if ifnode == 'ifelifstmt': - ifnode.type = 'elifelifstmt' - elif ifnode == 'ifelsestmtl': - ifnode.type = 'elifelsestmt' - elif ifnode == 'iflaststmtl': - node.type = 'ifelifstmt' - ifnode.type = 'elifstmt' - if not preprocess: + n_ifelsestmtc = n_ifelsestmtl = n_ifelsestmt + + def n_ifelsestmtr(self, node): + if len(node[2]) != 2: + self.default(node) + + if not (node[2][0][0][0] == 'ifstmt' and node[2][0][0][0][1][0] == 'return_if_stmts') \ + and not (node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_if_stmts'): self.default(node) + return + self.write(self.indent, 'if ') + self.preorder(node[0]) + self.print_(':') + self.indentMore() + self.preorder(node[1]) + self.indentLess() + + if_ret_at_end = False + if len(node[2][0]) >= 3: + if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_if_stmts': + if_ret_at_end = True + + past_else = False + prev_stmt_is_if_ret = True + for n in node[2][0]: + if (n[0] == 'ifstmt' and n[0][1][0] == 'return_if_stmts'): + if prev_stmt_is_if_ret: + n[0].type = 'elifstmt' + prev_stmt_is_if_ret = True + else: + prev_stmt_is_if_ret = False + if not past_else and not if_ret_at_end: + self.print_(self.indent, 'else:') + self.indentMore() + past_else = True + self.preorder(n) + if not past_else or if_ret_at_end: + self.print_(self.indent, 'else:') + self.indentMore() + self.preorder(node[2][1]) + self.indentLess() + self.prune() + + def n_elifelsestmtr(self, node): + if len(node[2]) != 2: + self.default(node) + + for n in node[2][0]: + if not (n[0] == 'ifstmt' and n[0][1][0] == 'return_if_stmts'): + self.default(node) + return + + self.write(self.indent, 'elif ') + self.preorder(node[0]) + self.print_(':') + self.indentMore() + self.preorder(node[1]) + self.indentLess() + + if_ret_at_end = False + if len(node[2][0]) >= 3: + if node[2][0][-1][0] == 'ifstmt' and node[2][0][-1][0][1][0] == 'return_if_stmts': + if_ret_at_end = True + + past_else = False + prev_stmt_is_if_ret = True + for n in node[2][0]: + n[0].type = 'elifstmt' + self.preorder(n) + self.print_(self.indent, 'else:') + self.indentMore() + self.preorder(node[2][1]) + self.indentLess() + self.prune() + def n_import_as(self, node): iname = node[0].pattr; assert node[-1][-1].type.startswith('STORE_') @@ -729,11 +862,24 @@ def n_import_as(self, node): else: self.write(iname, ' as ', sname) self.prune() # stop recursing + + n_import_as_cont = n_import_as + def n_importfrom(self, node): + if node[0].pattr > 0: + node[2].pattr = '.'*node[0].pattr+node[2].pattr + self.default(node) + + n_importstar = n_importfrom + def n_mkfunc(self, node): self.write(node[-2].attr.co_name) # = code.co_name self.indentMore() self.make_function(node, isLambda=0) + if len(self.__param_stack) > 1: + self.write('\n\n') + else: + self.write('\n\n\n') self.indentLess() self.prune() # stop recursing @@ -743,7 +889,7 @@ def n_mklambda(self, node): def n_list_compr(self, node): p = self.prec - self.prec = 100 + self.prec = 27 n = node[-1] assert n == 'list_iter' # find innerst node @@ -757,12 +903,12 @@ def n_list_compr(self, node): self.preorder(n[0]) # lc_body self.preorder(node[-1]) # for/if parts self.write( ' ]') - self.prune() # stop recursing self.prec = p + self.prune() # stop recursing def comprehension_walk(self, node, iter_index): p = self.prec - self.prec = 100 + self.prec = 27 code = node[-5].attr assert type(code) == CodeType @@ -770,6 +916,7 @@ def comprehension_walk(self, node, iter_index): #assert isinstance(code, Code) ast = self.build_ast(code._tokens, code._customize) + self.customize(code._customize) ast = ast[0][0][0] n = ast[iter_index] @@ -811,7 +958,7 @@ def n_classdef(self, node): cclass = self.currentclass self.currentclass = str(node[0].pattr) - self.print_() + self.write('\n\n') self.write(self.indent, 'class ', self.currentclass) self.print_super_classes(node) self.print_(':') @@ -820,9 +967,15 @@ def n_classdef(self, node): self.indentMore() self.build_class(node[2][-2].attr) self.indentLess() - self.prune() self.currentclass = cclass + if len(self.__param_stack) > 1: + self.write('\n\n') + else: + self.write('\n\n\n') + + self.prune() + n_classdefdeco2 = n_classdef @@ -883,39 +1036,76 @@ def n_build_list(self, node): """ p = self.prec self.prec = 100 - lastnode = node.pop().type - if lastnode.startswith('BUILD_LIST'): + lastnode = node.pop() + lastnodetype = lastnode.type + if lastnodetype.startswith('BUILD_LIST'): self.write('['); endchar = ']' - elif lastnode.startswith('BUILD_TUPLE'): + elif lastnodetype.startswith('BUILD_TUPLE'): self.write('('); endchar = ')' - elif lastnode.startswith('BUILD_SET'): + elif lastnodetype.startswith('BUILD_SET'): self.write('{'); endchar = '}' - elif lastnode.startswith('ROT_TWO'): + elif lastnodetype.startswith('ROT_TWO'): self.write('('); endchar = ')' else: raise 'Internal Error: n_build_list expects list or tuple' + flat_elems = [] + for elem in node: + if elem == 'expr1024': + for subelem in elem: + for subsubelem in subelem: + flat_elems.append(subsubelem) + elif elem == 'expr32': + for subelem in elem: + flat_elems.append(subelem) + else: + flat_elems.append(elem) + self.indentMore(INDENT_PER_LEVEL) - if len(node) > 3: + if lastnode.attr > 3: line_separator = ',\n' + self.indent else: line_separator = ', ' sep = INDENT_PER_LEVEL[:-1] - for elem in node: - if (elem == 'ROT_THREE'): + + for elem in flat_elems: + if elem == 'ROT_THREE': continue - assert elem == 'expr' value = self.traverse(elem) self.write(sep, value) sep = line_separator - if len(node) == 1 and lastnode.startswith('BUILD_TUPLE'): + if lastnode.attr == 1 and lastnodetype.startswith('BUILD_TUPLE'): self.write(',') self.write(endchar) self.indentLess(INDENT_PER_LEVEL) self.prec = p self.prune() + def n_unpack(self, node): + for n in node[1:]: + if n[0].type == 'unpack': + n[0].type = 'unpack_w_parens' + self.default(node) + + n_unpack_w_parens = n_unpack + + def n_assign2(self, node): + for n in node[-2:]: + if n[0] == 'unpack': + n[0].type = 'unpack_w_parens' + self.default(node) + + def n_assign3(self, node): + for n in node[-3:]: + if n[0] == 'unpack': + n[0].type = 'unpack_w_parens' + self.default(node) + + def n_except_cond2(self, node): + if node[5][0] == 'unpack': + node[5][0].type = 'unpack_w_parens' + self.default(node) def engine(self, entry, startnode): #self.print_("-----") @@ -946,9 +1136,9 @@ def engine(self, entry, startnode): elif typ == '-': self.indentLess() elif typ == '|': self.write(self.indent) ## no longer used, since BUILD_TUPLE_n is pretty printed: - ##elif typ == ',': - ## if lastC == 1: - ## self.write(',') + elif typ == ',': + if lastC == 1: + self.write(',') elif typ == 'c': self.preorder(node[entry[arg]]) arg += 1 @@ -960,8 +1150,8 @@ def engine(self, entry, startnode): arg += 1 elif typ == 'C': low, high, sep = entry[arg] - ## lastC = remaining = len(node[low:high]) - remaining = len(node[low:high]) + lastC = remaining = len(node[low:high]) + ## remaining = len(node[low:high]) for subnode in node[low:high]: self.preorder(subnode) remaining -= 1 @@ -971,8 +1161,8 @@ def engine(self, entry, startnode): elif typ == 'P': p = self.prec low, high, sep, self.prec = entry[arg] - ## lastC = remaining = len(node[low:high]) - remaining = len(node[low:high]) + lastC = remaining = len(node[low:high]) + ## remaining = len(node[low:high]) for subnode in node[low:high]: self.preorder(subnode) remaining -= 1 @@ -984,7 +1174,7 @@ def engine(self, entry, startnode): d = node.__dict__ expr = m.group('expr') try: - self.f.write(eval(expr, d, d)) + self.write(eval(expr, d, d)) except: print node raise @@ -1066,9 +1256,9 @@ def get_tuple_parameter(self, ast, name): assert node[1] == 'designator' # if lhs is not a UNPACK_TUPLE (or equiv.), # add parenteses to make this a tuple - if node[1][0] not in ('unpack', 'unpack_list'): - return '(' + self.traverse(node[1]) + ')' - return self.traverse(node[1]) + #if node[1][0] not in ('unpack', 'unpack_list'): + return '(' + self.traverse(node[1]) + ')' + #return self.traverse(node[1]) raise "Can't find tuple parameter" % name @@ -1206,7 +1396,7 @@ def build_class(self, code): #else: # print ast[-1][-1] - for g in find_globals(ast, {}).keys(): + for g in find_globals(ast, set()): self.print_(indent, 'global ', g) self.gen_source(ast, code._customize) @@ -1243,12 +1433,6 @@ def build_ast(self, tokens, customize, isLambda=0, noneInNames=False): self.print_(repr(ast)) return ast -# while(len(tokens) > 2): -# if (tokens[-1] == Token('RETURN_VALUE')) and (tokens[-2] == Token('LOAD_CONST') and (tokens[-3].type != 'END_IF_LINE')): -# del tokens[-2:] -# else: -# break - if len(tokens) > 2 or len(tokens) == 2 and not noneInNames: if tokens[-1] == Token('RETURN_VALUE'): if tokens[-2] == Token('LOAD_CONST'): @@ -1262,28 +1446,7 @@ def build_ast(self, tokens, customize, isLambda=0, noneInNames=False): try: ast = Parser.parse(tokens, customize) except Parser.ParserError, e: - try: - tokens.append(Token('LOAD_CONST')) - tokens.append(Token('RETURN_VALUE')) - ast = Parser.parse(tokens, customize) - except Parser.ParserError, e: - try: - del tokens[-2:] - Parser.p.addRule('stmt ::= continue_stmt', Parser.nop) - ast = Parser.parse(tokens, customize) - except Parser.ParserError, e: - try: - Parser.p.addRule('c_stmts ::= return_stmt', Parser.nop) - ast = Parser.parse(tokens, customize) - except: - try: - Parser.p.addRule('stmt ::= return_stmt', Parser.nop) - ast = Parser.parse(tokens, customize) - except: - raise ParserError(e, tokens) - finally: - Parser.p.cleanup() - Parser.p = Parser.Parser() + raise ParserError(e, tokens) if self.showast: diff --git a/uncompyle/__init__.py b/uncompyle/__init__.py index 4fd5f16..e6f8cc3 100644 --- a/uncompyle/__init__.py +++ b/uncompyle/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) 1999 John Aycock -# Copyright (c) 2000 by hartmut Goebel +# Copyright (c) 2000 by hartmut Goebel # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -78,7 +78,7 @@ def _load_module(filename): fp.close() return version, co -def uncompyle(version, co, out=None, showasm=0, showast=0): +def uncompyle(version, co, out=None, showasm=0, showast=0, deob=0): """ diassembles a given code block 'co' """ @@ -86,10 +86,11 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): # store final output stream for case of error __real_out = out or sys.stdout - + if co.co_filename: + print >>__real_out, '#Embedded file name: %s' % co.co_filename scanner = Scanner.getscanner(version) scanner.setShowAsm(showasm, out) - tokens, customize = scanner.disassemble(co) + tokens, customize = scanner.disassemble(co, deob=deob) # Build AST from disassembly. walker = Walker.Walker(out, scanner, showast=showast) @@ -114,15 +115,19 @@ def uncompyle(version, co, out=None, showasm=0, showast=0): pass walker.mod_globs = Walker.find_globals(ast, set()) walker.gen_source(ast, customize) + for g in walker.mod_globs: + walker.write('global %s ## Warning: Unused global\n' % g) + if walker.pending_newlines: + print >>__real_out if walker.ERROR: raise walker.ERROR -def uncompyle_file(filename, outstream=None, showasm=0, showast=0): +def uncompyle_file(filename, outstream=None, showasm=0, showast=0, deob=0): """ decompile Python byte-code file (.pyc) """ version, co = _load_module(filename) - uncompyle(version, co, outstream, showasm, showast) + uncompyle(version, co, outstream, showasm, showast, deob) co = None #---- main ------- @@ -138,7 +143,7 @@ def __memUsage(): return '' def main(in_base, out_base, files, codes, outfile=None, - showasm=0, showast=0, do_verify=0): + showasm=0, showast=0, do_verify=0, py=0, deob=0): """ in_base base directory for input files out_base base directory for output files (ignored when @@ -168,7 +173,7 @@ def _get_outstream(outfile): version = sys.version[:3] # "2.5" with open(code, "r") as f: co = compile(f.read(), "", "exec") - uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast) + uncompyle(sys.version[:3], co, sys.stdout, showasm=showasm, showast=showast, deob=deob) for file in files: infile = os.path.join(in_base, file) @@ -179,13 +184,17 @@ def _get_outstream(outfile): elif out_base is None: outstream = sys.stdout else: - outfile = os.path.join(out_base, file) + '_dis' + outfile = os.path.join(out_base, file) + if py: + outfile = outfile[:-1] + else: + outfile += '_dis' outstream = _get_outstream(outfile) #print >>sys.stderr, outfile # try to decomyple the input file try: - uncompyle_file(infile, outstream, showasm, showast) + uncompyle_file(infile, outstream, showasm, showast, deob) tot_files += 1 except KeyboardInterrupt: if outfile: diff --git a/uncompyle/verify.py b/uncompyle/verify.py index 6fd2148..6b2403a 100644 --- a/uncompyle/verify.py +++ b/uncompyle/verify.py @@ -1,234 +1,340 @@ # -# (C) Copyright 2000-2002 by hartmut Goebel +# (C) Copyright 2000-2002 by hartmut Goebel # # byte-code verifier for uncompyle # import types -import uncompyle, Scanner +import operator +import dis +import uncompyle2, Scanner + +BIN_OP_FUNCS = { +'BINARY_POWER': operator.pow, +'BINARY_MULTIPLY': operator.mul, +'BINARY_DIVIDE': operator.div, +'BINARY_FLOOR_DIVIDE': operator.floordiv, +'BINARY_TRUE_DIVIDE': operator.truediv, +'BINARY_MODULO' : operator.mod, +'BINARY_ADD': operator.add, +'BINARY_SUBRACT': operator.sub, +'BINARY_LSHIFT': operator.lshift, +'BINARY_RSHIFT': operator.rshift, +'BINARY_AND': operator.and_, +'BINARY_XOR': operator.xor, +'BINARY_OR': operator.or_, +} JUMP_OPs = None #--- exceptions --- class VerifyCmpError(Exception): - pass + pass class CmpErrorConsts(VerifyCmpError): - """Exception to be raised when consts differ.""" - def __init__(self, name, index): - self.name = name - self.index = index - - def __str__(self): - return 'Compare Error within Consts of %s at index %i' % \ - (repr(self.name), self.index) - + """Exception to be raised when consts differ.""" + def __init__(self, name, index): + self.name = name + self.index = index + + def __str__(self): + return 'Compare Error within Consts of %s at index %i' % \ + (repr(self.name), self.index) + class CmpErrorConstsType(VerifyCmpError): - """Exception to be raised when consts differ.""" - def __init__(self, name, index): - self.name = name - self.index = index + """Exception to be raised when consts differ.""" + def __init__(self, name, index): + self.name = name + self.index = index - def __str__(self): - return 'Consts type differ in %s at index %i' % \ - (repr(self.name), self.index) + def __str__(self): + return 'Consts type differ in %s at index %i' % \ + (repr(self.name), self.index) class CmpErrorConstsLen(VerifyCmpError): - """Exception to be raised when length of co_consts differs.""" - def __init__(self, name, consts1, consts2): - self.name = name - self.consts = (consts1, consts2) - - def __str__(self): - return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % \ - (repr(self.name), - len(self.consts[0]), `self.consts[0]`, - len(self.consts[1]), `self.consts[1]`) - + """Exception to be raised when length of co_consts differs.""" + def __init__(self, name, consts1, consts2): + self.name = name + self.consts = (consts1, consts2) + + def __str__(self): + return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % \ + (repr(self.name), + len(self.consts[0]), `self.consts[0]`, + len(self.consts[1]), `self.consts[1]`) + class CmpErrorCode(VerifyCmpError): - """Exception to be raised when code differs.""" - def __init__(self, name, index, token1, token2, tokens1, tokens2): - self.name = name - self.index = index - self.token1 = token1 - self.token2 = token2 - self.tokens = [tokens1, tokens2] - - def __str__(self): - s = reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), - map(lambda a,b: (a,b), - self.tokens[0], - self.tokens[1]), - 'Code differs in %s\n' % str(self.name)) - return ('Code differs in %s at offset %i [%s] != [%s]\n\n' % \ - (repr(self.name), self.index, - repr(self.token1), repr(self.token2))) + s + """Exception to be raised when code differs.""" + def __init__(self, name, index, token1, token2, tokens1, tokens2): + self.name = name + self.index = index + self.token1 = token1 + self.token2 = token2 + self.tokens = [tokens1, tokens2] + + def __str__(self): + s = reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), + map(lambda a,b: (a,b), + self.tokens[0], + self.tokens[1]), + 'Code differs in %s\n' % str(self.name)) + return ('Code differs in %s at offset %s [%s] != [%s]\n\n' % \ + (repr(self.name), self.index, + repr(self.token1), repr(self.token2))) + s class CmpErrorCodeLen(VerifyCmpError): - """Exception to be raised when code length differs.""" - def __init__(self, name, tokens1, tokens2): - self.name = name - self.tokens = [tokens1, tokens2] - - def __str__(self): - return reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), - map(lambda a,b: (a,b), - self.tokens[0], - self.tokens[1]), - 'Code len differs in %s\n' % str(self.name)) + """Exception to be raised when code length differs.""" + def __init__(self, name, tokens1, tokens2): + self.name = name + self.tokens = [tokens1, tokens2] + + def __str__(self): + return reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]), + map(lambda a,b: (a,b), + self.tokens[0], + self.tokens[1]), + 'Code len differs in %s\n' % str(self.name)) class CmpErrorMember(VerifyCmpError): - """Exception to be raised when other members differ.""" - def __init__(self, name, member, data1, data2): - self.name = name - self.member = member - self.data = (data1, data2) + """Exception to be raised when other members differ.""" + def __init__(self, name, member, data1, data2): + self.name = name + self.member = member + self.data = (data1, data2) - def __str__(self): - return 'Member %s differs in %s:\n\t%s\n\t%s\n' % \ - (repr(self.member), repr(self.name), - repr(self.data[0]), repr(self.data[1])) + def __str__(self): + return 'Member %s differs in %s:\n\t%s\n\t%s\n' % \ + (repr(self.member), repr(self.name), + repr(self.data[0]), repr(self.data[1])) #--- compare --- - + # these members are ignored -__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_consts'] +__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_stacksize', 'co_names'] def cmp_code_objects(version, code_obj1, code_obj2, name=''): - """ - Compare two code-objects. - - This is the main part of this module. - """ - #print code_obj1, type(code_obj2) - assert type(code_obj1) == types.CodeType - assert type(code_obj2) == types.CodeType - #print dir(code_obj1) - if isinstance(code_obj1, object): - # new style classes (Python 2.2) - # assume _both_ code objects to be new stle classes - assert dir(code_obj1) == dir(code_obj2) - else: - # old style classes - assert dir(code_obj1) == code_obj1.__members__ - assert dir(code_obj2) == code_obj2.__members__ - assert code_obj1.__members__ == code_obj2.__members__ - - if name == '__main__': - name = code_obj1.co_name - else: - name = '%s.%s' % (name, code_obj1.co_name) - if name == '.?': name = '__main__' - - if isinstance(code_obj1, object) and cmp(code_obj1, code_obj2): - # use the new style code-classes' __cmp__ method, which - # should be faster and more sophisticated - # if this compare fails, we use the old routine to - # find out, what exactly is nor equal - # if this compare succeds, simply return - #return - pass - - if isinstance(code_obj1, object): - members = filter(lambda x: x.startswith('co_'), dir(code_obj1)) - else: - members = dir(code_obj1); - members.sort(); #members.reverse() - - tokens1 = None - for member in members: - if member in __IGNORE_CODE_MEMBERS__: - pass - elif member == 'co_code': - scanner = Scanner.getscanner(version) - scanner.setShowAsm( showasm=0 ) - global JUMP_OPs - JUMP_OPs = scanner.JUMP_OPs - - # use changed Token class - # we (re)set this here to save exception handling, - # which would get 'unubersichtlich' - scanner.setTokenClass(Token) - try: - # disassemble both code-objects - tokens1,customize = scanner.disassemble(code_obj1) - del customize # save memory - tokens2,customize = scanner.disassemble(code_obj2) - del customize # save memory - finally: - scanner.resetTokenClass() # restore Token class - - # compare length - if len(tokens1) != len(tokens2): - raise CmpErrorCodeLen(name, tokens1, tokens2) - # compare contents - #print len(tokens1), type(tokens1), type(tokens2) - for i in xrange(len(tokens1)): - if tokens1[i] != tokens2[i]: - #print '-->', i, type(tokens1[i]), type(tokens2[i]) - raise CmpErrorCode(name, i, tokens1[i], - tokens2[i], tokens1, tokens2) - del tokens1, tokens2 # save memory - elif member == 'co_consts': - # compare length - if len(code_obj1.co_consts) != len(code_obj2.co_consts): - raise CmpErrorConstsLen(name, code_obj1.co_consts ,code_obj2.co_consts) - # compare contents - for idx in xrange(len(code_obj1.co_consts)): - const1 = code_obj1.co_consts[idx] - const2 = code_obj2.co_consts[idx] - ## print code_obj1.co_consts[idx], '\t', - ## print code_obj2.co_consts[idx] - # same type? - if type(const1) != type(const2): - raise CmpErrorConstsType(name, idx) - if type(const1) == types.CodeType: - # code object -> recursive compare - cmp_code_objects(version, const1, - const2, name=name) - elif cmp(const1, const2) != 0: - # content differs - raise CmpErrorConsts(name, idx) - else: - # all other members must be equal - if getattr(code_obj1, member) != getattr(code_obj2, member): - raise CmpErrorMember(name, member, - getattr(code_obj1,member), - getattr(code_obj2,member)) + """ + Compare two code-objects. -class Token(Scanner.Token): - """Token class with changed semantics for 'cmp()'.""" + This is the main part of this module. + """ + #print code_obj1, type(code_obj2) + assert type(code_obj1) == types.CodeType + assert type(code_obj2) == types.CodeType + #print dir(code_obj1) + if isinstance(code_obj1, object): + # new style classes (Python 2.2) + # assume _both_ code objects to be new stle classes + assert dir(code_obj1) == dir(code_obj2) + else: + # old style classes + assert dir(code_obj1) == code_obj1.__members__ + assert dir(code_obj2) == code_obj2.__members__ + assert code_obj1.__members__ == code_obj2.__members__ + + if name == '__main__': + name = code_obj1.co_name + else: + name = '%s.%s' % (name, code_obj1.co_name) + if name == '.?': name = '__main__' + + if isinstance(code_obj1, object) and cmp(code_obj1, code_obj2): + # use the new style code-classes' __cmp__ method, which + # should be faster and more sophisticated + # if this compare fails, we use the old routine to + # find out, what exactly is nor equal + # if this compare succeds, simply return + #return + pass - def __cmp__(self, o): - t = self.type # shortcut - if t in JUMP_OPs: - # ignore offset - return cmp(t, o.type) - else: - return cmp(t, o.type) \ - or cmp(self.pattr, o.pattr) + if isinstance(code_obj1, object): + members = filter(lambda x: x.startswith('co_'), dir(code_obj1)) + else: + members = dir(code_obj1); + members.sort(); #members.reverse() + + tokens1 = None + for member in members: + if member in __IGNORE_CODE_MEMBERS__: + pass + elif member == 'co_code': + scanner = Scanner.getscanner(version) + scanner.setShowAsm( showasm=0 ) + global JUMP_OPs + JUMP_OPs = scanner.JUMP_OPs + ['JUMP_BACK'] + + # use changed Token class + # we (re)set this here to save exception handling, + # which would get 'unubersichtlich' + scanner.setTokenClass(Token) + try: + # disassemble both code-objects + tokens1,customize = scanner.disassemble(code_obj1) + del customize # save memory + tokens2,customize = scanner.disassemble(code_obj2) + del customize # save memory + finally: + scanner.resetTokenClass() # restore Token class + + targets1 = dis.findlabels(code_obj1.co_code) + tokens1 = [t for t in tokens1 if t.type != 'COME_FROM'] + tokens2 = [t for t in tokens2 if t.type != 'COME_FROM'] + + i1 = 0; i2 = 0 + offset_map = {}; check_jumps = {} + while i1 < len(tokens1): + if i2 >= len(tokens2): + if len(tokens1) == len(tokens2) + 2 \ + and tokens1[-1].type == 'RETURN_VALUE' \ + and tokens1[-2].type == 'LOAD_CONST' \ + and tokens1[-2].pattr == None \ + and tokens1[-3].type == 'RETURN_VALUE': + break + else: + raise CmpErrorCodeLen(name, tokens1, tokens2) + + offset_map[tokens1[i1].offset] = tokens2[i2].offset + + for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []): + if offset2 != tokens2[i2].offset: + raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1], + tokens2[idx2], tokens1, tokens2) + + if tokens1[i1] != tokens2[i2]: + if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type: + i = 1 + while tokens1[i1+i].type == 'LOAD_CONST': + i += 1 + if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \ + and i == int(tokens1[i1+i].type.split('_')[-1]): + t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ]) + if t != tokens2[i2].pattr: + raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], + tokens2[i2], tokens1, tokens2) + i1 += i + 1 + i2 += 1 + continue + elif i == 2 and tokens1[i1+i].type == 'ROT_TWO' and tokens2[i2+1].type == 'UNPACK_SEQUENCE_2': + i1 += 3 + i2 += 2 + continue + elif i == 2 and tokens1[i1+i].type in BIN_OP_FUNCS: + f = BIN_OP_FUNCS[tokens1[i1+i].type] + if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr: + i1 += 3 + i2 += 1 + continue + elif i == 1 and tokens1[i1+i].type == 'STORE_NAME' == tokens2[i2+i].type \ + and tokens1[i1+i].pattr == '__doc__' == tokens2[i2+i].pattr: + i1 += 2 + i2 += 2 + continue + elif tokens1[i1].type == 'UNARY_NOT': + if tokens2[i2].type == 'POP_JUMP_IF_TRUE': + if tokens1[i1+1].type == 'POP_JUMP_IF_FALSE': + i1 += 2 + i2 += 1 + continue + elif tokens2[i2].type == 'POP_JUMP_IF_FALSE': + if tokens1[i1+1].type == 'POP_JUMP_IF_TRUE': + i1 += 2 + i2 += 1 + continue + elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \ + and tokens1[i1-1].type == 'RETURN_VALUE' \ + and tokens2[i2-1].type in ('RETURN_VALUE', 'RETURN_END_IF') \ + and int(tokens1[i1].offset) not in targets1: + i1 += 1 + continue + elif tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \ + and tokens1[i1+1].type == 'JUMP_BACK' and tokens2[i2+1].type == 'JUMP_BACK' \ + and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3: + if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset): + i1 += 2 + i2 += 2 + continue + + raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], + tokens2[i2], tokens1, tokens2) + elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr: + dest1 = int(tokens1[i1].pattr) + dest2 = int(tokens2[i2].pattr) + if tokens1[i1].type == 'JUMP_BACK': + if offset_map[dest1] != dest2: + raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1], + tokens2[i2], tokens1, tokens2) + else: + #import pdb; pdb.set_trace() + if dest1 in check_jumps: + check_jumps[dest1].append((i1,i2,dest2)) + else: + check_jumps[dest1] = [(i1,i2,dest2)] + + i1 += 1 + i2 += 1 + del tokens1, tokens2 # save memory + elif member == 'co_consts': + # partial optimization can make the co_consts look different, + # so we'll just compare the code consts + codes1 = ( c for c in code_obj1.co_consts if type(c) == types.CodeType ) + codes2 = ( c for c in code_obj2.co_consts if type(c) == types.CodeType ) + + for c1, c2 in zip(codes1, codes2): + cmp_code_objects(version, c1, c2, name=name) + else: + # all other members must be equal + if getattr(code_obj1, member) != getattr(code_obj2, member): + raise CmpErrorMember(name, member, + getattr(code_obj1,member), + getattr(code_obj2,member)) + +class Token(Scanner.Token): + """Token class with changed semantics for 'cmp()'.""" + + def __cmp__(self, o): + t = self.type # shortcut + loads = ('LOAD_NAME', 'LOAD_GLOBAL', 'LOAD_CONST') + if t in loads and o.type in loads: + if self.pattr == 'None' and o.pattr == None: + return 0 + if t == 'BUILD_TUPLE_0' and o.type == 'LOAD_CONST' and o.pattr == (): + return 0 + if t == 'COME_FROM' == o.type: + return 0 + if t == 'PRINT_ITEM_CONT' and o.type == 'PRINT_ITEM': + return 0 + if t == 'RETURN_VALUE' and o.type == 'RETURN_END_IF': + return 0 + if t == 'JUMP_IF_FALSE_OR_POP' and o.type == 'POP_JUMP_IF_FALSE': + return 0 + if t in JUMP_OPs: + # ignore offset + return cmp(t, o.type) + return cmp(t, o.type) or cmp(self.pattr, o.pattr) - def __repr__(self): - return '%s %s (%s)' % (str(self.type), str(self.attr), - repr(self.pattr)) + def __repr__(self): + return '%s %s (%s)' % (str(self.type), str(self.attr), + repr(self.pattr)) + def __str__(self): + return '%s\t%-17s %r' % (self.offset, self.type, self.pattr) def compare_code_with_srcfile(pyc_filename, src_filename): - """Compare a .pyc with a source code file.""" - version, code_obj1 = uncompyle._load_module(pyc_filename) - code_obj2 = uncompyle._load_file(src_filename) - cmp_code_objects(version, code_obj1, code_obj2) + """Compare a .pyc with a source code file.""" + version, code_obj1 = uncompyle2._load_module(pyc_filename) + code_obj2 = uncompyle2._load_file(src_filename) + cmp_code_objects(version, code_obj1, code_obj2) def compare_files(pyc_filename1, pyc_filename2): - """Compare two .pyc files.""" - version, code_obj1 = uncompyle._load_module(pyc_filename1) - version, code_obj2 = uncompyle._load_module(pyc_filename2) - cmp_code_objects(version, code_obj1, code_obj2) + """Compare two .pyc files.""" + version, code_obj1 = uncompyle2._load_module(pyc_filename1) + version, code_obj2 = uncompyle2._load_module(pyc_filename2) + cmp_code_objects(version, code_obj1, code_obj2) if __name__ == '__main__': - t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52) - t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55) - print `t1` - print `t2` - print cmp(t1, t2), cmp(t1.type, t2.type), cmp(t1.attr, t2.attr) + t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52) + t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55) + print `t1` + print `t2` + print cmp(t1, t2), cmp(t1.type, t2.type), cmp(t1.attr, t2.attr)