Skip to content

Commit

Permalink
Merge pull request #19 from fedora-python/hardlink_dupes
Browse files: browse the repository at this point in the history
PoC - Hardlinking of duplicated pyc files
  • Loading branch information
frenzymadness committed Feb 10, 2020
2 parents a4cb957 + 1d4d994 commit 5198660
Show file tree
Hide file tree
Showing 2 changed files with 530 additions and 6 deletions.
42 changes: 36 additions & 6 deletions compileall2.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import importlib.util
import py_compile
import struct
import filecmp

from functools import partial
from pathlib import Path
Expand Down Expand Up @@ -86,7 +87,7 @@ def _walk_dir(dir, maxlevels, quiet=0):
def compile_dir(dir, maxlevels=None, ddir=None, force=False,
rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
invalidation_mode=None, stripdir=None,
prependdir=None, limit_sl_dest=None):
prependdir=None, limit_sl_dest=None, hardlink_dupes=False):
"""Byte-compile all modules in the given directory tree.
Arguments (only dir is required):
Expand All @@ -109,6 +110,7 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
after stripdir
limit_sl_dest: ignore symlinks if they are pointing outside of
the defined path
hardlink_dupes: hardlink duplicated pyc files
"""
ProcessPoolExecutor = None
if workers is not None:
Expand Down Expand Up @@ -144,14 +146,15 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
if not compile_file(file, ddir, force, rx, quiet,
legacy, optimize, invalidation_mode,
stripdir=stripdir, prependdir=prependdir,
limit_sl_dest=limit_sl_dest):
limit_sl_dest=limit_sl_dest,
hardlink_dupes=hardlink_dupes):
success = False
return success

def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
legacy=False, optimize=-1,
invalidation_mode=None, stripdir=None, prependdir=None,
limit_sl_dest=None):
limit_sl_dest=None, hardlink_dupes=False):
"""Byte-compile one file.
Arguments (only fullname is required):
Expand All @@ -172,6 +175,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
after stripdir
limit_sl_dest: ignore symlinks if they are pointing outside of
the defined path.
hardlink_dupes: hardlink duplicated pyc files
"""

if ddir is not None and (stripdir is not None or prependdir is not None):
Expand Down Expand Up @@ -212,6 +216,10 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
if isinstance(optimize, int):
optimize = [optimize]

if hardlink_dupes:
raise ValueError(("Hardlinking of duplicated bytecode makes sense "
"only for more than one optimization level."))

if rx is not None:
mo = rx.search(fullname)
if mo:
Expand Down Expand Up @@ -256,14 +264,27 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
if not quiet:
print('Compiling {!r}...'.format(fullname))
try:
for opt_level, cfile in opt_cfiles.items():
for index, opt_level in enumerate(sorted(optimize)):
cfile = opt_cfiles[opt_level]
if PY37:
ok = py_compile.compile(fullname, cfile, dfile, True,
optimize=opt_level,
invalidation_mode=invalidation_mode)
else:
ok = py_compile.compile(fullname, cfile, dfile, True,
optimize=opt_level)

if index > 0 and hardlink_dupes:
previous_cfile = opt_cfiles[optimize[index - 1]]
if previous_cfile == cfile and optimize[0] not in (1, 2):
# Python 3.4 has only one .pyo file for -O and -OO so
# we hardlink it only if there is a .pyc file
# with the same content
previous_cfile = opt_cfiles[optimize[0]]
if previous_cfile != cfile and filecmp.cmp(cfile, previous_cfile, shallow=False):
os.unlink(cfile)
os.link(previous_cfile, cfile)

except py_compile.PyCompileError as err:
success = False
if quiet >= 2:
Expand Down Expand Up @@ -384,6 +405,9 @@ def main():
'Python interpreter itself (specified by -O).'))
parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
help='Ignore symlinks pointing outsite of the DIR')
parser.add_argument('--hardlink-dupes', action='store_true',
dest='hardlink_dupes',
help='Hardlink duplicated pyc files')

if PY37:
invalidation_modes = [mode.name.lower().replace('_', '-')
Expand Down Expand Up @@ -413,6 +437,10 @@ def main():
if args.opt_levels is None:
args.opt_levels = [-1]

if len(args.opt_levels) == 1 and args.hardlink_dupes:
parser.error(("Hardlinking of duplicated bytecode makes sense "
"only for more than one optimization level."))

if args.ddir is not None and (
args.stripdir is not None or args.prependdir is not None
):
Expand Down Expand Up @@ -449,7 +477,8 @@ def main():
stripdir=args.stripdir,
prependdir=args.prependdir,
optimize=args.opt_levels,
limit_sl_dest=args.limit_sl_dest):
limit_sl_dest=args.limit_sl_dest,
hardlink_dupes=args.hardlink_dupes):
success = False
else:
if not compile_dir(dest, maxlevels, args.ddir,
Expand All @@ -459,7 +488,8 @@ def main():
stripdir=args.stripdir,
prependdir=args.prependdir,
optimize=args.opt_levels,
limit_sl_dest=args.limit_sl_dest):
limit_sl_dest=args.limit_sl_dest,
hardlink_dupes=args.hardlink_dupes):
success = False
return success
else:
Expand Down
Loading

0 comments on commit 5198660

Please sign in to comment.