PoC - Hardlinking of duplicated pyc files #19

Merged: 1 commit, Feb 10, 2020
compileall2.py: 42 changes (36 additions, 6 deletions)
@@ -19,6 +19,7 @@
 import importlib.util
 import py_compile
 import struct
+import filecmp

 from functools import partial
 from pathlib import Path
@@ -86,7 +87,7 @@ def _walk_dir(dir, maxlevels, quiet=0):
 def compile_dir(dir, maxlevels=None, ddir=None, force=False,
                 rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
                 invalidation_mode=None, stripdir=None,
-                prependdir=None, limit_sl_dest=None):
+                prependdir=None, limit_sl_dest=None, hardlink_dupes=False):
     """Byte-compile all modules in the given directory tree.

     Arguments (only dir is required):
@@ -109,6 +110,7 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
                 after stripdir
     limit_sl_dest: ignore symlinks if they are pointing outside of
                    the defined path
+    hardlink_dupes: hardlink duplicated pyc files
     """
     ProcessPoolExecutor = None
     if workers is not None:
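For orientation, here is how the new parameter might be driven from Python once this lands; a minimal sketch, assuming compileall2 is importable, and using a purely illustrative site-packages path:

    import compileall2

    # Compile the tree at three optimization levels in one pass; pyc files
    # that come out byte-identical across levels get hardlinked together.
    # The path below is only an example.
    compileall2.compile_dir(
        "/usr/lib/python3.8/site-packages",
        quiet=1,
        optimize=[0, 1, 2],
        hardlink_dupes=True,
    )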
@@ -144,14 +146,15 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False,
             if not compile_file(file, ddir, force, rx, quiet,
                                 legacy, optimize, invalidation_mode,
                                 stripdir=stripdir, prependdir=prependdir,
-                                limit_sl_dest=limit_sl_dest):
+                                limit_sl_dest=limit_sl_dest,
+                                hardlink_dupes=hardlink_dupes):
                 success = False
     return success

 def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
                  legacy=False, optimize=-1,
                  invalidation_mode=None, stripdir=None, prependdir=None,
-                 limit_sl_dest=None):
+                 limit_sl_dest=None, hardlink_dupes=False):
     """Byte-compile one file.

     Arguments (only fullname is required):
@@ -172,6 +175,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
                    after stripdir
     limit_sl_dest: ignore symlinks if they are pointing outside of
                    the defined path.
+    hardlink_dupes: hardlink duplicated pyc files
     """

     if ddir is not None and (stripdir is not None or prependdir is not None):
@@ -212,6 +216,10 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
     if isinstance(optimize, int):
         optimize = [optimize]

+    optimize = sorted(set(optimize))  # deterministic order for the hardlink pass
+    if hardlink_dupes and len(optimize) < 2:
+        raise ValueError("Hardlinking of duplicated bytecode makes sense "
+                         "only for more than one optimization level.")
     if rx is not None:
         mo = rx.search(fullname)
         if mo:
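For background on why a single optimization level can never produce duplicates: each level is written to its own pyc path. The standard importlib.util.cache_from_source call shows the per-level naming (exact names depend on the interpreter tag; cpython-38 below is just an example):

    import importlib.util

    # The empty string means "no optimization suffix" (level 0).
    for level in ("", 1, 2):
        print(importlib.util.cache_from_source("mod.py", optimization=level))
    # Typically prints something like:
    #   __pycache__/mod.cpython-38.pyc
    #   __pycache__/mod.cpython-38.opt-1.pyc
    #   __pycache__/mod.cpython-38.opt-2.pyc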
@@ -256,14 +264,27 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
         if not quiet:
             print('Compiling {!r}...'.format(fullname))
         try:
-            for opt_level, cfile in opt_cfiles.items():
+            for index, opt_level in enumerate(optimize):
+                cfile = opt_cfiles[opt_level]
                 if PY37:
                     ok = py_compile.compile(fullname, cfile, dfile, True,
                                             optimize=opt_level,
                                             invalidation_mode=invalidation_mode)
                 else:
                     ok = py_compile.compile(fullname, cfile, dfile, True,
                                             optimize=opt_level)

+                if index > 0 and hardlink_dupes:
+                    previous_cfile = opt_cfiles[optimize[index - 1]]
+                    if previous_cfile == cfile and optimize[0] not in (1, 2):
+                        # Python 3.4 has only one .pyo file for -O and -OO so
+                        # we hardlink it only if there is a .pyc file
+                        # with the same content
+                        previous_cfile = opt_cfiles[optimize[0]]
+                    if previous_cfile != cfile and filecmp.cmp(cfile, previous_cfile, shallow=False):
+                        os.unlink(cfile)
+                        os.link(previous_cfile, cfile)
+
         except py_compile.PyCompileError as err:
             success = False
             if quiet >= 2:
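Taken out of context, the loop above boils down to: compile the same source at several levels, byte-compare neighbouring results with filecmp, and replace duplicates with hardlinks. A self-contained sketch of that idea, with made-up file names rather than the module's real cache-path handling:

    import filecmp
    import os
    import py_compile

    source = "mod.py"  # hypothetical source file
    pycs = []
    for level in (0, 1, 2):
        cfile = "mod.opt-%d.pyc" % level  # simplified naming for the sketch
        py_compile.compile(source, cfile, optimize=level)
        pycs.append(cfile)

    # Hardlink each pyc onto its predecessor when the bytes match,
    # mirroring the adjacent-level comparison in the diff above.
    for prev, cur in zip(pycs, pycs[1:]):
        if filecmp.cmp(cur, prev, shallow=False):
            os.unlink(cur)
            os.link(prev, cur)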
@@ -384,6 +405,9 @@ def main():
                              'Python interpreter itself (specified by -O).'))
     parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
                         help='Ignore symlinks pointing outside of the DIR')
+    parser.add_argument('--hardlink-dupes', action='store_true',
+                        dest='hardlink_dupes',
+                        help='Hardlink duplicated pyc files')

     if PY37:
         invalidation_modes = [mode.name.lower().replace('_', '-')
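A note on verifying the new CLI flag's effect: hardlinked pyc files share an inode, which is easy to check from Python. A sketch, assuming two hypothetical pyc paths produced by a --hardlink-dupes run:

    import os

    a = "__pycache__/mod.cpython-38.opt-1.pyc"  # hypothetical paths
    b = "__pycache__/mod.cpython-38.opt-2.pyc"

    print(os.path.samefile(a, b))  # True when the two names are hardlinked
    print(os.stat(a).st_nlink)     # link count >= 2 for a hardlinked file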
@@ -413,6 +437,10 @@ def main():
     if args.opt_levels is None:
         args.opt_levels = [-1]

+    if len(args.opt_levels) == 1 and args.hardlink_dupes:
+        parser.error("Hardlinking of duplicated bytecode makes sense "
+                     "only for more than one optimization level.")
+
     if args.ddir is not None and (
         args.stripdir is not None or args.prependdir is not None
     ):
@@ -449,7 +477,8 @@ def main():
                                     stripdir=args.stripdir,
                                     prependdir=args.prependdir,
                                     optimize=args.opt_levels,
-                                    limit_sl_dest=args.limit_sl_dest):
+                                    limit_sl_dest=args.limit_sl_dest,
+                                    hardlink_dupes=args.hardlink_dupes):
                     success = False
             else:
                 if not compile_dir(dest, maxlevels, args.ddir,
@@ -459,7 +488,8 @@ def main():
                                    stripdir=args.stripdir,
                                    prependdir=args.prependdir,
                                    optimize=args.opt_levels,
-                                   limit_sl_dest=args.limit_sl_dest):
+                                   limit_sl_dest=args.limit_sl_dest,
+                                   hardlink_dupes=args.hardlink_dupes):
                     success = False
         return success
     else: