diff --git a/compileall2.py b/compileall2.py index 4313738..8976fa0 100644 --- a/compileall2.py +++ b/compileall2.py @@ -19,6 +19,7 @@ import importlib.util import py_compile import struct +import filecmp from functools import partial from pathlib import Path @@ -86,7 +87,7 @@ def _walk_dir(dir, maxlevels, quiet=0): def compile_dir(dir, maxlevels=None, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, stripdir=None, - prependdir=None, limit_sl_dest=None): + prependdir=None, limit_sl_dest=None, hardlink_dupes=False): """Byte-compile all modules in the given directory tree. Arguments (only dir is required): @@ -109,6 +110,7 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False, after stripdir limit_sl_dest: ignore symlinks if they are pointing outside of the defined path + hardlink_dupes: hardlink duplicated pyc files """ ProcessPoolExecutor = None if workers is not None: @@ -144,14 +146,15 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False, if not compile_file(file, ddir, force, rx, quiet, legacy, optimize, invalidation_mode, stripdir=stripdir, prependdir=prependdir, - limit_sl_dest=limit_sl_dest): + limit_sl_dest=limit_sl_dest, + hardlink_dupes=hardlink_dupes): success = False return success def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, stripdir=None, prependdir=None, - limit_sl_dest=None): + limit_sl_dest=None, hardlink_dupes=False): """Byte-compile one file. Arguments (only fullname is required): @@ -172,6 +175,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, after stripdir limit_sl_dest: ignore symlinks if they are pointing outside of the defined path. 
+ hardlink_dupes: hardlink duplicated pyc files """ if ddir is not None and (stripdir is not None or prependdir is not None): @@ -212,6 +216,10 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if isinstance(optimize, int): optimize = [optimize] + if hardlink_dupes and len(optimize) < 2: + raise ValueError(("Hardlinking of duplicated bytecode makes sense " + "only for more than one optimization level.")) + if rx is not None: mo = rx.search(fullname) if mo: @@ -256,7 +264,8 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if not quiet: print('Compiling {!r}...'.format(fullname)) try: - for opt_level, cfile in opt_cfiles.items(): + for index, opt_level in enumerate(sorted(optimize)): + cfile = opt_cfiles[opt_level] if PY37: ok = py_compile.compile(fullname, cfile, dfile, True, optimize=opt_level, @@ -264,6 +273,18 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, else: ok = py_compile.compile(fullname, cfile, dfile, True, optimize=opt_level) + + if index > 0 and hardlink_dupes: + previous_cfile = opt_cfiles[sorted(optimize)[index - 1]] + if previous_cfile == cfile and min(optimize) not in (1, 2): + # Python 3.4 has only one .pyo file for -O and -OO so + # we hardlink it only if there is a .pyc file + # with the same content + previous_cfile = opt_cfiles[min(optimize)] + if previous_cfile != cfile and filecmp.cmp(cfile, previous_cfile, shallow=False): + os.unlink(cfile) + os.link(previous_cfile, cfile) + except py_compile.PyCompileError as err: success = False if quiet >= 2: @@ -384,6 +405,9 @@ def main(): 'Python interpreter itself (specified by -O).')) parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest', help='Ignore symlinks pointing outsite of the DIR') + parser.add_argument('--hardlink-dupes', action='store_true', + dest='hardlink_dupes', + help='Hardlink duplicated pyc files') if PY37: invalidation_modes = [mode.name.lower().replace('_', '-') @@ -413,6 +437,10 @@ def main(): if args.opt_levels is None: args.opt_levels = 
[-1] + if len(args.opt_levels) == 1 and args.hardlink_dupes: + parser.error(("Hardlinking of duplicated bytecode makes sense " + "only for more than one optimization level.")) + if args.ddir is not None and ( args.stripdir is not None or args.prependdir is not None ): @@ -449,7 +477,8 @@ def main(): stripdir=args.stripdir, prependdir=args.prependdir, optimize=args.opt_levels, - limit_sl_dest=args.limit_sl_dest): + limit_sl_dest=args.limit_sl_dest, + hardlink_dupes=args.hardlink_dupes): success = False else: if not compile_dir(dest, maxlevels, args.ddir, @@ -459,7 +488,8 @@ def main(): stripdir=args.stripdir, prependdir=args.prependdir, optimize=args.opt_levels, - limit_sl_dest=args.limit_sl_dest): + limit_sl_dest=args.limit_sl_dest, + hardlink_dupes=args.hardlink_dupes): success = False return success else: diff --git a/test_compileall2.py b/test_compileall2.py index 8620248..4cd3a2e 100644 --- a/test_compileall2.py +++ b/test_compileall2.py @@ -12,6 +12,7 @@ import unittest import io import functools +import filecmp from unittest import mock, skipUnless try: @@ -384,6 +385,254 @@ def test_ignore_symlink_destination(self): self.assertTrue(os.path.isfile(allowed_bc)) self.assertFalse(os.path.isfile(prohibited_bc)) + def test_hardlink_deduplication_bad_args(self): + # Bad arguments combination, hardlink deduplication make sense + # only for more than one optimization level + with self.assertRaises(ValueError): + compileall.compile_dir(self.directory, quiet=True, optimize=0, hardlink_dupes=True) + + def test_hardlink_deduplication_same_bytecode_all_opt(self): + # 'a = 0' produces the same bytecode for all optimization levels + path = os.path.join(self.directory, "test", "same_all") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = 
importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=True) + + # import pdb; pdb.set_trace() + + # All three files should have the same inode (hardlinks) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=False) + + # Deduplication disabled, all pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_same_bytecode_some_opt(self): + # 'a = 0' produces the same bytecode for all optimization levels + # only two levels of optimization [0, 2] tested + path = os.path.join(self.directory, "test", "same_some") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + compileall.compile_dir(path, quiet=True, optimize=[0, 2], hardlink_dupes=True) + + # Both files should have the same inode (hardlink) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, force=True, optimize=[0, 2], hardlink_dupes=False) + + # Deduplication disabled, both pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_same_bytecode_some_opt_2(self): + # 'a = 0' produces the 
same bytecode for all optimization levels + path = os.path.join(self.directory, "test", "same_some_2") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", "a = 0") + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + compileall.compile_dir(path, quiet=True, optimize=[1, 2], hardlink_dupes=True) + + # Both files should have the same inode (hardlinks) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, optimize=[1, 2]) + + # Deduplication disabled, all pyc files should have different inodes + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + else: + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_different_bytecode_all_opt(self): + # "'''string'''\nassert 1" produces a different bytecode for all optimization levels + path = os.path.join(self.directory, "test", "different_all") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_different_bytecode", "'''string'''\nassert 1") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=True) + + # No hardlinks, bytecodes are different + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + 
self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=False) + + # Disabling hardlink deduplication makes no difference + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_different_bytecode_one_hardlink(self): + # "'''string'''\na = 1" produces the same bytecode only for level 0 and 1 + path = os.path.join(self.directory, "test", "different_one") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_different_bytecode", "'''string'''\na = 1") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=True) + + # Only level 0 and 1 has the same inode, level 2 produces a different bytecode + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + else: + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=False) + + # Deduplication disabled, no hardlinks + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + 
self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_recompilation(self): + path = os.path.join(self.directory, "test", "module_change") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "module_change", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=True) + + # All three levels have the same inode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + previous_inode = os.stat(pyc_opt0).st_ino + + # Change of the module content + simple_script = script_helper.make_script(path, "module_change", "print(0)") + + # Recompilation without -o 1 + compileall.compile_dir(path, force=True, quiet=True, optimize=[0, 2], hardlink_dupes=True) + + # opt-1.pyc should have the same inode as before and others should not + if compileall.PY35: + self.assertEqual(previous_inode, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + self.assertNotEqual(previous_inode, os.stat(pyc_opt2).st_ino) + # opt-1.pyc and opt-2.pyc have different content + if compileall.PY35: + self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) + + def test_hardlink_deduplication_import(self): + path = os.path.join(self.directory, "test", "module_import") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "module", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + 
**compileall.optimization_kwarg(2) + ) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=True) + + # All three levels have the same inode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + previous_inode = os.stat(pyc_opt0).st_ino + + # Change of the module content + simple_script = script_helper.make_script(path, "module", "print(0)") + + # Import the module in Python + script_helper.assert_python_ok( + "-O", "-c", "import module", __isolated=False, PYTHONPATH=path + ) + + # Only opt-1.pyc is changed + self.assertEqual(previous_inode, os.stat(pyc_opt0).st_ino) + if compileall.PY35: + self.assertEqual(previous_inode, os.stat(pyc_opt2).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + # opt-1.pyc and opt-2.pyc have different content + if compileall.PY35: + self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) class CompileallTestsWithSourceEpoch(CompileallTestsBase, unittest.TestCase, @@ -876,6 +1125,251 @@ def test_ignore_symlink_destination(self): self.assertTrue(os.path.isfile(allowed_bc)) self.assertFalse(os.path.isfile(prohibited_bc)) + def test_hardlink_deduplication_bad_args(self): + # Bad arguments combination, hardlink deduplication makes sense + # only for more than one optimization level + self.assertRunNotOK(self.directory, "-o 1", "--hardlink-dupes") + + def test_hardlink_deduplication_same_bytecode_all_opt(self): + # 'a = 0' produces the same bytecode for all optimization levels + path = os.path.join(self.directory, "test", "same_all") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + 
**compileall.optimization_kwarg(2) + ) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", "--hardlink-dupes") + + # All three files should have the same inode (hardlinks) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") + + # Deduplication disabled, all pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_same_bytecode_some_opt(self): + # 'a = 0' produces the same bytecode for all optimization levels + # only two levels of optimization [0, 2] tested + path = os.path.join(self.directory, "test", "same_some") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + self.assertRunOK(path, "-q", "-o 0", "-o 2", "--hardlink-dupes") + + # Both files should have the same inode (hardlink) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 0", "-o 2") + + # Deduplication disabled, both pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_same_bytecode_some_opt_2(self): + # 'a = 0' produces the same bytecode for all optimization levels + path = os.path.join(self.directory, "test", "same_some_2") + os.makedirs(path) + + simple_script = script_helper.make_script(path, 
"test_same_bytecode", "a = 0") + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + self.assertRunOK(path, "-q", "-o 1", "-o 2", "--hardlink-dupes") + + # Both files should have the same inode (hardlinks) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 1", "-o 2") + + # Deduplication disabled, all pyc files should have different inodes + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + else: + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_different_bytecode_all_opt(self): + # "'''string'''\nassert 1" produces a different bytecode for all optimization levels + path = os.path.join(self.directory, "test", "different_all") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_different_bytecode", "'''string'''\nassert 1") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", "--hardlink-dupes") + + # No hardlinks, bytecodes are different + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") + + # Disabling hardlink deduplication makes no 
difference + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_different_bytecode_one_hardlink(self): + # "'''string'''\na = 1" produces the same bytecode only for level 0 and 1 + path = os.path.join(self.directory, "test", "different_one") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_different_bytecode", "'''string'''\na = 1") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", "--hardlink-dupes") + + # Only level 0 and 1 has the same inode, level 2 produces a different bytecode + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + else: + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") + + # Deduplication disabled, no hardlinks + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + if compileall.PY35: + # Python 3.4 produces the same file for opt1 and opt2 + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_recompilation(self): + path = os.path.join(self.directory, "test", "module_change") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "module_change", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 
= importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + self.assertRunOK(path, "-f", "-q", "-o 0", "-o 1", "-o 2", "--hardlink-dupes") + + # All three levels have the same inode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + previous_inode = os.stat(pyc_opt0).st_ino + + # Change of the module content + simple_script = script_helper.make_script(path, "module_change", "print(0)") + + # Recompilation without -o 1 + self.assertRunOK(path, "-f", "-q", "-o 0", "-o 2", "--hardlink-dupes") + + # opt-1.pyc should have the same inode as before and others should not + if compileall.PY35: + self.assertEqual(previous_inode, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + self.assertNotEqual(previous_inode, os.stat(pyc_opt2).st_ino) + # opt-1.pyc and opt-2.pyc have different content + if compileall.PY35: + self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) + + def test_hardlink_deduplication_import(self): + path = os.path.join(self.directory, "test", "module_import") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "module", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(1) + ) + pyc_opt2 = importlib.util.cache_from_source( + simple_script, + **compileall.optimization_kwarg(2) + ) + + self.assertRunOK(path, "-f", "-q", "-o 0", "-o 1", "-o 2", "--hardlink-dupes") + + # All three levels have the same inode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + previous_inode = os.stat(pyc_opt0).st_ino + + # Change of the module content + 
simple_script = script_helper.make_script(path, "module", "print(0)") + + # Import the module in Python + script_helper.assert_python_ok( + "-O", "-c", "import module", __isolated=False, PYTHONPATH=path + ) + + # Only opt-1.pyc is changed + self.assertEqual(previous_inode, os.stat(pyc_opt0).st_ino) + if compileall.PY35: + self.assertEqual(previous_inode, os.stat(pyc_opt2).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + # opt-1.pyc and opt-2.pyc have different content + if compileall.PY35: + self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) class CommmandLineTestsWithSourceEpoch(CommandLineTestsBase, unittest.TestCase,