Skip to content

Commit

Permalink
Merge branch 'master' into remove-python35-ci
Browse files Browse the repository at this point in the history
  • Loading branch information
ogrisel committed Jun 30, 2021
2 parents d8c668b + a20b804 commit 386ae35
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 13 deletions.
7 changes: 7 additions & 0 deletions CHANGES.md
Expand Up @@ -6,6 +6,9 @@ dev

- Python 3.5 is no longer supported.

- Fix a side effect altering dynamic modules at pickling time.
([PR #426](https://github.com/cloudpipe/cloudpickle/pull/426))

- Support for pickling type annotations on Python 3.10 as per [PEP 563](
https://www.python.org/dev/peps/pep-0563/)
([PR #400](https://github.com/cloudpipe/cloudpickle/pull/400))
Expand All @@ -14,6 +17,10 @@ dev
_is_parametrized_type_hint to limit false positives.
([PR #409](https://github.com/cloudpipe/cloudpickle/pull/409))

- Suppressed a source of non-determinism when pickling dynamically defined
functions and handles the deprecation of co_lnotab in Python 3.10+.
([PR #428](https://github.com/cloudpipe/cloudpickle/pull/428))

1.6.0
=====

Expand Down
7 changes: 5 additions & 2 deletions cloudpickle/cloudpickle.py
Expand Up @@ -236,7 +236,10 @@ def _extract_code_globals(co):
out_names = _extract_code_globals_cache.get(co)
if out_names is None:
names = co.co_names
out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
# We use a dict with None values instead of a set to get a
# deterministic order (assuming Python 3.6+) and avoid introducing
# non-deterministic pickle bytes as a results.
out_names = {names[oparg]: None for _, oparg in _walk_global_ops(co)}

# Declaring a function inside another one using the "def ..."
# syntax generates a constant code object corresponding to the one
Expand All @@ -247,7 +250,7 @@ def _extract_code_globals(co):
if co.co_consts:
for const in co.co_consts:
if isinstance(const, types.CodeType):
out_names |= _extract_code_globals(const)
out_names.update(_extract_code_globals(const))

_extract_code_globals_cache[co] = out_names

Expand Down
24 changes: 21 additions & 3 deletions cloudpickle/cloudpickle_fast.py
Expand Up @@ -244,7 +244,19 @@ def _enum_getstate(obj):

def _code_reduce(obj):
"""codeobject reducer"""
if hasattr(obj, "co_posonlyargcount"): # pragma: no branch
if hasattr(obj, "co_linetable"): # pragma: no branch
# Python 3.10 and later: obj.co_lnotab is deprecated and constructor
# expects obj.co_linetable instead.
args = (
obj.co_argcount, obj.co_posonlyargcount,
obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize,
obj.co_flags, obj.co_code, obj.co_consts, obj.co_names,
obj.co_varnames, obj.co_filename, obj.co_name,
obj.co_firstlineno, obj.co_linetable, obj.co_freevars,
obj.co_cellvars
)
elif hasattr(obj, "co_posonlyargcount"):
# Backward compat for 3.9 and older
args = (
obj.co_argcount, obj.co_posonlyargcount,
obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize,
Expand All @@ -254,6 +266,7 @@ def _code_reduce(obj):
obj.co_cellvars
)
else:
# Backward compat for even older versions of Python
args = (
obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals,
obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts,
Expand Down Expand Up @@ -342,8 +355,13 @@ def _module_reduce(obj):
if _is_importable(obj):
return subimport, (obj.__name__,)
else:
obj.__dict__.pop('__builtins__', None)
return dynamic_subimport, (obj.__name__, vars(obj))
# Some external libraries can populate the "__builtins__" entry of a
# module's `__dict__` with unpicklable objects (see #316). For that
# reason, we do not attempt to pickle the "__builtins__" entry, and
# restore a default value for it at unpickling time.
state = obj.__dict__.copy()
state.pop('__builtins__', None)
return dynamic_subimport, (obj.__name__, state)


def _method_reduce(obj):
Expand Down
39 changes: 37 additions & 2 deletions tests/cloudpickle_test.py
Expand Up @@ -10,6 +10,7 @@
import logging
import math
from operator import itemgetter, attrgetter
import pickletools
import platform
import random
import shutil
Expand Down Expand Up @@ -50,13 +51,15 @@
from cloudpickle.cloudpickle import _lookup_module_and_qualname

from .testutils import subprocess_pickle_echo
from .testutils import subprocess_pickle_string
from .testutils import assert_run_python_script
from .testutils import subprocess_worker

from _cloudpickle_testpkg import relative_imports_factory


_TEST_GLOBAL_VARIABLE = "default_value"
_TEST_GLOBAL_VARIABLE2 = "another_value"


class RaiserOnPickle(object):
Expand Down Expand Up @@ -604,6 +607,12 @@ def __reduce__(self):
assert hasattr(depickled_mod.__builtins__, "abs")
assert depickled_mod.f(-1) == 1

# Additional check testing that the issue #425 is fixed: without the
# fix for #425, `mod.f` would not have access to `__builtins__`, and
# thus calling `mod.f(-1)` (which relies on the `abs` builtin) would
# fail.
assert mod.f(-1) == 1

def test_load_dynamic_module_in_grandchild_process(self):
# Make sure that when loaded, a dynamic module preserves its dynamic
# property. Otherwise, this will lead to an ImportError if pickled in
Expand Down Expand Up @@ -2089,8 +2098,8 @@ def inner_function():
return _TEST_GLOBAL_VARIABLE
return inner_function

globals_ = cloudpickle.cloudpickle._extract_code_globals(
function_factory.__code__)
globals_ = set(cloudpickle.cloudpickle._extract_code_globals(
function_factory.__code__).keys())
assert globals_ == {'_TEST_GLOBAL_VARIABLE'}

depickled_factory = pickle_depickle(function_factory,
Expand Down Expand Up @@ -2324,6 +2333,32 @@ def __type__(self):
o = MyClass()
pickle_depickle(o, protocol=self.protocol)

@pytest.mark.skipif(
sys.version_info < (3, 7),
reason="Determinism can only be guaranteed for Python 3.7+"
)
def test_deterministic_pickle_bytes_for_function(self):
# Ensure that functions with references to several global names are
# pickled to fixed bytes that do not depend on the PYTHONHASHSEED of
# the Python process.
vals = set()

def func_with_globals():
return _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE2

for i in range(5):
vals.add(
subprocess_pickle_string(func_with_globals,
protocol=self.protocol,
add_env={"PYTHONHASHSEED": str(i)}))
if len(vals) > 1:
# Print additional debug info on stdout with dis:
for val in vals:
pickletools.dis(val)
pytest.fail(
"Expected a single deterministic payload, got %d/5" % len(vals)
)


class Protocol2CloudPickleTest(CloudPickleTest):

Expand Down
30 changes: 24 additions & 6 deletions tests/testutils.py
Expand Up @@ -2,7 +2,6 @@
import os
import os.path as op
import tempfile
import base64
from subprocess import Popen, check_output, PIPE, STDOUT, CalledProcessError
from cloudpickle.compat import pickle
from contextlib import contextmanager
Expand Down Expand Up @@ -38,15 +37,16 @@ def _make_cwd_env():
return cloudpickle_repo_folder, env


def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
"""Echo function with a child Python process
def subprocess_pickle_string(input_data, protocol=None, timeout=TIMEOUT,
add_env=None):
"""Retrieve pickle string of an object generated by a child Python process
Pickle the input data into a buffer, send it to a subprocess via
stdin, expect the subprocess to unpickle, re-pickle that data back
and send it back to the parent process via stdout for final unpickling.
>>> subprocess_pickle_echo([1, 'a', None])
[1, 'a', None]
>>> testutils.subprocess_pickle_string([1, 'a', None], protocol=2)
b'\x80\x02]q\x00(K\x01X\x01\x00\x00\x00aq\x01Ne.'
"""
# run then pickle_echo(protocol=protocol) in __main__:
Expand All @@ -56,6 +56,8 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
# which is deprecated in python 3.8
cmd = [sys.executable, '-W ignore', __file__, "--protocol", str(protocol)]
cwd, env = _make_cwd_env()
if add_env:
env.update(add_env)
proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env,
bufsize=4096)
pickle_string = dumps(input_data, protocol=protocol)
Expand All @@ -67,14 +69,30 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
message = "Subprocess returned %d: " % proc.returncode
message += err.decode('utf-8')
raise RuntimeError(message)
return loads(out)
return out
except TimeoutExpired as e:
proc.kill()
out, err = proc.communicate()
message = u"\n".join([out.decode('utf-8'), err.decode('utf-8')])
raise RuntimeError(message) from e


def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT,
add_env=None):
"""Echo function with a child Python process
Pickle the input data into a buffer, send it to a subprocess via
stdin, expect the subprocess to unpickle, re-pickle that data back
and send it back to the parent process via stdout for final unpickling.
>>> subprocess_pickle_echo([1, 'a', None])
[1, 'a', None]
"""
out = subprocess_pickle_string(input_data,
protocol=protocol,
timeout=timeout,
add_env=add_env)
return loads(out)


def _read_all_bytes(stream_in, chunk_size=4096):
all_data = b""
while True:
Expand Down

0 comments on commit 386ae35

Please sign in to comment.