Skip to content

Commit

Permalink
Handle and test when __reduce__ returns a string (#77)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jan Janssen <jan-janssen@users.noreply.github.com>
  • Loading branch information
3 people committed Feb 6, 2024
1 parent bd6407c commit 5353e06
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 4 deletions.
54 changes: 50 additions & 4 deletions h5io/_h5io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
#
# License: BSD (3-clause)

import copyreg
import datetime
import importlib
import inspect
import json
import sys
import tempfile
Expand Down Expand Up @@ -371,7 +373,21 @@ def _triage_write(
reduced = value.__reduce__()
# Some objects reduce to simply the reconstructor function and its
# arguments, without any state
if len(reduced) == 2:
if isinstance(reduced, str):
# https://docs.python.org/3/library/pickle.html#object.__reduce__
# > If a string is returned, the string should be interpreted as the
# > name of a global variable. It should be the object’s local name
# > relative to its module; the pickle module searches the module
# > namespace to determine the object’s module. This behaviour is
# > typically useful for singletons.
# In this case, override the class type to get the global, and manually
# set the reconstructor variable so this doesn't look "custom"
class_type = value.__class__.__module__ + "." + reduced

reconstructor = copyreg._reconstructor
state = value.__getstate__()
additional = []
elif len(reduced) == 2:
# some objects do not return their internal state via
# __reduce__, but can be reconstructed anyway by assigned the
# return value from __getstate__ to __dict__, so we call it
Expand Down Expand Up @@ -406,6 +422,7 @@ def _triage_write(
if state is None:
# For a class that has no instance __dict__ and no __slots__,
# the default state is None.
_guard_string_reductions(reduced, value, class_type, {})
return
elif isinstance(state, dict):
# For a class that has an instance __dict__ and no __slots__,
Expand All @@ -431,6 +448,9 @@ def _triage_write(
# When the __getstate__() was overwritten and no dict is
# returned raise a TypeError
raise TypeError("__getstate__() did not return a state dictionary.")

_guard_string_reductions(reduced, value, class_type, state_dict)

for key, value in state_dict.items():
_triage_write(
key,
Expand Down Expand Up @@ -885,15 +905,27 @@ def _import_class(class_type):


def _setstate(obj_class, state_dict):
got_a_class = inspect.isclass(obj_class)
if hasattr(obj_class, "__getnewargs_ex__"):
args, kwargs = obj_class.__getnewargs_ex__(obj_class)
if got_a_class:
args, kwargs = obj_class.__getnewargs_ex__(obj_class)
else: # Self is first argument
args, kwargs = obj_class.__getnewargs_ex__()
elif hasattr(obj_class, "__getnewargs__"):
args = obj_class.__getnewargs__(obj_class)
if got_a_class:
args = obj_class.__getnewargs__(obj_class)
else: # Self is first argument
args = obj_class.__getnewargs__()
kwargs = {}
else:
args = ()
kwargs = {}
obj = obj_class.__new__(obj_class, *args, **kwargs)
if got_a_class:
obj = obj_class.__new__(obj_class, *args, **kwargs)
else:
# We got an object which is not a class - it could be a singleton-like object -
# we just return the object without initialisation.
obj = obj_class
if hasattr(obj, "__setstate__"):
obj.__setstate__(state_dict)
elif hasattr(obj, "__dict__"):
Expand All @@ -906,3 +938,17 @@ def _setstate(obj_class, state_dict):
"Unexpected state signature, h5io is unable to restore the object."
)
return obj


def _guard_string_reductions(reduced, value, class_type, state_dict):
# use_state adheres to pickling behaviour throughout, and pickle throws a
# `PicklingError` when `__reduce__` returns a string but the reduced object is not
# the object being pickled, so we do the same
if isinstance(reduced, str):
reduced_obj = _setstate(
_import_class(class_type=class_type), state_dict=state_dict
)
if reduced_obj is not value:
raise ValueError(
f"Can't write {value}: it's not the same object as {reduced}"
)
71 changes: 71 additions & 0 deletions h5io/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime
import sys
from abc import ABCMeta
from io import UnsupportedOperation
from pathlib import Path

Expand Down Expand Up @@ -499,3 +500,73 @@ def test_state_python_version_error(tmp_path):
overwrite="update",
use_state=True,
)


class StringReduceMismatch:
"""A class to test the case of `__reduce__` returning a string."""

def __reduce__(self):
"""
Return a string associated with a local object to return.
Pickle requires that the object found by importing a returned reduce string
be the _same object_ getting reduced. We follow this requirement and expect
instances of this class to not store.
"""
return "what_gets_loaded" # The associated global variable


what_gets_loaded = {"the_point_is": "this is not the same object"}


class Singleton(ABCMeta):
"""A singleton metaclass for testing when `__reduce__` returns a string."""

_instances = {}

def __call__(cls, *args, **kwargs):
"""If there is an existing instance, return that instead of a new one."""
if cls not in cls._instances:
cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
return cls._instances[cls]


class SingletonReduce(metaclass=Singleton):
"""A singleton class for testing when `__reduce__` returns a string."""

def __reduce__(self):
"""Return a global."""
return "MY_SINGLETON"


MY_SINGLETON = SingletonReduce()


@pytest.mark.skipif(sys.version_info < (3, 11), reason="requires python3.11 or higher")
def test_state_with_singleton(tmp_path):
"""When __reduce__ returns a string, load the identical object."""
test_file = tmp_path / "test.hdf5"

string_reduce_mismatch_instance = StringReduceMismatch()
assert string_reduce_mismatch_instance is not what_gets_loaded # Sanity check
pytest.raises(
ValueError,
write_hdf5,
fname=test_file,
data=string_reduce_mismatch_instance,
overwrite=True,
use_state=True,
) # Should not be allowed to save when the __reduce__ string does not give
# the same object that's being reduced

singleton_string_reduce = SingletonReduce()
write_hdf5(
fname=test_file,
data=singleton_string_reduce,
overwrite=True,
use_json=False,
use_state=True,
)

reloaded = read_hdf5(fname=test_file)
assert reloaded is MY_SINGLETON # The _exact_ object specified in __reduce__

0 comments on commit 5353e06

Please sign in to comment.