Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TEST: Lazy tree as default #1728

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 26 additions & 3 deletions asdf/_asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
import pathlib
import time
import warnings
import weakref

from packaging.version import Version

from . import _compression as mcompression
from . import _display as display
from . import _node_info as node_info
from . import _version as version
from . import constants, generic_io, reference, schema, treeutil, util, versioning, yamlutil
from . import constants, generic_io, lazy_nodes, reference, schema, treeutil, util, versioning, yamlutil
from ._block.manager import Manager as BlockManager
from ._helpers import validate_version
from .config import config_context, get_config
Expand Down Expand Up @@ -174,6 +175,8 @@ def __init__(
# custom_tree_to_tagged_tree or tagged_tree_to_custom_tree).
self._tree_modification_context = treeutil._TreeModificationContext()

self._tagged_object_cache = {}

self._fd = None
self._closed = False
self._external_asdf_by_uri = {}
Expand Down Expand Up @@ -478,6 +481,7 @@ def close(self):
# as we're closing the file, also empty out the
# tree so that references to array data can be released
self._tree = AsdfObject()
self._tagged_object_cache = {}
for external in self._external_asdf_by_uri.values():
external.close()
self._external_asdf_by_uri.clear()
Expand Down Expand Up @@ -780,6 +784,7 @@ def _open_asdf(
fd,
validate_checksums=False,
extensions=None,
lazy_tree=NotSet,
_get_yaml_content=False,
_force_raw_types=False,
strict_extension_check=False,
Expand All @@ -791,7 +796,7 @@ def _open_asdf(
msg = "'strict_extension_check' and 'ignore_missing_extensions' are incompatible options"
raise ValueError(msg)

with config_context():
with config_context() as cfg:
# validate_checksums (unlike memmap and lazy_load) is provided
# here instead of in __init__
self._blocks._validate_checksums = validate_checksums
Expand Down Expand Up @@ -870,7 +875,14 @@ def _open_asdf(
self.close()
raise

tree = yamlutil.tagged_tree_to_custom_tree(tree, self, _force_raw_types)
if lazy_tree is NotSet:
lazy_tree = cfg.lazy_tree
if lazy_tree and not _force_raw_types:
obj = AsdfObject()
obj.data = lazy_nodes.AsdfDictNode(tree, weakref.ref(self))
tree = obj
else:
tree = yamlutil.tagged_tree_to_custom_tree(tree, self, _force_raw_types)

if not (ignore_missing_extensions or _force_raw_types):
self._check_extensions(tree, strict=strict_extension_check)
Expand All @@ -888,6 +900,7 @@ def _open_impl(
mode="r",
validate_checksums=False,
extensions=None,
lazy_tree=NotSet,
_get_yaml_content=False,
_force_raw_types=False,
strict_extension_check=False,
Expand All @@ -902,6 +915,7 @@ def _open_impl(
generic_file,
validate_checksums=validate_checksums,
extensions=extensions,
lazy_tree=lazy_tree,
_get_yaml_content=_get_yaml_content,
_force_raw_types=_force_raw_types,
strict_extension_check=strict_extension_check,
Expand Down Expand Up @@ -1509,6 +1523,7 @@ def open_asdf(
_force_raw_types=False,
copy_arrays=False,
memmap=NotSet,
lazy_tree=NotSet,
lazy_load=True,
custom_schema=None,
strict_extension_check=False,
Expand Down Expand Up @@ -1570,6 +1585,13 @@ def open_asdf(
Note: even if ``lazy_load`` is `False`, ``memmap`` is still taken
into account.

lazy_tree : bool, optional
When `True` the ASDF tree will not be converted to custom objects
when the file is loaded. Instead, objects will be "lazily" converted
only when they are accessed. Note that containers in the tree will not be
plain dict and list instances; they will instead be instances of the classes
defined in `asdf.lazy_nodes`.

custom_schema : str, optional
Path to a custom schema file that will be used for a secondary
validation pass. This can be used to ensure that particular ASDF
Expand Down Expand Up @@ -1623,6 +1645,7 @@ def open_asdf(
mode=mode,
validate_checksums=validate_checksums,
extensions=extensions,
lazy_tree=lazy_tree,
_get_yaml_content=_get_yaml_content,
_force_raw_types=_force_raw_types,
strict_extension_check=strict_extension_check,
Expand Down
14 changes: 8 additions & 6 deletions asdf/_tests/core/_converters/test_complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,18 @@ def test_valid_nan_complex(valid):
pass


def test_roundtrip():
def test_roundtrip(tmp_path):
values = {
"a": 0 + 0j,
"b": 1 + 1j,
"c": -1 + 1j,
"d": -1 - 1j,
}

result = helpers.roundtrip_object(values)

assert len(values) == len(result)
for key, value in values.items():
assert result[key] == value
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"values": values}).write_to(fn)
with asdf.open(fn) as af:
result = af["values"]
assert len(values) == len(result)
for key, value in values.items():
assert result[key] == value
9 changes: 5 additions & 4 deletions asdf/_tests/core/_converters/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,18 @@ def test_software():
assert result == software


def test_history_entry():
def test_history_entry(tmp_path):
history_entry = HistoryEntry(
description="Some history happened here",
time=datetime.datetime.now(),
software=[Software(name="FooSoft", version="1.5.0")],
extra="property",
)

result = helpers.roundtrip_object(history_entry)

assert result == history_entry
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"obj": history_entry}).write_to(fn)
with asdf.open(fn) as af:
assert af["obj"] == history_entry


def test_subclass_metadata():
Expand Down
5 changes: 3 additions & 2 deletions asdf/_tests/tags/core/tests/test_ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,8 +896,9 @@ def test_inline_shape_mismatch():
"""

buff = helpers.yaml_to_asdf(content)
with pytest.raises(ValueError, match=r"inline data doesn't match the given shape"), asdf.open(buff):
pass
with pytest.raises(ValueError, match=r"inline data doesn't match the given shape"):
with asdf.open(buff) as af:
af["arr"]


def test_broadcasted_array(tmpdir):
Expand Down
6 changes: 4 additions & 2 deletions asdf/_tests/test_block_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,10 @@ def test_block_data_callback_converter(tmp_path):
# id(arr) would change every time
a = BlockDataCallback(lambda: np.zeros(3, dtype="uint8"))

b = helpers.roundtrip_object(a)
assert_array_equal(a.data, b.data)
tfn = tmp_path / "tmp.asdf"
asdf.AsdfFile({"obj": a}).write_to(tfn)
with asdf.open(tfn) as af:
assert_array_equal(a.data, af["obj"].data)

# make a tree without the BlockData instance to avoid
# the initial validate which will trigger block allocation
Expand Down
10 changes: 7 additions & 3 deletions asdf/_tests/test_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from packaging.specifiers import SpecifierSet
from yaml.representer import RepresenterError

import asdf
from asdf import AsdfFile, config_context
from asdf.exceptions import AsdfWarning, ValidationError
from asdf.extension import (
Expand Down Expand Up @@ -897,7 +898,7 @@ def from_yaml_tree(self, node, tag, ctx):
config.add_extension(extension)


def test_reference_cycle():
def test_reference_cycle(tmp_path):
class FractionWithInverse(fractions.Fraction):
def __init__(self, *args, **kwargs):
self._inverse = None
Expand Down Expand Up @@ -939,5 +940,8 @@ class FractionWithInverseExtension:
f1.inverse = f2
f2.inverse = f1

read_f1 = roundtrip_object(f1)
assert read_f1.inverse.inverse is read_f1
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"obj": f1}).write_to(fn)
with asdf.open(fn) as af:
read_f1 = af["obj"]
assert read_f1.inverse.inverse is read_f1
168 changes: 168 additions & 0 deletions asdf/_tests/test_lazy_nodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import collections
import copy
import json
import weakref

import numpy as np
import pytest

import asdf
from asdf.lazy_nodes import AsdfDictNode, AsdfListNode, AsdfOrderedDictNode


def test_slice_access():
    """Check integer indexing and slicing on an ``AsdfListNode``.

    Integer indices must return the stored items, and every slice must
    compare equal to the corresponding plain-list slice while still being
    an ``AsdfListNode``.
    """
    asdf_file = asdf.AsdfFile()
    lazy_list = AsdfListNode([0, 1, 2], weakref.ref(asdf_file))

    for index in range(3):
        assert lazy_list[index] == index

    # (slice, expected contents) pairs, in the order [:2], [1:2], [:-1],
    # [::-1], [::2], [1::2]
    slice_cases = (
        (slice(None, 2), [0, 1]),
        (slice(1, 2), [1]),
        (slice(None, -1), [0, 1]),
        (slice(None, None, -1), [2, 1, 0]),
        (slice(None, None, 2), [0, 2]),
        (slice(1, None, 2), [1]),
    )
    for window, expected in slice_cases:
        assert lazy_list[window] == expected
        assert isinstance(lazy_list[window], AsdfListNode)


def test_nested_node_conversion():
    """Check that nested containers are wrapped in lazy nodes at every depth.

    Each key of the fixture spells out the nesting of its value, outermost
    container first: ``l`` for a list and ``d`` for a dict (so ``"ldd"`` is
    a list holding a dict holding a dict). Every value wraps the scalar
    ``0`` in exactly three containers.
    """
    tree = {
        # lll = list in list in list, etc...
        "lll": [[[0]]],
        "lld": [[{"a": 0}]],
        "ldl": [{"a": [0]}],
        "ldd": [{"a": {"a": 0}}],
        "dll": {"a": [[0]]},
        "dld": {"a": [{"a": 0}]},
        "ddl": {"a": {"a": [0]}},
        "ddd": {"a": {"a": {"a": 0}}},
    }
    af = asdf.AsdfFile()
    node = AsdfDictNode(tree, weakref.ref(af))
    for key in node:
        obj = node[key]
        for code in key:
            if code == "l":
                assert isinstance(obj, AsdfListNode)
                obj = obj[0]
            else:
                assert isinstance(obj, AsdfDictNode)
                obj = obj["a"]
        # once all three containers named by the key are peeled off, only
        # the scalar leaf may remain; this catches fixture/key mismatches
        assert obj == 0


def test_lazy_tree_ref(tmp_path):
    """Check that one array stored in three places resolves to one object.

    With ``lazy_tree=True`` the underlying tree data must still hold
    ``Tagged`` entries, containers must come back as lazy nodes, and each
    access path to the shared array must return the very same object.
    """
    arr = np.arange(42)
    fn = tmp_path / "test.asdf"
    asdf.AsdfFile({"a": arr, "b": {"c": arr}, "d": [arr]}).write_to(fn)

    with asdf.open(fn, lazy_tree=True) as af:
        # the raw backing data has not been converted yet
        assert isinstance(af.tree.data.data["a"], asdf.tagged.Tagged)
        assert isinstance(af.tree.data.data["b"]["c"], asdf.tagged.Tagged)
        assert isinstance(af.tree.data.data["d"][0], asdf.tagged.Tagged)

        # containers are exposed through the lazy node classes
        assert isinstance(af["b"], AsdfDictNode)
        assert isinstance(af["d"], AsdfListNode)

        np.testing.assert_array_equal(af["a"], arr)

        # every path to the shared array yields the identical object
        assert af["a"] is af["b"]["c"]
        assert af["a"] is af["d"][0]


def test_ordered_dict():
    """Check that ``OrderedDict`` values are wrapped as ``AsdfOrderedDictNode``.

    This must hold both directly under the root and when the ordered dict
    is nested inside a list.
    """
    innermost = collections.OrderedDict({"c": 3})
    outer = collections.OrderedDict({"b": [1, 2, innermost]})
    tree = {"a": outer}

    asdf_file = asdf.AsdfFile()
    root = AsdfDictNode(tree, weakref.ref(asdf_file))

    ordered_node = root["a"]
    assert isinstance(ordered_node, AsdfOrderedDictNode)

    list_node = ordered_node["b"]
    assert isinstance(list_node, AsdfListNode)
    assert isinstance(list_node[2], AsdfOrderedDictNode)


@pytest.mark.parametrize(
    ("NodeClass", "data", "base"),
    [
        (AsdfDictNode, {"a": 1}, dict),
        (AsdfListNode, [1, 2], list),
        (AsdfOrderedDictNode, {"a": 1}, collections.OrderedDict),
    ],
)
def test_node_inheritance(NodeClass, data, base):
    """Check that each lazy node class passes ``isinstance`` for its builtin base."""
    assert isinstance(NodeClass(data), base)


@pytest.mark.parametrize(
    "node",
    [
        AsdfDictNode({"a": 1, "b": 2}),
        AsdfListNode([1, 2, 3]),
        AsdfOrderedDictNode({"a": 1, "b": 2}),
    ],
)
@pytest.mark.parametrize("copy_operation", [copy.copy, copy.deepcopy])
def test_copy(node, copy_operation):
    """Check that shallow and deep copies keep the node's type and contents."""
    expected_type = type(node)
    duplicate = copy_operation(node)
    assert isinstance(duplicate, expected_type)
    assert duplicate == node


@pytest.mark.parametrize(
    "node",
    [
        AsdfDictNode({"a": 1, "b": 2}),
        AsdfListNode([1, 2, 3]),
        AsdfOrderedDictNode({"a": 1, "b": 2}),
    ],
)
def test_json_serialization(node):
    """Check that ``json.dumps`` on a lazy node raises ``TypeError``."""
    expected_message = "is not JSON serializable"
    with pytest.raises(TypeError, match=expected_message):
        json.dumps(node)


def test_cache_clear_on_close(tmp_path):
    """Check that an object loaded from a lazy tree is released on close.

    A weak reference taken to the array while the file is open must be
    dead once the ``with`` block has exited.
    """
    fn = tmp_path / "test.asdf"
    asdf.AsdfFile({"a": np.arange(42)}).write_to(fn)

    with asdf.open(fn, lazy_tree=True) as af:
        # only a weak reference escapes the block, so nothing here keeps
        # the loaded array alive after the file is closed
        array_ref = weakref.ref(af["a"])

    assert array_ref() is None


def test_access_after_del(tmp_path):
    """Check that a lazy node raises once its owning file has been deleted.

    The node is obtained while the file is open; after the file object is
    deleted, reading an item from the node must raise with the message
    "no ASDF for you!".
    """
    fn = tmp_path / "test.asdf"
    payload = {"a": {"b": np.arange(42)}}
    asdf.AsdfFile(payload).write_to(fn)

    with asdf.open(fn, lazy_tree=True) as af:
        orphaned_node = af["a"]

    # drop the last strong reference to the file object
    del af

    with pytest.raises(Exception, match="no ASDF for you!"):
        orphaned_node["b"]
5 changes: 3 additions & 2 deletions asdf/_tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,9 @@ class CustomExtension:
# This should cause a warning but not an error because without explicitly
# providing an extension, our custom type will not be recognized and will
# simply be converted to a raw type.
with pytest.warns(AsdfConversionWarning, match=tag_uri), asdf.open(buff):
pass
with pytest.warns(AsdfConversionWarning, match=tag_uri):
with asdf.open(buff) as af:
af["custom"]

buff.seek(0)
with config_context() as cfg:
Expand Down