Commit
use functools.cached_property on >=3.12 (#511)
- Also removes shortuuid, and uses `tmp_fname` from `dvc_objects`.
- After removing funcy.cached_property, the only remaining use of
  funcy was `split`, which has been refactored as well, so funcy is
  now only required on Python < 3.12.
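The heart of the change is a small version-gated import: on Python 3.12+ the stdlib `functools.cached_property` is used, while older interpreters keep falling back to funcy's implementation (presumably because 3.12 removed the locking behaviour from `functools.cached_property`; that motivation is an inference, not part of the commit message). A minimal sketch of the shim, mirroring the `src/dvc_data/compat.py` diff below:

```python
# Sketch of the version-gated import this commit introduces
# (see the src/dvc_data/compat.py diff below for the real module).
import sys
from typing import TYPE_CHECKING

if sys.version_info >= (3, 12) or TYPE_CHECKING:
    # 3.12+ (and type checkers) resolve against the stdlib implementation.
    from functools import cached_property
else:
    # Older interpreters keep using funcy's implementation.
    from funcy import cached_property

__all__ = ["cached_property"]
```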
skshetry committed Feb 28, 2024
1 parent d89a3f2 commit c25b608
Showing 7 changed files with 23 additions and 32 deletions.
7 changes: 3 additions & 4 deletions pyproject.toml
@@ -21,10 +21,9 @@ classifiers = [
 requires-python = ">=3.9"
 dynamic = ["version"]
 dependencies = [
-  "funcy>=1.14",
+  "funcy>=1.14; python_version < '3.12'",
   "dictdiffer>=0.8.1",
   "pygtrie>=2.3.2",
-  "shortuuid>=0.5.0",
   "dvc-objects>=4.0.1,<6",
   "fsspec>=2024.2.0",
   "diskcache>=5.2.1",
@@ -191,8 +190,8 @@ parametrize-names-type = "csv"
 
 [tool.ruff.lint.flake8-tidy-imports]
 [tool.ruff.lint.flake8-tidy-imports.banned-api]
-"funcy.cached_property" = {msg = "use `from dvc_data.utils import cached_property` instead."}
-"functools.cached_property" = {msg = "use `from dvc_data.utils import cached_property` instead."}
+"funcy.cached_property" = {msg = "use `from dvc_data.compat import cached_property` instead."}
+"functools.cached_property" = {msg = "use `from dvc_data.compat import cached_property` instead."}
 
 [tool.ruff.lint.flake8-type-checking]
 strict = true
3 changes: 2 additions & 1 deletion src/dvc_data/utils.py → src/dvc_data/compat.py
@@ -1,6 +1,7 @@
+import sys
 from typing import TYPE_CHECKING
 
-if TYPE_CHECKING:
+if sys.version_info >= (3, 12) or TYPE_CHECKING:
     from functools import cached_property  # noqa: TID251
 else:
     from funcy import cached_property  # noqa: TID251
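For illustration, a hypothetical consumer of the shim imports from `dvc_data.compat` rather than from `functools` or `funcy` directly, which is what the banned-api rules in pyproject.toml now enforce (the class below is made up, not from the codebase):

```python
from dvc_data.compat import cached_property


class Example:
    """Hypothetical class showing how the shim is consumed."""

    @cached_property
    def expensive(self) -> int:
        # Computed once per instance, then cached on the instance.
        return sum(range(1_000_000))
```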
5 changes: 2 additions & 3 deletions src/dvc_data/hashfile/db/local.py
@@ -6,9 +6,8 @@
 
 from dvc_objects.db import noop, wrap_iter
 from dvc_objects.errors import ObjectDBError, ObjectFormatError
-from dvc_objects.fs.utils import copyfile, remove
+from dvc_objects.fs.utils import copyfile, remove, tmp_fname
 from fsspec.callbacks import DEFAULT_CALLBACK
-from shortuuid import uuid
 
 from . import HashFileDB
 
@@ -83,7 +82,7 @@ def _remove_unpacked_dir(self, hash_):
     def _unprotect_file(self, path, callback=DEFAULT_CALLBACK):
         if self.fs.is_symlink(path) or self.fs.is_hardlink(path):
             logger.debug("Unprotecting '%s'", path)
-            tmp = os.path.join(os.path.dirname(path), "." + uuid())
+            tmp = os.path.join(os.path.dirname(path), tmp_fname())
 
             # The operations order is important here - if some application
             # would access the file during the process of copyfile then it
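As a rough sketch of the swap in `_unprotect_file`: the unique temporary name now comes from `dvc_objects` instead of shortuuid. The path below is made up for illustration, and the exact format of the generated name is owned by `dvc_objects`:

```python
import os

from dvc_objects.fs.utils import tmp_fname

path = "/cache/files/md5/ab/cdef1234"  # made-up path, for illustration only
# Before: tmp = os.path.join(os.path.dirname(path), "." + shortuuid.uuid())
# After: dvc_objects generates the unique temporary name.
tmp = os.path.join(os.path.dirname(path), tmp_fname())
```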
18 changes: 11 additions & 7 deletions src/dvc_data/hashfile/transfer.py
@@ -11,7 +11,6 @@
 )
 
 from fsspec.callbacks import DEFAULT_CALLBACK
-from funcy import split
 
 from .hash_info import HashInfo
 
@@ -56,7 +55,7 @@ def find_tree_by_obj_id(
     return None
 
 
-def _do_transfer(
+def _do_transfer(  # noqa: C901
     src: "HashFileDB",
     dest: "HashFileDB",
     obj_ids: Iterable["HashInfo"],
@@ -71,18 +70,23 @@ def _do_transfer(
     Returns:
         Set containing any hash_infos which failed to transfer.
     """
-    dir_ids, file_ids = split(lambda hash_info: hash_info.isdir, obj_ids)
+    dir_ids, file_ids = set(), set()
+    for hash_info in obj_ids:
+        if hash_info.isdir:
+            dir_ids.add(hash_info)
+        else:
+            file_ids.add(hash_info)
 
     failed_ids: set["HashInfo"] = set()
     succeeded_dir_objs = []
-    all_file_ids = set(file_ids)
 
     for dir_hash in dir_ids:
         dir_obj = find_tree_by_obj_id([cache_odb, src], dir_hash)
         assert dir_obj
 
         entry_ids = {oid for _, _, oid in dir_obj}
-        bound_file_ids = all_file_ids & entry_ids
-        all_file_ids -= entry_ids
+        bound_file_ids = file_ids & entry_ids
+        file_ids -= entry_ids
 
         logger.debug("transfer dir: %s with %d files", dir_hash, len(bound_file_ids))
 
@@ -114,7 +118,7 @@ def _do_transfer(
         succeeded_dir_objs.append(dir_obj)
 
     # insert the rest
-    failed_ids.update(_add(src, dest, all_file_ids, **kwargs))
+    failed_ids.update(_add(src, dest, file_ids, **kwargs))
     if failed_ids:
         if src_index:
             src_index.clear()
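For context on the `split` refactor above: funcy's `split(pred, seq)` partitions a sequence into items that satisfy the predicate and items that do not. The inline loop reproduces that behaviour with real sets, so the extra `all_file_ids = set(file_ids)` copy is no longer needed and `&` / `-=` work directly on `file_ids`. A rough standalone equivalent, with illustrative names that are not part of the codebase:

```python
from collections.abc import Callable, Iterable
from typing import TypeVar

T = TypeVar("T")


def partition(pred: Callable[[T], bool], items: Iterable[T]) -> tuple[set[T], set[T]]:
    """Split items into (matching, non-matching) sets.

    Rough equivalent of the removed funcy.split(pred, items) call,
    except it returns sets instead of lazy iterators.
    """
    matching: set[T] = set()
    rest: set[T] = set()
    for item in items:
        (matching if pred(item) else rest).add(item)
    return matching, rest


# Illustrative usage with plain integers instead of HashInfo objects:
odds, evens = partition(lambda n: n % 2 == 1, range(10))
assert odds == {1, 3, 5, 7, 9} and evens == {0, 2, 4, 6, 8}
```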
2 changes: 1 addition & 1 deletion src/dvc_data/hashfile/tree.py
@@ -6,10 +6,10 @@
 
 from dvc_objects.errors import ObjectFormatError
 
+from dvc_data.compat import cached_property
 from dvc_data.hashfile.hash import DEFAULT_ALGORITHM, hash_file
 from dvc_data.hashfile.meta import Meta
 from dvc_data.hashfile.obj import HashFile
-from dvc_data.utils import cached_property
 
 if TYPE_CHECKING:
     from pygtrie import Trie
17 changes: 3 additions & 14 deletions src/dvc_data/index/index.py
@@ -3,26 +3,15 @@
 import os
 from abc import ABC, abstractmethod
 from collections.abc import Iterator, MutableMapping
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Optional,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Callable, Optional, cast
 
 import attrs
-from sqltrie import (
-    JSONTrie,
-    PyGTrie,
-    ShortKeyError,
-    SQLiteTrie,
-)
+from sqltrie import JSONTrie, PyGTrie, ShortKeyError, SQLiteTrie
 
+from dvc_data.compat import cached_property
 from dvc_data.hashfile.hash_info import HashInfo
 from dvc_data.hashfile.meta import Meta
 from dvc_data.hashfile.tree import Tree
-from dvc_data.utils import cached_property
 
 if TYPE_CHECKING:
     from dvc_objects.fs.base import FileSystem
3 changes: 1 addition & 2 deletions tests/hashfile/test_db_index.py
@@ -1,5 +1,4 @@
 import pytest
-from funcy import first
 
 from dvc_data.hashfile.db.index import ObjectDBIndex
 
@@ -22,7 +21,7 @@ def test_roundtrip(tmp_upath, index):
 def test_clear(index):
     index.update(["1234.dir"], ["5678"])
     index.clear()
-    assert first(index.hashes()) is None
+    assert not list(index.hashes())
 
 
 def test_update(index):
