Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

- Added rich comparison operator support to cloud paths, which means you can now use them with `sorted`.
- Fixed bug where `hash(...)` of a cloud path was not consistent with the equality operator.
- Added polymorphic class `AnyPath` which creates a cloud path or `pathlib.Path` instance appropriately for an input filepath. See new [documentation](https://cloudpathlib.drivendata.org/anypath-polymorphism/) for details and example usage.
- Added integration with [Pydantic](https://pydantic-docs.helpmanual.io/). See new [documentation](https://cloudpathlib.drivendata.org/integrations/#pydantic) for details and example usage.

## v0.3.0 (2021-01-29)

Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
include README.md
include LICENSE
include requirements/*.txt
include requirements.txt
2 changes: 2 additions & 0 deletions cloudpathlib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys

from .anypath import AnyPath
from .azure.azblobclient import AzureBlobClient
from .azure.azblobpath import AzureBlobPath
from .cloudpath import CloudPath, implementation_registry
Expand Down Expand Up @@ -29,6 +30,7 @@
)

__all__ = [
"AnyPath",
"AzureBlobClient",
"AzureBlobPath",
"ClientMismatch",
Expand Down
61 changes: 61 additions & 0 deletions cloudpathlib/anypath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from pathlib import Path
from typing import Union

from .cloudpath import InvalidPrefix, CloudPath


class AnyPathTypeError(TypeError):
    """Raised by AnyPath when an input is accepted by neither CloudPath nor pathlib.Path."""


class AnyPathMeta(type):
    """Metaclass that makes AnyPath behave as a virtual superclass of CloudPath and Path.

    It overrides the hooks consulted by isinstance and issubclass, as described in
    [PEP 3119](https://www.python.org/dev/peps/pep-3119/#overloading-isinstance-and-issubclass),
    so that any CloudPath or Path instance/class passes those checks against AnyPath."""

    def __instancecheck__(cls, inst):
        # True for instances of either supported path family.
        return isinstance(inst, (CloudPath, Path))

    def __subclasscheck__(cls, sub):
        # True for classes derived from either supported path family.
        return issubclass(sub, (CloudPath, Path))


class AnyPath(metaclass=AnyPathMeta):
    """Polymorphic virtual superclass for CloudPath and pathlib.Path. Constructing an instance will
    automatically dispatch to CloudPath or Path based on the input. It also supports both
    isinstance and issubclass checks.

    This class also integrates with Pydantic. When used as a type declaration for a Pydantic
    BaseModel, the Pydantic validation process will appropriately run inputs through this class'
    constructor and dispatch to CloudPath or Path.
    """

    def __new__(cls, *args, **kwargs) -> Union[CloudPath, Path]:  # type: ignore
        """Return a CloudPath built from the arguments, falling back to a pathlib.Path.

        The arguments are first passed to the CloudPath constructor. If that raises
        InvalidPrefix, the same arguments are passed to the Path constructor instead.

        Raises:
            AnyPathTypeError: If CloudPath raised InvalidPrefix and Path then raised TypeError.
                The message contains the repr of both underlying exceptions.
        """
        try:
            return CloudPath(*args, **kwargs)  # type: ignore
        except InvalidPrefix as cloudpath_exception:
            try:
                return Path(*args, **kwargs)
            except TypeError as path_exception:
                # Chain explicitly so the traceback reports the Path failure as the direct
                # cause instead of an unrelated error raised while handling another one.
                raise AnyPathTypeError(
                    " ".join(
                        [
                            "Invalid input for both CloudPath and Path.",
                            f"CloudPath exception: {repr(cloudpath_exception)}",
                            f"Path exception: {repr(path_exception)}",
                        ]
                    )
                ) from path_exception

    @classmethod
    def __get_validators__(cls):
        """Pydantic special method. See
        https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
        yield cls._validate

    @classmethod
    def _validate(cls, value) -> Union[CloudPath, Path]:
        """Used as a Pydantic validator. See
        https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
        # Note __new__ is static method and not a class method
        return cls.__new__(cls, value)
13 changes: 13 additions & 0 deletions cloudpathlib/cloudpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,19 @@ def _upload_local_to_cloud(self, force_overwrite_to_cloud: bool = False):
f"overwrite."
)

# =========== pydantic integration special methods ===============
@classmethod
def __get_validators__(cls):
    """Yield the validators Pydantic should run for fields declared with this class.

    Pydantic special method. See
    https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
    validator = cls._validate
    yield validator

@classmethod
def _validate(cls, value: Any):
    """Pydantic validator: run the raw value through this class' constructor.

    See https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
    instance = cls(value)
    return instance


# The function resolve is not available on Pure paths because it removes relative
# paths and symlinks. We _just_ want the relative path resolution for
Expand Down
33 changes: 33 additions & 0 deletions docs/docs/anypath-polymorphism.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# AnyPath Polymorphic Class

`cloudpathlib` implements a special `AnyPath` polymorphic class. This class will automatically instantiate a cloud path instance or a `pathlib.Path` instance appropriately from your input. It's also a virtual superclass of `CloudPath` and `Path`, so `isinstance` and `issubclass` checks will work in the expected way.

This functionality can be handy for situations when you want to support both local filepaths and cloud storage filepaths. If you use `AnyPath`, your code can switch between them seamlessly based on the contents of provided filepaths without needing any `if`-`else` conditional blocks.

Comment thread
jayqi marked this conversation as resolved.
## Example

```python
from cloudpathlib import AnyPath

path = AnyPath("mydir/myfile.txt")
path
#> PosixPath('mydir/myfile.txt')

cloud_path = AnyPath("s3://mybucket/myfile.txt")
cloud_path
#> S3Path('s3://mybucket/myfile.txt')

isinstance(path, AnyPath)
#> True
isinstance(cloud_path, AnyPath)
#> True
```

## How It Works

The constructor for `AnyPath` will first attempt to run the input through the `CloudPath` base class' constructor, which will validate the input against registered concrete `CloudPath` implementations. This will accept inputs that are already a cloud path class or a string with the appropriate URI scheme prefix (e.g., `s3://`). If no implementation validates successfully, it will then try to run the input through the `Path` constructor. If the `Path` constructor fails and raises a `TypeError`, then the `AnyPath` constructor will raise an `AnyPathTypeError` exception.

The virtual superclass functionality with `isinstance` and `issubclass` is implemented with the `__instancecheck__` and `__subclasscheck__` special methods per [PEP 3119](https://www.python.org/dev/peps/pep-3119/#overloading-isinstance-and-issubclass)'s specification.

---
<sup>Examples created with [reprexlite](https://github.com/jayqi/reprexlite)</sup>
3 changes: 3 additions & 0 deletions docs/docs/api-reference/anypath.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# AnyPath

::: cloudpathlib.anypath.AnyPath
38 changes: 38 additions & 0 deletions docs/docs/integrations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Integrations with Other Libraries

## Pydantic

`cloudpathlib` integrates with [Pydantic](https://pydantic-docs.helpmanual.io/)'s data validation. You can declare fields with cloud path classes, and Pydantic's validation mechanisms will run inputs through the cloud path's constructor.

```python
from cloudpathlib import S3Path
from pydantic import BaseModel

class MyModel(BaseModel):
s3_file: S3Path

inst = MyModel(s3_file="s3://mybucket/myfile.txt")
inst.s3_file
#> S3Path('s3://mybucket/myfile.txt')
```

This also works with the `AnyPath` polymorphic class. Inputs will get dispatched and instantiated as the appropriate class.

```python
from cloudpathlib import AnyPath
from pydantic import BaseModel

class FancyModel(BaseModel):
path: AnyPath

fancy1 = FancyModel(path="s3://mybucket/myfile.txt")
fancy1.path
#> S3Path('s3://mybucket/myfile.txt')

fancy2 = FancyModel(path="mydir/myfile.txt")
fancy2.path
#> PosixPath('mydir/myfile.txt')
```

---
<sup>Examples created with [reprexlite](https://github.com/jayqi/reprexlite)</sup>
3 changes: 3 additions & 0 deletions docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ nav:
- Why cloudpathlib?: "why_cloudpathlib.ipynb"
- Authentication: "authentication.md"
- Caching: "caching.ipynb"
- AnyPath: "anypath-polymorphism.md"
- Testing code that uses cloudpathlib: "testing_mocked_cloudpathlib.ipynb"
- Integrations: "integrations.md"
- API Reference:
- CloudPath: "api-reference/cloudpath.md"
- S3:
Expand All @@ -27,6 +29,7 @@ nav:
- GS:
- GSClient: "api-reference/gsclient.md"
- GSPath: "api-reference/gspath.md"
- AnyPath: "api-reference/anypath.md"
- Local: "api-reference/local.md"

markdown_extensions:
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mkdocstrings
mypy
pandas
pillow
pydantic
pytest
pytest-cases
pytest-cov
Expand Down
11 changes: 11 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Base requirements for library
importlib_metadata ; python_version < "3.8"

## extras: azure
azure-storage-blob>=12

## extras: gs
google-cloud-storage

## extras: s3
boto3
1 change: 0 additions & 1 deletion requirements/azure.txt

This file was deleted.

2 changes: 0 additions & 2 deletions requirements/base.txt

This file was deleted.

1 change: 0 additions & 1 deletion requirements/gs.txt

This file was deleted.

1 change: 0 additions & 1 deletion requirements/s3.txt

This file was deleted.

28 changes: 14 additions & 14 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,36 @@

"""The setup script."""

from collections import defaultdict
from setuptools import setup, find_packages
from itertools import chain
from pathlib import Path


def load_requirements(path: Path):
    """Parse a requirements file into lists of requirements keyed by extras name.

    Requirements that appear before any ``## extras: <name>`` marker are stored under
    ``"base"``; requirements after a marker are stored under that marker's name. Blank lines
    and ``#`` comment lines are skipped. A ``-r <file>`` line merges the ``"base"``
    requirements of the referenced file into the current section; the referenced file is
    resolved relative to the directory of the file that includes it.

    Args:
        path: Location of the requirements file to read.

    Returns:
        A ``defaultdict(list)`` mapping ``"base"`` and each extras name to its requirements.

    Raises:
        ValueError: If an extras marker uses the reserved name ``"base"`` or ``"all"``.
    """
    path = Path(path)
    requirements = defaultdict(list)
    with path.open("r") as fp:
        reqs_type = "base"
        for line in fp:
            if line.startswith("## extras:"):
                reqs_type = line.partition(":")[-1].strip()
                if reqs_type in ("base", "all"):
                    raise ValueError(f"'{reqs_type}' is a reserved extras keyword.")
                # A section marker carries no requirement of its own.
                continue
            if line.startswith("-r"):
                # Extend the current section's list (the mapping itself cannot be
                # concatenated with a list) and recurse with a real Path so the
                # included file is found next to the including one.
                included = path.parent / line[len("-r"):].strip()
                requirements[reqs_type].extend(load_requirements(included)["base"])
                continue
            requirement = line.strip()
            if requirement and not requirement.startswith("#"):
                requirements[reqs_type].append(requirement)
    return requirements


readme = Path("README.md").read_text(encoding="UTF-8")

extra_reqs = {}
for req_path in (Path(__file__).parent / "requirements").glob("*.txt"):
if req_path.stem == "base":
base_reqs = load_requirements(req_path)
continue
if req_path.stem == "all":
raise ValueError("'all' is a reserved keyword and can't be used for a cloud provider key")
extra_reqs[req_path.stem] = load_requirements(req_path)
requirements = load_requirements(Path(__file__).parent / "requirements.txt")
extra_reqs = {k: v for k, v in requirements.items() if k != "base"}
extra_reqs["all"] = list(chain(*extra_reqs.values()))

readme = Path("README.md").read_text(encoding="UTF-8")

setup(
author="DrivenData",
author_email="info@drivendata.org",
Expand All @@ -47,7 +47,7 @@ def load_requirements(path: Path):
],
description=("pathlib-style classes for cloud storage services"),
extras_require=extra_reqs,
install_requires=base_reqs,
install_requires=requirements["base"],
long_description=readme,
long_description_content_type="text/markdown",
include_package_data=True,
Expand Down
37 changes: 37 additions & 0 deletions tests/test_anypath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from pathlib import Path, PosixPath, WindowsPath

import pytest

from cloudpathlib.anypath import AnyPath, AnyPathTypeError
from cloudpathlib.cloudpath import CloudPath


def test_anypath_path():
    """AnyPath dispatches local inputs to pathlib.Path and acts as its virtual superclass."""
    local_path = Path("a/b/c")

    # Both a Path and its string form should produce an equal Path.
    for candidate in (local_path, str(local_path)):
        assert AnyPath(candidate) == local_path

    assert isinstance(local_path, AnyPath)
    assert not isinstance(str(local_path), AnyPath)

    for path_class in (Path, PosixPath, WindowsPath):
        assert issubclass(path_class, AnyPath)
    assert not issubclass(str, AnyPath)


def test_anypath_cloudpath(rig):
    """AnyPath dispatches cloud inputs to the rig's cloud path class and is its virtual superclass."""
    cloud_path = rig.create_cloud_path("a/b/c")

    # Both a cloud path and its string form should produce an equal cloud path.
    for candidate in (cloud_path, str(cloud_path)):
        assert AnyPath(candidate) == cloud_path

    assert isinstance(cloud_path, AnyPath)
    assert not isinstance(str(cloud_path), AnyPath)

    for path_class in (CloudPath, rig.path_class):
        assert issubclass(path_class, AnyPath)


def test_anypath_bad_input():
    """An input that neither CloudPath nor Path accepts raises AnyPathTypeError."""
    invalid_input = 0
    with pytest.raises(AnyPathTypeError):
        AnyPath(invalid_input)
44 changes: 44 additions & 0 deletions tests/test_integrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from pathlib import Path

from pydantic import BaseModel, ValidationError
import pytest

from cloudpathlib.anypath import AnyPath


def test_pydantic_cloudpath(rig):
    """A Pydantic field typed with the rig's cloud path class validates cloud path inputs."""

    class PydanticModel(BaseModel):
        cloud_path: rig.path_class

    expected = rig.create_cloud_path("a/b/c")

    # Accepts an existing cloud path instance as well as its string form.
    for raw_value in (expected, str(expected)):
        model = PydanticModel(cloud_path=raw_value)
        assert model.cloud_path == expected

    # A non-path value is rejected by validation.
    with pytest.raises(ValidationError):
        PydanticModel(cloud_path=0)


def test_pydantic_anypath(rig):
    """A Pydantic field typed as AnyPath dispatches inputs to a cloud path or a local Path."""

    class PydanticModel(BaseModel):
        any_path: AnyPath

    cloud_path = rig.create_cloud_path("a/b/c")
    local_path = Path("a/b/c")

    # (raw input, expected validated value), checked in the same order as listed.
    cases = [
        (cloud_path, cloud_path),
        (str(cloud_path), cloud_path),
        (Path("a/b/c"), local_path),
        ("a/b/c", local_path),
    ]
    for raw_value, expected in cases:
        model = PydanticModel(any_path=raw_value)
        assert model.any_path == expected

    # A non-path value is rejected by validation.
    with pytest.raises(ValidationError):
        PydanticModel(any_path=0)