diff --git a/.github/workflows/testwin.yml b/.github/workflows/testwin.yml new file mode 100644 index 0000000..0b74296 --- /dev/null +++ b/.github/workflows/testwin.yml @@ -0,0 +1,28 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Test Windows + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + runs-on: windows-latest + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: pip install -r requirements.txt + - name: Test + run: python -m pytest diff --git a/README.md b/README.md index 9598779..8b3335d 100644 --- a/README.md +++ b/README.md @@ -7,18 +7,25 @@ [![Coverage Status](https://coveralls.io/repos/github/fugue-project/triad/badge.svg)](https://coveralls.io/github/fugue-project/triad) [![Doc](https://readthedocs.org/projects/triad/badge)](https://triad.readthedocs.org) -[Join Fugue-Project on Slack](https://join.slack.com/t/fugue-project/shared_invite/zt-he6tcazr-OCkj2GEv~J9UYoZT3FPM4g) +[![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://join.slack.com/t/fugue-project/shared_invite/zt-jl0pcahu-KdlSOgi~fP50TZWmNxdWYQ) A collection of python utility functions for [Fugue projects](https://github.com/fugue-project) ## Installation -``` + +```bash pip install triad ``` ## Release History +### 0.5.4 + +* Make `FileSystem` work for Windows +* Make triad fully compatible with Windows +* Add Windows tests + ### 0.5.3 * Lazy evaluation for `assert_or_throw` diff --git a/tests/collections/test_fs.py b/tests/collections/test_fs.py index f3771d9..9e544fa 100644 --- 
a/tests/collections/test_fs.py +++ b/tests/collections/test_fs.py @@ -1,12 +1,48 @@ -import os +import fs as pfs from os.path import exists +import os from pytest import raises -from triad.collections.fs import FileSystem, _FSPath +from triad.collections.fs import FileSystem, _FSPath, _modify_path, _is_windows + + +def test_modify_path(): + assert "c:/" == _modify_path("/c:") + assert "s3://" == _modify_path("/s3:") + assert "C:/" == _modify_path("/C:\\") + assert "C:/a" == _modify_path("/C:\\a") + assert "C:/" == _modify_path("/C:\\\\") + assert "C:/a/b" == _modify_path("/C:\\\\a\\b") + assert "C:/" == _modify_path("/C:/") + assert "C:/a" == _modify_path("/C:/a") + assert "C:/" == _modify_path("/C://") + assert "C:/a/b" == _modify_path("/C://a/b") + + assert "/" == _modify_path("file://") + assert "/a/b" == _modify_path("file://a/b") + assert "c:/a/b" == _modify_path("file:///c:/a/b") + assert "C:/" == _modify_path("C://") + assert "c:/x" == _modify_path("c://x") + assert "c:/" == _modify_path("c:/") + assert "c:/x" == _modify_path("c:/x") + assert "c:/" == _modify_path("c:") + assert "c:/" == _modify_path("c:\\") + assert "c:/x/" == _modify_path("c:\\x\\") + raises(NotImplementedError, lambda: _modify_path("\\\\10.0.0.1\1")) + + +def test_is_windows(): + assert not _is_windows("") + assert not _is_windows("c") + assert not _is_windows("c:") + assert not _is_windows("c:\\") + assert _is_windows("c:/") + assert _is_windows("c://") + assert _is_windows("c:/x") def test__FSPath(): - p = _FSPath("/a//b.txt") + p = _FSPath("/a/b.txt") assert "" == p.scheme assert "/" == p.root assert "a/b.txt" == p.relative_path @@ -35,22 +71,31 @@ def test__FSPath(): assert "temp" == p.scheme assert "temp://a" == p.root assert "b" == p.relative_path + assert not p.is_windows # Windows test cases p = _FSPath("c:\\folder\\myfile.txt") assert "" == p.scheme assert "c:/" == p.root assert "folder/myfile.txt" == p.relative_path + assert p.is_windows - p = _FSPath("\\\\tmp\\tmp.txt") + p = 
_FSPath("c://folder/myfile.txt") assert "" == p.scheme - assert "/" == p.root - assert "tmp/tmp.txt" == p.relative_path + assert "c:/" == p.root + assert "folder/myfile.txt" == p.relative_path + assert p.is_windows - p = _FSPath("\\\\123.123.123.123\\share\\folder\\myfile.txt") + p = _FSPath("c:/folder/myfile.txt") assert "" == p.scheme - assert "/" == p.root - assert "123.123.123.123/share/folder/myfile.txt" == p.relative_path + assert "c:/" == p.root + assert "folder/myfile.txt" == p.relative_path + assert p.is_windows + + raises( + NotImplementedError, + lambda: _FSPath("\\\\123.123.123.123\\share\\folder\\myfile.txt"), + ) raises(ValueError, lambda: _FSPath(None)) raises(ValueError, lambda: _FSPath("")) @@ -59,6 +104,7 @@ def test__FSPath(): def test_fs(tmpdir): + tmpdir = str(tmpdir) # Tests to read and write with tmpdir without FS tmpfile = os.path.join(tmpdir, "f.txt") f = open(tmpfile, "a") @@ -69,32 +115,51 @@ def test_fs(tmpdir): p1 = os.path.join(tmpdir, "a") p2 = os.path.join(tmpdir, "b") - assert not os.path.exists(p1) - assert not os.path.exists(p2) + assert not exists(p1) + assert not exists(p2) fs = MockFS() fs.makedirs(p1) fs.makedirs(p2) - assert os.path.exists(p1) and os.path.isdir(p1) - assert os.path.exists(p2) and os.path.isdir(p2) + assert fs.exists(p1) and exists(p1) and os.path.isdir(p1) + assert fs.exists(p2) and exists(p2) and os.path.isdir(p2) assert 1 == fs.create_called + fs.create_called = 0 fs.makedirs("temp://x/y") fs.makedirs("temp://y/z") - assert 3 == fs.create_called + assert 2 == fs.create_called fs.makedirs("mem://x/y") fs.makedirs("mem://y/z") - assert 5 == fs.create_called + assert 4 == fs.create_called fs.writetext(os.path.join(p1, "a.txt"), "xyz") fs.copy(os.path.join(p1, "a.txt"), "mem://y/z/a.txt") assert "xyz" == fs.readtext("mem://y/z/a.txt") assert not fs.exists("mem://y/z/w/a.txt") - assert 5 == fs.create_called + assert 4 == fs.create_called fs.writetext("mem://from/a.txt", "hello") fs.copy("mem://from/a.txt", 
"mem://to/a.txt") assert "hello" == fs.readtext("mem://to/a.txt") - assert 7 == fs.create_called + assert 6 == fs.create_called + + +def test_multiple_writes(tmpdir): + fs = FileSystem() + path = os.path.join(tmpdir, "a.txt") + fs.writetext(path, "1") + fs.writetext(path, "2") + assert "2" == fs.readtext(path) + + # auto close is important + d2 = os.path.join(tmpdir, "x", "y") + ff = FileSystem(auto_close=False).makedirs(d2, recreate=True) + ff.writetext("a.txt", "3") + ff.writetext("a.txt", "4") + ff = FileSystem(auto_close=False).makedirs(d2, recreate=True) + ff.writetext("a.txt", "5") + assert "5" == ff.readtext("a.txt") def test_glob(tmpdir): + tmpdir = str(tmpdir) fs = FileSystem() os.makedirs(os.path.join(str(tmpdir), "d1")) os.makedirs(os.path.join(str(tmpdir), "d2", "d2")) @@ -105,8 +170,8 @@ def test_glob(tmpdir): f.write("read test") f.close() assert { - os.path.join(str(tmpdir), "d1", "f1.txt"), - os.path.join(str(tmpdir), "d2", "d2", "f2.txt"), + pfs.path.join(str(tmpdir), "d1", "f1.txt").replace("\\", "/"), + pfs.path.join(str(tmpdir), "d2", "d2", "f2.txt").replace("\\", "/"), } == {x.path for x in fs.glob("**/*.txt", path=str(tmpdir))} fs.makedirs("mem://a/d1") diff --git a/tests/utils/test_pyarrow.py b/tests/utils/test_pyarrow.py index a55f45f..ec30b40 100644 --- a/tests/utils/test_pyarrow.py +++ b/tests/utils/test_pyarrow.py @@ -93,6 +93,8 @@ def test_to_pa_datatype(): assert pa.int32() == to_pa_datatype("int") assert pa.int64() == to_pa_datatype(int) assert pa.float64() == to_pa_datatype(float) + assert pa.string() == to_pa_datatype(str) + assert pa.bool_() == to_pa_datatype(bool) assert pa.float64() == to_pa_datatype(np.float64) assert TRIAD_DEFAULT_TIMESTAMP == to_pa_datatype(datetime) assert pa.date32() == to_pa_datatype(date) diff --git a/triad/collections/fs.py b/triad/collections/fs.py index 8644dc4..f81acab 100644 --- a/triad/collections/fs.py +++ b/triad/collections/fs.py @@ -1,13 +1,18 @@ +import re from threading import RLock -from typing 
import Dict, Tuple +from typing import Any, Dict, Tuple from urllib.parse import urlparse -from pathlib import PureWindowsPath -from fs import open_fs, tempfs, memoryfs +from triad.utils.hash import to_uuid + +import fs +from fs import memoryfs, open_fs, tempfs from fs.base import FS as FSBase +from fs.glob import BoundGlobber, Globber from fs.mountfs import MountFS -from triad.utils.hash import to_uuid -import os +from fs.subfs import SubFS + +_SCHEME_PREFIX = re.compile(r"^[a-zA-Z0-9\-_]+:") class FileSystem(MountFS): @@ -15,15 +20,19 @@ class FileSystem(MountFS): for this class is that all paths must be absolute path with scheme. To customize different file systems, you should override `create_fs` to provide your own configured file systems. + :Examples: + >>> fs = FileSystem() >>> fs.writetext("mem://from/a.txt", "hello") >>> fs.copy("mem://from/a.txt", "mem://to/a.txt") - :Notice: - If a path is not a local path, it must include the scheme and `netloc` - (the first element after `://`) - :param auto_close: If `True` (the default), the child filesystems - will be closed when `MountFS` is closed. + + .. note:: + + If a path is not a local path, it must include the scheme and `netloc` + (the first element after `://`) + :param auto_close: If `True` (the default), the child filesystems + will be closed when `MountFS` is closed. 
""" def __init__(self, auto_close: bool = True): @@ -47,6 +56,11 @@ def create_fs(self, root: str) -> FSBase: return fs return open_fs(root) + @property + def glob(self): + """A globber object""" + return _BoundGlobber(self) + def _delegate(self, path) -> Tuple[FSBase, str]: with self._fs_lock: if self._in_create: # pragma: no cover @@ -60,45 +74,85 @@ def _delegate(self, path) -> Tuple[FSBase, str]: m_path = to_uuid(fp.root) + "/" + fp.relative_path return super()._delegate(m_path) + def makedirs( + self, path: str, permissions: Any = None, recreate: bool = False + ) -> SubFS: + """Make a directory, and any missing intermediate directories. + + .. note:: + + This overrides the base ``makedirs`` + + :param path: path to directory from root. + :param permissions: initial permissions, or `None` to use defaults. + :recreate: if `False` (the default), attempting to + create an existing directory will raise an error. Set + to `True` to ignore existing directories. + :return: a sub-directory filesystem. + + :raises fs.errors.DirectoryExists: if the path is already + a directory, and ``recreate`` is `False`. + :raises fs.errors.DirectoryExpected: if one of the ancestors + in the path is not a directory. 
+ """ + self.check() + fs, _path = self._delegate(path) + return fs.makedirs(_path, permissions=permissions, recreate=recreate) + + +class _BoundGlobber(BoundGlobber): + def __call__( + self, + pattern: Any, + path: str = "/", + namespaces: Any = None, + case_sensitive: bool = True, + exclude_dirs: Any = None, + ) -> Globber: + fp = _FSPath(path) + _path = fs.path.join(fp._root, fp._path) if fp.is_windows else path + return super().__call__( + pattern, + path=_path, + namespaces=namespaces, + case_sensitive=case_sensitive, + exclude_dirs=exclude_dirs, + ) + class _FSPath(object): def __init__(self, path: str): if path is None: raise ValueError("path can't be None") - path = self._modify_path(path) - if path.startswith("\\\\") or ( - path[1:].startswith(":\\") and path[0].isalpha() - ): - path = PureWindowsPath(path).as_uri()[7:] + path = _modify_path(path) + self._is_windows = False + if _is_windows(path): self._scheme = "" - if path[0] == "/": - self._root = path[1:4] - path = path[4:] - else: - self._root = "/" - self.path = path[1:] - self._path = path.rstrip("/") + self._root = path[:3] + self._path = path[3:] + self._is_windows = True + elif path.startswith("/"): + self._scheme = "" + self._root = "/" + self._path = fs.path.abspath(path) else: - if path.startswith("file://"): - path = path[6:] - if path.startswith("/"): - self._scheme = "" - self._root = "/" - self._path = os.path.abspath(path) - else: - uri = urlparse(path) - if uri.scheme == "" and not path.startswith("/"): - raise ValueError( - f"invalid {path}, must be abs path either local or with scheme" - ) - self._scheme = uri.scheme - if uri.netloc == "": - raise ValueError(f"invalid path {path}") - self._root = uri.scheme + "://" + uri.netloc - self._path = uri.path - self._path = self._path.lstrip("/") - # if self._path == "": - # raise ValueError(f"invalid path {path}") + uri = urlparse(path) + if uri.scheme == "" and not path.startswith("/"): + raise ValueError( + f"invalid {path}, must be abs 
path either local or with scheme" + ) + self._scheme = uri.scheme + if uri.netloc == "": + raise ValueError(f"invalid path {path}") + self._root = uri.scheme + "://" + uri.netloc + self._path = uri.path + self._path = self._path.lstrip("/") + # if self._path == "": + # raise ValueError(f"invalid path {path}") + + @property + def is_windows(self) -> bool: + return self._is_windows @property def scheme(self) -> str: @@ -112,11 +166,41 @@ def root(self) -> str: def relative_path(self) -> str: return self._path - def _modify_path(self, path: str) -> str: - """to fix things like /s3:/a/b.txt -> s3://a/b.txt""" - if path.startswith("/"): - p = path.find("/", 1) - if p > 1 and path[p - 1] == ":": - scheme = path[1 : p - 1] - return scheme + "://" + path[p + 1 :] - return path + +def _modify_path(path: str) -> str: # noqa: C901 + """to fix things like /s3:/a/b.txt -> s3://a/b.txt""" + if path.startswith("/"): + s = _SCHEME_PREFIX.search(path[1:]) + if s is not None: + colon = s.end() + scheme = path[1:colon] + if colon + 1 == len(path): # /C: or /s3: + path = scheme + "://" + elif path[colon + 1] == "/": # /s3:/a/b.txt + path = scheme + "://" + path[colon + 1 :].lstrip("/") + elif path[colon + 1] == "\\": # /c:\a\b.txt + path = scheme + ":\\" + path[colon + 1 :].lstrip("\\") + if path.startswith("file:///"): + path = path[8:] + elif path.startswith("file://"): + path = path[6:] + if path.startswith("\\\\"): + # windows \\10.100.168.1\... 
+ raise NotImplementedError(f"path {path} is not supported") + if path != "" and path[0].isalpha(): + if len(path) == 2 and path[1] == ":": + # C: => C:/ + return path[0] + ":/" + if path[1:].startswith(":\\"): + # C:\a\b\c => C:/a/b/c + return path[0] + ":/" + path[3:].replace("\\", "/").lstrip("/") + if path[1:].startswith(":/"): + # C:/a/b/c => C:/a/b/c + return path[0] + ":/" + path[3:].lstrip("/") + return path + + +def _is_windows(path: str) -> bool: + if len(path) < 3: + return False + return path[0].isalpha() and path[1] == ":" and path[2] == "/" diff --git a/triad/utils/pyarrow.py b/triad/utils/pyarrow.py index 73e5a1d..81942d9 100644 --- a/triad/utils/pyarrow.py +++ b/triad/utils/pyarrow.py @@ -132,6 +132,14 @@ def to_pa_datatype(obj: Any) -> pa.DataType: """ if isinstance(obj, pa.DataType): return obj + if obj is bool: + return pa.bool_() + if obj is int: + return pa.int64() + if obj is float: + return pa.float64() + if obj is str: + return pa.string() if isinstance(obj, str): return _parse_type(obj) if issubclass(obj, datetime): diff --git a/triad_version/__init__.py b/triad_version/__init__.py index 43a1e95..6b27eee 100644 --- a/triad_version/__init__.py +++ b/triad_version/__init__.py @@ -1 +1 @@ -__version__ = "0.5.3" +__version__ = "0.5.4"