From 12a0a7a3389a7718963e84dc8eaaf891cd01fc1a Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Sat, 18 Apr 2020 09:29:52 +0300 Subject: [PATCH 1/8] remote: add support for WebDAV Fixes #1153 --- dvc/remote/__init__.py | 4 ++++ dvc/remote/base.py | 8 +++---- dvc/remote/webdav.py | 53 ++++++++++++++++++++++++++++++++++++++++++ dvc/remote/webdavs.py | 6 +++++ dvc/scheme.py | 2 ++ 5 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 dvc/remote/webdav.py create mode 100644 dvc/remote/webdavs.py diff --git a/dvc/remote/__init__.py b/dvc/remote/__init__.py index 6c2de0a057..76c0047272 100644 --- a/dvc/remote/__init__.py +++ b/dvc/remote/__init__.py @@ -11,6 +11,8 @@ from dvc.remote.oss import RemoteOSS from dvc.remote.s3 import RemoteS3 from dvc.remote.ssh import RemoteSSH +from dvc.remote.webdav import RemoteWEBDAV +from dvc.remote.webdavs import RemoteWEBDAVS REMOTES = [ @@ -23,6 +25,8 @@ RemoteS3, RemoteSSH, RemoteOSS, + RemoteWEBDAV, + RemoteWEBDAVS, # NOTE: RemoteLOCAL is the default ] diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 93e22cdc2e..0384268354 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -552,8 +552,8 @@ def upload(self, from_info, to_info, name=None, no_progress_bar=False): if not hasattr(self, "_upload"): raise RemoteActionNotImplemented("upload", self.scheme) - if to_info.scheme != self.scheme: - raise NotImplementedError + # if to_info.scheme != self.scheme: + # raise NotImplementedError if from_info.scheme != "local": raise NotImplementedError @@ -588,8 +588,8 @@ def download( if not hasattr(self, "_download"): raise RemoteActionNotImplemented("download", self.scheme) - if from_info.scheme != self.scheme: - raise NotImplementedError + # if from_info.scheme != self.scheme: + # raise NotImplementedError if to_info.scheme == self.scheme != "local": self.copy(from_info, to_info) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py new file mode 100644 index 0000000000..54618c1285 --- /dev/null +++ b/dvc/remote/webdav.py @@ -0,0 +1,53 @@ +from .http import RemoteHTTP, ask_password +from dvc.scheme import Schemes + +import os.path + +from dvc.progress import Tqdm + + +class RemoteWEBDAV(RemoteHTTP): + scheme = Schemes.WEBDAV + + def __init__(self, repo, config): + super().__init__(repo, config) + + url = config.get("url") + if url: + self.path_info = self.path_cls(url) + self.path_info.scheme = self.path_info.scheme.replace( + "webdav", "http") + user = config.get("user", None) + if user: + self.path_info.user = user + else: + self.path_info = None + + self.auth = config.get("auth", None) + self.custom_auth_header = config.get("custom_auth_header", None) + self.password = config.get("password", None) + self.ask_password = config.get("ask_password", False) + self.headers = {} + + def _upload(self, from_file, to_info, name=None, no_progress_bar=False): + def chunks(): + with open(from_file, "rb") as fd: + with Tqdm.wrapattr( + fd, + "read", + total=None + if no_progress_bar + else os.path.getsize(from_file), + leave=False, + desc=to_info.url if name is None else name, + disable=no_progress_bar, + ) as fd_wrapped: + while True: + chunk = fd_wrapped.read(self.CHUNK_SIZE) + if not chunk: + break + yield chunk + + response = self._request("PUT", to_info.url, data=chunks()) + if response.status_code not in (200, 201): + raise HTTPError(response.status_code, response.reason) diff --git a/dvc/remote/webdavs.py b/dvc/remote/webdavs.py new file mode 100644 index 0000000000..ec4d934d5c --- /dev/null +++ b/dvc/remote/webdavs.py @@ -0,0 +1,6 @@ +from .webdav import RemoteWEBDAV +from dvc.scheme import Schemes + + +class RemoteWEBDAVS(RemoteWEBDAV): + scheme = Schemes.WEBDAVS diff --git a/dvc/scheme.py b/dvc/scheme.py index e64e24f5ac..76c6d7a497 100644 --- a/dvc/scheme.py +++ b/dvc/scheme.py @@ -9,3 +9,5 @@ class Schemes: GDRIVE = "gdrive" LOCAL = "local" OSS = "oss" + WEBDAV = "webdav" + WEBDAVS = "webdavs" From 9e8a97be73e2e89f9da0a02f4b879544f948aa51 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Sat, 18 Apr 2020 09:55:19 +0300 Subject: [PATCH 2/8] remote: WebDav Fix deepsource error --- dvc/remote/webdav.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 54618c1285..24162551dd 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -1,9 +1,10 @@ -from .http import RemoteHTTP, ask_password +from .http import RemoteHTTP from dvc.scheme import Schemes import os.path from dvc.progress import Tqdm +from dvc.exceptions import HTTPError class RemoteWEBDAV(RemoteHTTP): From 5dc4dd802cb60f123e46af976b7569bf2ee8183f Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 11:29:35 +0300 Subject: [PATCH 3/8] remote: WebDav Fix: base to the original state. Add WebdavURLInfo. Creating intermediate collections. --- dvc/remote/base.py | 8 ++--- dvc/remote/webdav.py | 74 ++++++++++++++++++++++++++++++-------------- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 0384268354..93e22cdc2e 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -552,8 +552,8 @@ def upload(self, from_info, to_info, name=None, no_progress_bar=False): if not hasattr(self, "_upload"): raise RemoteActionNotImplemented("upload", self.scheme) - # if to_info.scheme != self.scheme: - # raise NotImplementedError + if to_info.scheme != self.scheme: + raise NotImplementedError if from_info.scheme != "local": raise NotImplementedError @@ -588,8 +588,8 @@ def download( if not hasattr(self, "_download"): raise RemoteActionNotImplemented("download", self.scheme) - # if from_info.scheme != self.scheme: - # raise NotImplementedError + if from_info.scheme != self.scheme: + raise NotImplementedError if to_info.scheme == self.scheme != "local": self.copy(from_info, to_info) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 24162551dd..41135246ed 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -1,34 +1,49 @@ -from .http import RemoteHTTP -from dvc.scheme import Schemes - +import copy import os.path +from funcy import cached_property + +from .http import RemoteHTTP +from dvc.scheme import Schemes +from dvc.path_info import HTTPURLInfo from dvc.progress import Tqdm from dvc.exceptions import HTTPError +class WebdavURLInfo(HTTPURLInfo): + def __init__(self, url): + super().__init__(url) + + @cached_property + def url(self): + return "{}://{}{}{}{}{}".format( + self.scheme.replace("webdav", "http"), + self.netloc, + self._spath, + (";" + self.params) if self.params else "", + ("?" + self.query) if self.query else "", + ("#" + self.fragment) if self.fragment else "", + ) + + def get_collections(self) -> list: + def pcol(path): + return "{}://{}{}".format( + self.scheme.replace("webdav", "http"), + self.netloc, + path, + ) + p = self.path.split("/")[1:-1] + if not p: + return [] + r = [] + for i in range(len(p)): + r.append(pcol("/{}/".format("/".join(p[:i + 1])))) + return r + + class RemoteWEBDAV(RemoteHTTP): scheme = Schemes.WEBDAV - - def __init__(self, repo, config): - super().__init__(repo, config) - - url = config.get("url") - if url: - self.path_info = self.path_cls(url) - self.path_info.scheme = self.path_info.scheme.replace( - "webdav", "http") - user = config.get("user", None) - if user: - self.path_info.user = user - else: - self.path_info = None - - self.auth = config.get("auth", None) - self.custom_auth_header = config.get("custom_auth_header", None) - self.password = config.get("password", None) - self.ask_password = config.get("ask_password", False) - self.headers = {} + path_cls = WebdavURLInfo def _upload(self, from_file, to_info, name=None, no_progress_bar=False): def chunks(): @@ -49,6 +64,19 @@ def chunks(): break yield chunk + self._create_collections(to_info) response = self._request("PUT", to_info.url, data=chunks()) if response.status_code not in (200, 201): raise HTTPError(response.status_code, response.reason) + + def _create_collections(self, to_info): + url_cols = to_info.get_collections() + i = len(url_cols) + while i > 0: + if bool(self._request("HEAD", url_cols[i - 1])): + break + i -= 1 + for i in range(i, len(url_cols)): + response = self._request("MKCOL", url_cols[i]) + if response.status_code not in (200, 201): + raise HTTPError(response.status_code, response.reason) From 4d0d7bb7f49870b4c003b89cef29b6949f971345 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 11:57:33 +0300 Subject: [PATCH 4/8] remote: WebDav Fix build error --- dvc/remote/webdav.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 41135246ed..e21c56f421 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -1,4 +1,3 @@ -import copy import os.path from funcy import cached_property @@ -71,11 +70,9 @@ def chunks(): def _create_collections(self, to_info): url_cols = to_info.get_collections() - i = len(url_cols) - while i > 0: + for i in reversed(range(len(url_cols) + 1)): if bool(self._request("HEAD", url_cols[i - 1])): break - i -= 1 for i in range(i, len(url_cols)): response = self._request("MKCOL", url_cols[i]) if response.status_code not in (200, 201): From 441e9f4dc28f7d1e8063b8fa85338eb0483b7214 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 20:46:57 +0300 Subject: [PATCH 5/8] remote: WebDav Fix deepsource error --- dvc/remote/webdav.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index e21c56f421..4e87c256d4 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -70,10 +70,21 @@ def chunks(): def _create_collections(self, to_info): url_cols = to_info.get_collections() + from_i = 0 for i in reversed(range(len(url_cols) + 1)): + from_i = i if bool(self._request("HEAD", url_cols[i - 1])): break - for i in range(i, len(url_cols)): + for i in range(from_i, len(url_cols)): response = self._request("MKCOL", url_cols[i]) if response.status_code not in (200, 201): raise HTTPError(response.status_code, response.reason) + + def gc(self): + raise NotImplementedError + + def list_cache_paths(self, prefix=None, progress_callback=None): + raise NotImplementedError + + def walk_files(self, path_info): + raise NotImplementedError From 74e8f53f79dddb7db24448bfab889603e6409ae0 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 20:49:02 +0300 Subject: [PATCH 6/8] remote: WebDav Fix deepseource error (webdavs) --- dvc/remote/webdavs.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dvc/remote/webdavs.py b/dvc/remote/webdavs.py index ec4d934d5c..1302123340 100644 --- a/dvc/remote/webdavs.py +++ b/dvc/remote/webdavs.py @@ -4,3 +4,12 @@ class RemoteWEBDAVS(RemoteWEBDAV): scheme = Schemes.WEBDAVS + + def gc(self): + raise NotImplementedError + + def list_cache_paths(self, prefix=None, progress_callback=None): + raise NotImplementedError + + def walk_files(self, path_info): + raise NotImplementedError From a997811347baf17d004fa714ed749b5a41083ae7 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 21:06:53 +0300 Subject: [PATCH 7/8] remote: WebDav Fix codeclimat --- dvc/remote/webdav.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 4e87c256d4..e57d9015c3 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -70,13 +70,13 @@ def chunks(): def _create_collections(self, to_info): url_cols = to_info.get_collections() - from_i = 0 - for i in reversed(range(len(url_cols) + 1)): - from_i = i - if bool(self._request("HEAD", url_cols[i - 1])): + from_idx = 0 + for idx in reversed(range(len(url_cols) + 1)): + from_idx = idx + if bool(self._request("HEAD", url_cols[idx - 1])): break - for i in range(from_i, len(url_cols)): - response = self._request("MKCOL", url_cols[i]) + for idx in range(from_idx, len(url_cols)): + response = self._request("MKCOL", url_cols[idx]) if response.status_code not in (200, 201): raise HTTPError(response.status_code, response.reason) From 432b34fe7df1ae24df9bca14a16b9c52d4870a3d Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Mon, 20 Apr 2020 18:07:09 +0000 Subject: [PATCH 8/8] Restyled by black --- dvc/remote/webdav.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index e57d9015c3..7348befea2 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -27,16 +27,15 @@ def url(self): def get_collections(self) -> list: def pcol(path): return "{}://{}{}".format( - self.scheme.replace("webdav", "http"), - self.netloc, - path, + self.scheme.replace("webdav", "http"), self.netloc, path, ) + p = self.path.split("/")[1:-1] if not p: return [] r = [] for i in range(len(p)): - r.append(pcol("/{}/".format("/".join(p[:i + 1])))) + r.append(pcol("/{}/".format("/".join(p[: i + 1])))) return r