Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FTP downloader #118

Merged
merged 42 commits into from Nov 19, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
9ffa90e
Add FTP Downloader
andersy005 Nov 5, 2019
cd56a54
Formatting only
andersy005 Nov 5, 2019
4f370e6
Fix styling issues
andersy005 Nov 5, 2019
96e3c79
Remove whitespace
andersy005 Nov 5, 2019
a32f0e7
Fix styling issue
andersy005 Nov 5, 2019
180825c
Refactor FTPDownloader
andersy005 Nov 5, 2019
be24247
Add test for FTP downloader
andersy005 Nov 6, 2019
73109a2
Add FTPDownloader to doc
andersy005 Nov 6, 2019
446dc81
Use ftplib module instead of requests-ftp
andersy005 Nov 7, 2019
37957a6
Update ftp_downloader test
andersy005 Nov 7, 2019
2dcc4de
Update docstring
andersy005 Nov 7, 2019
9f86c8b
Remove f-string for Python3.5 compatibility
andersy005 Nov 7, 2019
d5a577c
Raise Error when the protocol is not supported
andersy005 Nov 7, 2019
317e637
Merge branch 'master' of github.com:fatiando/pooch into ftp-downloader
andersy005 Nov 7, 2019
8787366
Update permission_error test
andersy005 Nov 7, 2019
4cf3c9b
Update availability test
andersy005 Nov 7, 2019
b1a5fbb
Formatting only
andersy005 Nov 7, 2019
ac3dca7
Add test for unsupported protocol
andersy005 Nov 7, 2019
5ba9f63
Attempt at fixing doctest error
andersy005 Nov 7, 2019
e0384c2
Address doctest dictionary issue
andersy005 Nov 8, 2019
9e138ac
Skip permission_error test on windows
andersy005 Nov 8, 2019
a27751e
Use pathlib
andersy005 Nov 12, 2019
066a891
Skip ftp tests on TravisCI
andersy005 Nov 13, 2019
8c92a34
Fix test
andersy005 Nov 14, 2019
8c27187
Fix linting issue
andersy005 Nov 14, 2019
f914d95
Merge branch 'master' of github.com:fatiando/pooch into ftp-downloader
andersy005 Nov 14, 2019
9cab0fa
Update pooch/tests/test_core.py
andersy005 Nov 14, 2019
843c92b
Update pooch/downloaders.py
andersy005 Nov 14, 2019
d683c3f
Update pooch/tests/test_core.py
andersy005 Nov 14, 2019
67d11a0
Update pooch/tests/test_core.py
andersy005 Nov 14, 2019
bf440a9
Update pooch/tests/test_core.py
andersy005 Nov 14, 2019
acdf4c1
formatting only
andersy005 Nov 14, 2019
7405bc3
Put old test back and clean up FTPDowloader
andersy005 Nov 14, 2019
4bd86d9
Remove optional from docstring
andersy005 Nov 14, 2019
f4b538c
Use try block to make sure connection is closed
andersy005 Nov 14, 2019
b42d13a
Update docstring
andersy005 Nov 14, 2019
c07ef8e
Update docstring
andersy005 Nov 14, 2019
cb3338f
Merge branch 'master' into ftp-downloader
leouieda Nov 18, 2019
6c06e6d
Add progressbar test for FTP
leouieda Nov 18, 2019
1d69cc6
Simplify is_available
leouieda Nov 18, 2019
50e92d2
Refactor FTPDownloader to make it simpler and safer
leouieda Nov 18, 2019
193604e
Add FTP section to usage docs
leouieda Nov 18, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 17 additions & 7 deletions pooch/core.py
Expand Up @@ -8,9 +8,9 @@
from warnings import warn

import requests

from .utils import file_hash, check_version
from .downloaders import HTTPDownloader
from requests_ftp.ftp import FTPSession
from .utils import file_hash, check_version, infer_protocol_options
andersy005 marked this conversation as resolved.
Show resolved Hide resolved
from .downloaders import HTTPDownloader, FTPDownloader


def create(
Expand Down Expand Up @@ -341,7 +341,7 @@ def mydownloader(url, output_file, pooch):
os.makedirs(str(self.abspath))

full_path = self.abspath / fname

url = self.get_url(fname)
in_storage = full_path.exists()
if not in_storage:
action = "download"
Expand All @@ -357,15 +357,20 @@ def mydownloader(url, output_file, pooch):
action_word[action], fname, self.get_url(fname), str(self.path)
)
)

if downloader is None:
downloader = HTTPDownloader()
options = infer_protocol_options(url)
if options["protocol"] == "ftp":
downloader = FTPDownloader()
else:
downloader = HTTPDownloader()
# Stream the file to a temporary so that we can safely check its hash before
# overwriting the original
tmp = tempfile.NamedTemporaryFile(delete=False, dir=str(self.abspath))
# Close the temp file so that the downloader can decide how to open it
tmp.close()
try:
downloader(self.get_url(fname), tmp.name, self)
downloader(url, tmp.name, self)
self._check_download_hash(fname, tmp.name)
# Ensure the parent directory exists in case the file is in a
# subdirectory. Otherwise, move will cause an error.
Expand Down Expand Up @@ -481,5 +486,10 @@ def is_available(self, fname):
"""
self._assert_file_in_registry(fname)
source = self.get_url(fname)
response = requests.head(source, allow_redirects=True)
options = infer_protocol_options(source)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
options = infer_protocol_options(source)

if options["protocol"] == "ftp":
andersy005 marked this conversation as resolved.
Show resolved Hide resolved
session = FTPSession()
else:
session = requests
response = session.head(source, allow_redirects=True)
return bool(response.status_code == 200)
56 changes: 56 additions & 0 deletions pooch/downloaders.py
Expand Up @@ -4,6 +4,7 @@
import sys

import requests
from requests_ftp.ftp import FTPSession

try:
from tqdm import tqdm
Expand Down Expand Up @@ -149,3 +150,58 @@ def __call__(self, url, output_file, pooch):
finally:
if ispath:
output_file.close()


class FTPDownloader(HTTPDownloader):
    """
    Download manager for fetching files over FTP.

    Only ``__call__`` is overridden here. The ``kwargs``, ``chunk_size`` and
    ``progressbar`` attributes read below are not set in this class; they are
    presumably initialised by :class:`HTTPDownloader` (confirm against its
    ``__init__``).
    """

    def __call__(self, url, output_file, pooch):
        """
        Download the given URL over FTP to the given output file.

        Uses :func:`requests_ftp.ftp.FTPSession().get`.

        Parameters
        ----------
        url : str
            The URL to the file you want to download.
        output_file : str or file-like object
            Path (and file name) to which the file will be downloaded. Anything
            without a ``write`` attribute is treated as a path and is opened
            (and closed) by this method; file-like objects are left open.
        pooch : :class:`~pooch.Pooch`
            The instance of :class:`~pooch.Pooch` that is calling this method.
            It is not used by this downloader.

        Raises
        ------
        Whatever ``response.raise_for_status()`` raises when the request did
        not succeed.
        """
        kwargs = self.kwargs.copy()
        kwargs.setdefault("stream", True)
        ispath = not hasattr(output_file, "write")
        if ispath:
            output_file = open(output_file, "w+b")
        try:
            # Use the session as a context manager so that it is always closed,
            # including when the request or the write to disk fails.
            with FTPSession() as session:
                response = session.get(url, **kwargs)
                response.raise_for_status()
                content = response.iter_content(chunk_size=self.chunk_size)
                if self.progressbar:
                    # Falls back to 0 when no content-length header is present.
                    total = int(response.headers.get("content-length", 0))
                    use_ascii = bool(sys.platform == "win32")
                    progress = tqdm(
                        total=total,
                        ncols=79,
                        ascii=use_ascii,
                        unit="B",
                        unit_scale=True,
                        leave=True,
                    )
                for chunk in content:
                    if chunk:
                        output_file.write(chunk)
                        output_file.flush()
                        if self.progressbar:
                            # Advance by the nominal chunk size rather than by
                            # len(chunk); the bar is corrected after the loop.
                            progress.update(self.chunk_size)

                if self.progressbar:
                    # The per-chunk updates can overshoot, so reset the bar and
                    # set it to exactly the expected total before closing.
                    progress.reset()
                    progress.update(total)
                    progress.close()

        finally:
            if ispath:
                output_file.close()
11 changes: 10 additions & 1 deletion pooch/tests/test_utils.py
Expand Up @@ -6,7 +6,7 @@
from tempfile import NamedTemporaryFile

from ..core import Pooch
from ..utils import make_registry
from ..utils import make_registry, infer_protocol_options
from .utils import check_tiny_data

DATA_DIR = str(Path(__file__).parent / "data" / "store")
Expand Down Expand Up @@ -60,3 +60,12 @@ def test_registry_builder_recursive():
check_tiny_data(pup.fetch("subdir/tiny-data.txt"))
finally:
os.remove(outfile.name)


def test_infer_protocol_options():
    """Check the options inferred for HTTP and FTP URLs."""
    for scheme in ("http", "ftp"):
        address = scheme + "://127.0.0.1:8080/test.nc"
        expected = {"protocol": scheme, "path": address}
        assert infer_protocol_options(address) == expected
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since these are pretty straightforward tests, they can be doctests in the function docstring instead (see comment below).

20 changes: 19 additions & 1 deletion pooch/utils.py
Expand Up @@ -3,7 +3,7 @@
"""
from pathlib import Path
import hashlib

from urllib.parse import urlsplit
import appdirs
from packaging.version import Version

Expand Down Expand Up @@ -160,3 +160,21 @@ def make_registry(directory, output, recursive=True):
# Only use Unix separators for the registry so that we don't go insane
# dealing with file paths.
outfile.write("{} {}\n".format(fname.replace("\\", "/"), fhash))


def infer_protocol_options(urlpath):
    """
    Infer protocol options from a URL.

    The protocol is taken from the scheme component of the URL as parsed by
    :func:`urllib.parse.urlsplit`.

    Parameters
    ----------
    urlpath : str
        URL (e.g.: http://127.0.0.1:8080/test.nc, ftp://127.0.0.1:8080/test.nc)

    Returns
    -------
    options : dict
        Dictionary with two keys: ``"protocol"``, the URL scheme (or
        ``"file"`` when the URL has no scheme), and ``"path"``, the
        ``urlpath`` argument unchanged.

    Examples
    --------
    >>> options = infer_protocol_options("http://127.0.0.1:8080/test.nc")
    >>> print(options["protocol"])
    http
    >>> print(options["path"])
    http://127.0.0.1:8080/test.nc
    >>> print(infer_protocol_options("ftp://127.0.0.1:8080/test.nc")["protocol"])
    ftp
    >>> print(infer_protocol_options("data/test.nc")["protocol"])
    file

    """
    parsed_path = urlsplit(urlpath)
    # An empty scheme means no protocol was given; treat it as a local file.
    protocol = parsed_path.scheme or "file"
    return {"protocol": protocol, "path": urlpath}
andersy005 marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
requests
packaging
appdirs
requests-ftp