Skip to content

Commit

Permalink
Merge pull request #751 from bundlewrap/improve-downloads
Browse files Browse the repository at this point in the history
Timeouts and HEAD requests for file downloads
  • Loading branch information
trehn committed Apr 1, 2024
2 parents 6d9b8a9 + 22f3d7f commit 9dc47b4
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 6 deletions.
60 changes: 57 additions & 3 deletions bundlewrap/items/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
from collections import defaultdict
from contextlib import contextmanager, suppress
from datetime import datetime
try:
from functools import cache
except ImportError: # Python 3.8
cache = lambda f: f
from hashlib import md5
from os import getenv, getpid, makedirs, mkdir, rmdir
from os.path import basename, dirname, exists, isfile, join, normpath
Expand All @@ -17,6 +21,7 @@
from jinja2 import Environment, FileSystemLoader
from mako.lookup import TemplateLookup
from mako.template import Template
from requests import head

from bundlewrap.exceptions import BundleError, FaultUnavailable, TemplateError
from bundlewrap.items import BUILTIN_ITEM_ATTRIBUTES, Item
Expand All @@ -31,6 +36,16 @@
DIFF_MAX_FILE_SIZE = 1024 * 1024 * 5 # bytes


@cache
def check_download(url, timeout):
    """
    Probe a download URL with a HEAD request instead of fetching the body.

    The outcome is returned rather than raised: functools.cache only
    memoizes return values, so returning the exception lets a failed
    probe be remembered as well. With the Python 3.8 fallback for
    `cache` (a no-op decorator) every call performs a new request.

    url -- the URL to probe
    timeout -- seconds passed to requests as the request timeout

    Returns None if the request succeeded, otherwise the exception
    instance that was raised while making or validating the request.
    """
    try:
        # requests does not follow redirects for HEAD unless asked to,
        # and raise_for_status() ignores 3xx responses. Without
        # allow_redirects a redirecting URL would always pass this check
        # even if its final target is broken, while the actual download
        # uses GET and does follow redirects.
        response = head(url, timeout=timeout, allow_redirects=True)
        response.raise_for_status()
    except Exception as exc:
        # Deliberately broad: the caller re-raises whatever is returned.
        return exc
    else:
        return None


def content_processor_base64(item):
# .encode() is required for pypy3 only
return b64decode(item._template_content.encode())
Expand Down Expand Up @@ -169,7 +184,11 @@ def download_file(item):
f"starting download from {item.attributes['source']}"
)
with io.job(_("{} {} downloading file".format(bold(item.node.name), bold(item.id)))):
download(item.attributes['source'], file_path)
download(
item.attributes['source'],
file_path,
timeout=item.attributes['download_timeout'],
)
io.debug(
f"{item.node.name}:{item.id}: "
f"finished download from {item.attributes['source']}"
Expand Down Expand Up @@ -236,6 +255,7 @@ class File(Item):
'content_hash': None,
'context': None,
'delete': False,
'download_timeout': 60.0,
'encoding': "utf-8",
'group': "root",
'mode': "0644",
Expand Down Expand Up @@ -498,7 +518,11 @@ def preview(self):
return self.content.decode(self.attributes['encoding'])

def test(self):
if self.attributes['source'] and not exists(self.template):
if (
self.attributes['source']
and self.attributes['content_type'] != 'download'
and not exists(self.template)
):
raise BundleError(_(
"{item} from bundle '{bundle}' refers to missing "
"file '{path}' in its 'source' attribute"
Expand All @@ -508,7 +532,22 @@ def test(self):
path=self.template,
))

if not self.attributes['delete'] and not self.attributes['content_type'] == 'any':
if (
self.attributes['delete']
or self.attributes['content_type'] == 'any'
):
pass
elif (
self.attributes['content_type'] == 'download'
and not self.attributes['content_hash']
):
download_exc = check_download(
self.attributes['source'],
self.attributes['download_timeout'],
)
if download_exc is not None:
raise download_exc
else:
with self._write_local_file() as local_path:
if self.attributes['test_with']:
cmd = self.attributes['test_with'].format(quote(local_path))
Expand Down Expand Up @@ -562,6 +601,21 @@ def validate_attributes(cls, bundle, item_id, attributes):
"not of type 'download'"
).format(item=item_id, bundle=bundle.name))

if 'download_timeout' in attributes and attributes.get('content_type') != 'download':
raise BundleError(_(
"{item} from bundle '{bundle}' specified 'download_timeout', but is "
"not of type 'download'"
).format(item=item_id, bundle=bundle.name))

if 'download_timeout' in attributes:
if (
not isinstance(attributes['download_timeout'], float)
or attributes['download_timeout'] <= 0.0
):
raise BundleError(_(
"download_timeout for {item} from bundle '{bundle}' must be a float > 0.0"
).format(item=item_id, bundle=bundle.name))

if attributes.get('content_type') == 'download':
if 'source' not in attributes:
raise BundleError(_(
Expand Down
4 changes: 2 additions & 2 deletions bundlewrap/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ def cached_property_set(prop):
return cached_property(prop, convert_to=set)


def download(url, path):
def download(url, path, timeout=60.0):
with error_context(url=url, path=path):
if not exists(dirname(path)):
makedirs(dirname(path))
if exists(path):
chmod(path, MODE644)
with open(path, 'wb') as f:
r = get(url, stream=True)
r = get(url, stream=True, timeout=timeout)
r.raise_for_status()
for block in r.iter_content(1024):
if not block:
Expand Down
6 changes: 6 additions & 0 deletions docs/content/items/file.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ When set to `True`, the path of this file will be removed. It doesn't matter if

<hr>

## download_timeout

Only valid if `content_type` is set to `download`. Number of seconds to wait for a response from the remote server before an error is raised. Must be a float greater than `0.0` (e.g. `30.0`, not `30`). This does NOT limit the total duration of the download. Defaults to `60.0`.

<hr>

## encoding

Encoding of the target file. Note that this applies to the remote file only, your template is still conveniently written in UTF-8 and will be converted by BundleWrap. Defaults to "utf-8". Other possible values (e.g. "latin-1") can be found [here](http://docs.python.org/2/library/codecs.html#standard-encodings). Only allowed with `content_type` `jinja2`, `mako`, or `text`.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"Natural Language :: English",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.8", # remove hack in files.py import when EOL
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
Expand Down

0 comments on commit 9dc47b4

Please sign in to comment.