Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to resume interrupted downloads in FileDownloader by using HTTP range requests #6791

Merged
merged 8 commits into from Apr 28, 2020
47 changes: 37 additions & 10 deletions conans/client/rest/file_downloader.py
@@ -1,4 +1,5 @@
import os
import re
import time
import traceback

Expand Down Expand Up @@ -43,8 +44,15 @@ def download(self, url, file_path=None, auth=None, retry=None, retry_wait=None,
return _call_with_retry(self._output, retry, retry_wait, self._download_file, url, auth,
headers, file_path)

def _download_file(self, url, auth, headers, file_path):
def _download_file(self, url, auth, headers, file_path, try_resume=False):
t1 = time.time()
if try_resume and file_path and os.path.exists(file_path):
range_start = os.path.getsize(file_path)
headers = headers.copy() if headers else {}
headers["range"] = "bytes={}-".format(range_start)
else:
range_start = 0

try:
response = self._requester.get(url, stream=True, verify=self._verify_ssl, auth=auth,
headers=headers)
Expand All @@ -69,10 +77,11 @@ def read_response(size):

def write_chunks(chunks, path):
ret = None
downloaded_size = 0
downloaded_size = range_start
if path:
mkdir(os.path.dirname(path))
with open(path, 'wb') as file_handler:
mode = "ab" if range_start else "wb"
with open(path, mode) as file_handler:
for chunk in chunks:
assert ((six.PY3 and isinstance(chunk, bytes)) or
(six.PY2 and isinstance(chunk, str)))
Expand All @@ -86,24 +95,42 @@ def write_chunks(chunks, path):
ret = bytes(ret_data)
return ret, downloaded_size

def get_total_length():
    """Return the size in bytes of the complete remote file.

    For a fresh download (``range_start`` is 0) the size comes from the
    ``Content-Length`` response header, falling back to the length of
    the response body when that header is missing or empty.

    For a resumed download the size is the total announced in the
    ``Content-Range`` response header, whose first byte position must be
    exactly ``range_start``.

    Raises ConanException if a resumed download carries a missing,
    malformed or mismatching ``Content-Range`` header.
    """
    if not range_start:
        declared = response.headers.get('Content-Length')
        return int(declared or len(response.content))

    content_range = response.headers.get("Content-Range", "")
    parsed = re.match(r"^bytes (\d+)-(\d+)/(\d+)", content_range)
    # The partial response must begin exactly where the local file ends,
    # otherwise appending it would corrupt the file.
    if parsed is None or int(parsed.group(1)) != range_start:
        raise ConanException("Error in resumed download from %s\n"
                             "Incorrect Content-Range header %s" % (url, content_range))
    return int(parsed.group(3))

try:
logger.debug("DOWNLOAD: %s" % url)
total_length = response.headers.get('content-length') or len(response.content)
total_length = int(total_length)
description = "Downloading {}".format(os.path.basename(file_path)) if file_path else None
total_length = get_total_length()
action = "Downloading" if range_start == 0 else "Continuing download of"
description = "{} {}".format(action, os.path.basename(file_path)) if file_path else None
progress = progress_bar.Progress(total_length, self._output, description)
progress.initial_value(range_start)

chunk_size = 1024 if not file_path else 1024 * 100
encoding = response.headers.get('content-encoding')
gzip = (encoding == "gzip")

written_chunks, total_downloaded_size = write_chunks(
progress.update(read_response(chunk_size)),
file_path
)

response.close()
if total_downloaded_size != total_length and not gzip:
if (
file_path and total_length > total_downloaded_size > range_start
and response.headers.get("Accept-Ranges") == "bytes"
):
written_chunks = self._download_file(url, auth, headers, file_path, try_resume=True)
flashdagger marked this conversation as resolved.
Show resolved Hide resolved
elif (
total_downloaded_size != total_length
and response.headers.get("Content-Encoding") != "gzip"
flashdagger marked this conversation as resolved.
Show resolved Hide resolved
):
raise ConanException("Transfer interrupted before "
"complete: %s < %s" % (total_downloaded_size, total_length))

Expand Down
115 changes: 115 additions & 0 deletions conans/test/unittests/client/rest/downloader_test.py
@@ -0,0 +1,115 @@
import os
import re
import shutil
import tempfile
import unittest

from conans.client.rest.file_downloader import FileDownloader
from conans.errors import ConanException
from conans.test.utils.tools import TestBufferConanOutput
from conans.util.files import load


class _ConfigMock:
    """Configuration stand-in handed to ``FileDownloader`` by the tests.

    It only carries the two retry settings, both set to zero.
    """

    def __init__(self):
        self.retry = self.retry_wait = 0


class MockResponse(object):
    """In-memory HTTP response used by ``MockRequester``.

    :param data: payload served by ``iter_content``.
    :param headers: mapping of response headers; it is copied, never mutated.
    :param status_code: HTTP status reported by the response.
    """

    def __init__(self, data, headers, status_code=200):
        self.data = data
        self.ok = True
        self.status_code = status_code
        # Expose every header under its original spelling and under its
        # lowercase spelling so lookups work with either form.
        combined = headers.copy()
        for name, value in headers.items():
            combined[name.lower()] = value
        self.headers = combined

    def iter_content(self, size):
        """Yield the payload in consecutive slices of at most ``size`` items."""
        for begin in range(0, len(self.data), size):
            end = begin + size
            yield self.data[begin:end]

    def close(self):
        """Nothing to release for an in-memory response."""
        pass


class MockRequester(object):
    """Fake requester that serves a fixed payload from memory.

    :param data: the complete payload of the "remote" file.
    :param chunk_size: maximum number of bytes delivered per response;
        a value smaller than the remaining data yields a truncated body.
        Defaults to the whole payload.
    :param accept_ranges: when false, a ``range`` request header is
        ignored and the payload is served from the start with status 200.
    """
    retry = 0
    retry_wait = 0

    def __init__(self, data, chunk_size=None, accept_ranges=True):
        self._data = data
        self._chunk_size = len(data) if chunk_size is None else chunk_size
        self._accept_ranges = accept_ranges

    def get(self, *_args, **kwargs):
        """Build a ``MockResponse`` honouring an optional ``range`` request header."""
        request_headers = kwargs.get("headers") or {}
        range_match = re.match(r"bytes=([0-9]+)-", request_headers.get("range", ""))
        total = len(self._data)

        start, status = 0, 200
        response_headers = {"Content-Length": total, "Accept-Ranges": "bytes"}
        if range_match and self._accept_ranges:
            # Partial content: announce the served span and the full size.
            start = int(range_match.group(1))
            status = 206
            response_headers["Content-Length"] = total - start
            response_headers["Content-Range"] = "bytes {}-{}/{}".format(start, total - 1,
                                                                        total)
        assert start <= total

        body = self._data[start:start + self._chunk_size]
        return MockResponse(body, status_code=status, headers=response_headers)


class DownloaderUnitTest(unittest.TestCase):
    """Tests for ``FileDownloader.download`` against ``MockRequester``.

    The cases cover complete downloads, downloads cut short by the
    requester's ``chunk_size`` and a requester that ignores range requests.
    """

    def setUp(self):
        # tempfile.mktemp() is deprecated and race-prone: it only returns a
        # name that another process may claim first. Create a private
        # directory instead and remove it, with any downloaded file, when
        # the test ends.
        tmp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
        self.target = os.path.join(tmp_dir, "downloaded_file")
        self.out = TestBufferConanOutput()

    def _create_downloader(self, data, **requester_kwargs):
        """Return a FileDownloader wired to a MockRequester serving ``data``."""
        requester = MockRequester(data, **requester_kwargs)
        return FileDownloader(requester=requester, output=self.out, verify=None,
                              config=_ConfigMock())

    def test_succeed_download_to_file_if_not_interrupted(self):
        expected_content = b"some data"
        downloader = self._create_downloader(expected_content)
        downloader.download("fake_url", file_path=self.target)
        actual_content = load(self.target, binary=True)
        self.assertEqual(expected_content, actual_content)

    def test_succeed_download_to_memory_if_not_interrupted(self):
        expected_content = b"some data"
        downloader = self._create_downloader(expected_content)
        actual_content = downloader.download("fake_url", file_path=None)
        self.assertEqual(expected_content, actual_content)

    def test_resume_download_to_file_if_interrupted(self):
        expected_content = b"some data"
        downloader = self._create_downloader(expected_content, chunk_size=4)
        downloader.download("fake_url", file_path=self.target)
        actual_content = load(self.target, binary=True)
        self.assertEqual(expected_content, actual_content)

    def test_fail_download_to_memory_if_interrupted(self):
        expected_content = b"some data"
        downloader = self._create_downloader(expected_content, chunk_size=4)
        with self.assertRaisesRegexp(ConanException, r"Transfer interrupted before complete"):
            downloader.download("fake_url", file_path=None)

    def test_fail_interrupted_download_to_file_if_no_progress(self):
        expected_content = b"some data"
        # chunk_size=0 makes every response body empty.
        downloader = self._create_downloader(expected_content, chunk_size=0)
        with self.assertRaisesRegexp(ConanException, r"Download failed"):
            downloader.download("fake_url", file_path=self.target)

    def test_fail_interrupted_download_if_server_not_accepting_ranges(self):
        expected_content = b"some data"
        downloader = self._create_downloader(expected_content, chunk_size=4,
                                             accept_ranges=False)
        with self.assertRaisesRegexp(ConanException, r"Incorrect Content-Range header"):
            downloader.download("fake_url", file_path=self.target)
4 changes: 4 additions & 0 deletions conans/util/progress_bar.py
Expand Up @@ -49,6 +49,10 @@ def __init__(self, length, output, description, post_description=None):
file=self._output, unit="B", leave=False, dynamic_ncols=False,
ascii=True, unit_scale=True, unit_divisor=1024)

def initial_value(self, value):
    """Start the progress accounting at ``value`` instead of zero.

    :param value: amount already processed before any update arrives.
    """
    # Record the amount first, then forward the same amount to the bar
    # update; the order is kept as in the original.
    self._processed_size = value
    self._pb_update(value)

def _pb_update(self, chunk_size):
if self._tqdm_bar is not None:
self._tqdm_bar.update(chunk_size)
Expand Down