diff --git a/conans/client/rest/file_downloader.py b/conans/client/rest/file_downloader.py
index 9cc5836b41a..000bbe72138 100644
--- a/conans/client/rest/file_downloader.py
+++ b/conans/client/rest/file_downloader.py
@@ -1,4 +1,5 @@
 import os
+import re
 import time
 import traceback
 
@@ -43,8 +44,15 @@ def download(self, url, file_path=None, auth=None, retry=None, retry_wait=None,
         return _call_with_retry(self._output, retry, retry_wait, self._download_file, url, auth,
                                 headers, file_path)
 
-    def _download_file(self, url, auth, headers, file_path):
+    def _download_file(self, url, auth, headers, file_path, try_resume=False):
         t1 = time.time()
+        if try_resume and file_path and os.path.exists(file_path):
+            range_start = os.path.getsize(file_path)
+            headers = headers.copy() if headers else {}
+            headers["range"] = "bytes={}-".format(range_start)
+        else:
+            range_start = 0
+
         try:
             response = self._requester.get(url, stream=True, verify=self._verify_ssl, auth=auth,
                                            headers=headers)
@@ -69,10 +77,11 @@ def read_response(size):
 
         def write_chunks(chunks, path):
             ret = None
-            downloaded_size = 0
+            downloaded_size = range_start
             if path:
                 mkdir(os.path.dirname(path))
-                with open(path, 'wb') as file_handler:
+                mode = "ab" if range_start else "wb"
+                with open(path, mode) as file_handler:
                     for chunk in chunks:
                         assert ((six.PY3 and isinstance(chunk, bytes)) or
                                 (six.PY2 and isinstance(chunk, str)))
@@ -86,24 +95,54 @@ def write_chunks(chunks, path):
                 ret = bytes(ret_data)
             return ret, downloaded_size
 
+        def get_total_length():
+            """Return the total size in bytes of the file being downloaded.
+
+            A resumed request must be answered with a Content-Range header that
+            starts at ``range_start``; the total it announces is returned.
+            Otherwise Content-Length (or the size of the body) is used.
+
+            :raises ConanException: Content-Range is missing, malformed or
+                                    starts at an unexpected offset.
+            """
+            if range_start:
+                content_range = response.headers.get("Content-Range", "")
+                match = re.match(r"^bytes (\d+)-(\d+)/(\d+)", content_range)
+                if not match or range_start != int(match.group(1)):
+                    raise ConanException("Error in resumed download from %s\n"
+                                         "Incorrect Content-Range header %s" % (url, content_range))
+                return int(match.group(3))
+            else:
+                total_size = response.headers.get('Content-Length') or len(response.content)
+                return int(total_size)
+
         try:
             logger.debug("DOWNLOAD: %s" % url)
-            total_length = response.headers.get('content-length') or len(response.content)
-            total_length = int(total_length)
-            description = "Downloading {}".format(os.path.basename(file_path)) if file_path else None
+            total_length = get_total_length()
+            action = "Downloading" if range_start == 0 else "Continuing download of"
+            description = "{} {}".format(action, os.path.basename(file_path)) if file_path else None
             progress = progress_bar.Progress(total_length, self._output, description)
+            progress.initial_value(range_start)
 
             chunk_size = 1024 if not file_path else 1024 * 100
             encoding = response.headers.get('content-encoding')
             gzip = (encoding == "gzip")
 
             written_chunks, total_downloaded_size = write_chunks(
                 progress.update(read_response(chunk_size)),
                 file_path
             )
 
             response.close()
-            if total_downloaded_size != total_length and not gzip:
+            # With gzip encoding the bytes written do not match the announced length,
+            # so the size on disk is not a valid offset: never resume such transfers
+            if (
+                file_path and not gzip
+                and total_length > total_downloaded_size > range_start
+                and response.headers.get("Accept-Ranges") == "bytes"
+            ):
+                written_chunks = self._download_file(url, auth, headers, file_path, try_resume=True)
+            elif total_downloaded_size != total_length and not gzip:
                 raise ConanException("Transfer interrupted before "
                                      "complete: %s < %s" % (total_downloaded_size, total_length))
 
diff --git a/conans/test/unittests/client/rest/downloader_test.py b/conans/test/unittests/client/rest/downloader_test.py
new file mode 100644
index 00000000000..27a928b65a9
--- /dev/null
+++ b/conans/test/unittests/client/rest/downloader_test.py
@@ -0,0 +1,135 @@
+import os
+import re
+import shutil
+import tempfile
+import unittest
+
+from conans.client.rest.file_downloader import FileDownloader
+from conans.errors import ConanException
+from conans.test.utils.tools import TestBufferConanOutput
+from conans.util.files import load
+
+
+class _ConfigMock:
+    """Configuration stub that disables download retries."""
+
+    def __init__(self):
+        self.retry = 0
+        self.retry_wait = 0
+
+
+class MockResponse(object):
+    """Fake HTTP response that serves ``data`` together with ``headers``."""
+
+    def __init__(self, data, headers, status_code=200):
+        self.data = data
+        self.ok = True
+        self.status_code = status_code
+        # Make every header reachable by its original and its lower-case name
+        self.headers = headers.copy()
+        self.headers.update({key.lower(): value for key, value in headers.items()})
+
+    def iter_content(self, size):
+        for i in range(0, len(self.data), size):
+            yield self.data[i:i + size]
+
+    def close(self):
+        pass
+
+
+class MockRequester(object):
+    """Fake requester returning at most ``chunk_size`` bytes per request.
+
+    A ``chunk_size`` shorter than ``data`` simulates an interrupted transfer.
+    With ``accept_ranges=False`` the "range" request header is ignored, so the
+    data is always served from its first byte and without Content-Range.
+    """
+
+    retry = 0
+    retry_wait = 0
+
+    def __init__(self, data, chunk_size=None, accept_ranges=True):
+        self._data = data
+        self._chunk_size = chunk_size if chunk_size is not None else len(data)
+        self._accept_ranges = accept_ranges
+
+    def get(self, *_args, **kwargs):
+        start = 0
+        headers = kwargs.get("headers") or {}
+        transfer_range = headers.get("range", "")
+        match = re.match(r"bytes=([0-9]+)-", transfer_range)
+        status = 200
+        headers = {"Content-Length": len(self._data), "Accept-Ranges": "bytes"}
+        if match and self._accept_ranges:
+            start = int(match.groups()[0])
+            status = 206
+            headers.update({"Content-Length": len(self._data) - start,
+                            "Content-Range": "bytes {}-{}/{}".format(start, len(self._data) - 1,
+                                                                     len(self._data))})
+        assert start <= len(self._data)
+
+        response = MockResponse(self._data[start:start + self._chunk_size], status_code=status,
+                                headers=headers)
+        return response
+
+
+class DownloaderUnitTest(unittest.TestCase):
+    """Checks FileDownloader on complete and on interrupted transfers."""
+
+    def setUp(self):
+        # tempfile.mktemp() is deprecated and race-prone: download into a private
+        # folder instead, removed together with its content after each test
+        tmp_folder = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_folder, ignore_errors=True)
+        self.target = os.path.join(tmp_folder, "downloaded_file")
+        self.out = TestBufferConanOutput()
+
+    def test_succeed_download_to_file_if_not_interrupted(self):
+        expected_content = b"some data"
+        requester = MockRequester(expected_content)
+        downloader = FileDownloader(requester=requester, output=self.out, verify=None,
+                                    config=_ConfigMock())
+        downloader.download("fake_url", file_path=self.target)
+        actual_content = load(self.target, binary=True)
+        self.assertEqual(expected_content, actual_content)
+
+    def test_succeed_download_to_memory_if_not_interrupted(self):
+        expected_content = b"some data"
+        requester = MockRequester(expected_content)
+        downloader = FileDownloader(requester=requester, output=self.out, verify=None,
+                                    config=_ConfigMock())
+        actual_content = downloader.download("fake_url", file_path=None)
+        self.assertEqual(expected_content, actual_content)
+
+    def test_resume_download_to_file_if_interrupted(self):
+        expected_content = b"some data"
+        requester = MockRequester(expected_content, chunk_size=4)
+        downloader = FileDownloader(requester=requester, output=self.out, verify=None,
+                                    config=_ConfigMock())
+        downloader.download("fake_url", file_path=self.target)
+        actual_content = load(self.target, binary=True)
+        self.assertEqual(expected_content, actual_content)
+
+    def test_fail_download_to_memory_if_interrupted(self):
+        expected_content = b"some data"
+        requester = MockRequester(expected_content, chunk_size=4)
+        downloader = FileDownloader(requester=requester, output=self.out, verify=None,
+                                    config=_ConfigMock())
+        with self.assertRaisesRegexp(ConanException, r"Transfer interrupted before complete"):
+            downloader.download("fake_url", file_path=None)
+
+    def test_fail_interrupted_download_to_file_if_no_progress(self):
+        expected_content = b"some data"
+        requester = MockRequester(expected_content, chunk_size=0)
+        downloader = FileDownloader(requester=requester, output=self.out, verify=None,
+                                    config=_ConfigMock())
+        with self.assertRaisesRegexp(ConanException, r"Download failed"):
+            downloader.download("fake_url", file_path=self.target)
+
+    def test_fail_interrupted_download_if_server_not_accepting_ranges(self):
+        expected_content = b"some data"
+        requester = MockRequester(expected_content, chunk_size=4, accept_ranges=False)
+        downloader = FileDownloader(requester=requester, output=self.out, verify=None,
+                                    config=_ConfigMock())
+        with self.assertRaisesRegexp(ConanException, r"Incorrect Content-Range header"):
+            downloader.download("fake_url", file_path=self.target)
diff --git a/conans/util/progress_bar.py b/conans/util/progress_bar.py
index 97c6d9b6aad..4a076f72fec 100644
--- a/conans/util/progress_bar.py
+++ b/conans/util/progress_bar.py
@@ -49,6 +49,11 @@ def __init__(self, length, output, description, post_description=None):
                                   file=self._output, unit="B", leave=False, dynamic_ncols=False,
                                   ascii=True, unit_scale=True, unit_divisor=1024)
 
+    def initial_value(self, value):
+        """Mark ``value`` units as already processed, e.g. when resuming a download."""
+        self._processed_size = value
+        self._pb_update(value)
+
     def _pb_update(self, chunk_size):
         if self._tqdm_bar is not None:
             self._tqdm_bar.update(chunk_size)