diff --git a/pywaybackup/archive.py b/pywaybackup/archive.py index 4d6e69b..bbae460 100644 --- a/pywaybackup/archive.py +++ b/pywaybackup/archive.py @@ -8,6 +8,7 @@ import json import urllib.parse import http.client +import queue from urllib.parse import urljoin from datetime import datetime, timezone @@ -146,7 +147,7 @@ def query_list(url: str, range: int, start: int, end: int, explicit: bool, mode: # example download: http://web.archive.org/web/20190815104545id_/https://www.google.com/ -def download_list(output, retry, no_redirect, workers, skipset: set = None): +def download_list(output, retry, no_redirect, workers, skipset: set = None, skipfile = None): """ Download a list of urls in format: [{"timestamp": "20190815104545", "url": "https://www.google.com/"}] """ @@ -166,10 +167,11 @@ def download_list(output, retry, no_redirect, workers, skipset: set = None): for snapshot in sc.SNAPSHOT_COLLECTION: if skipset is not None and skip_read(skipset, snapshot["url_archive"]): skip_count += 1 - vb.write(f"\nSKIPPING -> URL: {snapshot['url_archive']}") continue snapshot_queue.put(snapshot) vb.write(progress=skip_count) + if skip_count > 0: + vb.write(f"\n-----> Skipped snapshots: {skip_count}") threads = [] worker = 0 @@ -184,7 +186,7 @@ def download_list(output, retry, no_redirect, workers, skipset: set = None): successed = len([snapshot for snapshot in sc.SNAPSHOT_COLLECTION if "file" in snapshot and snapshot["file"]]) failed = len([snapshot for snapshot in sc.SNAPSHOT_COLLECTION if "file" in snapshot and not snapshot["file"]]) vb.write(f"\nFiles downloaded: {successed}") - vb.write(f"Not downloaded: {failed}\n") + vb.write(f"Files missing: {failed}\n") @@ -327,18 +329,8 @@ def download(output, snapshot_entry, connection, status_message, no_redirect=Fal f" -> {e}" vb.write(status_message) return False - # connection timeout waits and retries - except requests.exceptions.Timeout as e: - status_message = f"{status_message}\n" + \ - f"TIMEOUT -> ({i+1}/{max_retries}), reconnect in {sleep_time} seconds...\n" + \ - f" -> {e}" - vb.write(status_message) - time.sleep(sleep_time) - # connection refused waits and retries - except ConnectionRefusedError as e: - status_message = f"{status_message}\n" + \ - f"REFUSED -> ({i+1}/{max_retries}), reconnect in {sleep_time} seconds...\n" + \ - f" -> {e}" + # We always miss some exceptions, so completely intercept all exceptions + except: vb.write(status_message) time.sleep(sleep_time) vb.write(f"FAILED -> download, append to failed_urls: {download_url}")