Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 7 additions & 15 deletions pywaybackup/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import json
import urllib.parse
import http.client
import queue
from urllib.parse import urljoin
from datetime import datetime, timezone

Expand Down Expand Up @@ -146,7 +147,7 @@ def query_list(url: str, range: int, start: int, end: int, explicit: bool, mode:


# example download: http://web.archive.org/web/20190815104545id_/https://www.google.com/
def download_list(output, retry, no_redirect, workers, skipset: set = None):
def download_list(output, retry, no_redirect, workers, skipset: set = None, skipfile = None):
"""
Download a list of urls in format: [{"timestamp": "20190815104545", "url": "https://www.google.com/"}]
"""
Expand All @@ -166,10 +167,11 @@ def download_list(output, retry, no_redirect, workers, skipset: set = None):
for snapshot in sc.SNAPSHOT_COLLECTION:
if skipset is not None and skip_read(skipset, snapshot["url_archive"]):
skip_count += 1
vb.write(f"\nSKIPPING -> URL: {snapshot['url_archive']}")
continue
snapshot_queue.put(snapshot)
vb.write(progress=skip_count)
if skip_count > 0:
vb.write(f"\n-----> Skipped snapshots: {skip_count}")

threads = []
worker = 0
Expand All @@ -184,7 +186,7 @@ def download_list(output, retry, no_redirect, workers, skipset: set = None):
successed = len([snapshot for snapshot in sc.SNAPSHOT_COLLECTION if "file" in snapshot and snapshot["file"]])
failed = len([snapshot for snapshot in sc.SNAPSHOT_COLLECTION if "file" in snapshot and not snapshot["file"]])
vb.write(f"\nFiles downloaded: {successed}")
vb.write(f"Not downloaded: {failed}\n")
vb.write(f"Files missing: {failed}\n")



Expand Down Expand Up @@ -327,18 +329,8 @@ def download(output, snapshot_entry, connection, status_message, no_redirect=Fal
f" -> {e}"
vb.write(status_message)
return False
# connection timeout waits and retries
except requests.exceptions.Timeout as e:
status_message = f"{status_message}\n" + \
f"TIMEOUT -> ({i+1}/{max_retries}), reconnect in {sleep_time} seconds...\n" + \
f" -> {e}"
vb.write(status_message)
time.sleep(sleep_time)
# connection refused waits and retries
except ConnectionRefusedError as e:
status_message = f"{status_message}\n" + \
f"REFUSED -> ({i+1}/{max_retries}), reconnect in {sleep_time} seconds...\n" + \
f" -> {e}"
# Some exception types inevitably slip past the specific handlers above, so catch everything else here, then wait and retry
except:
vb.write(status_message)
time.sleep(sleep_time)
vb.write(f"FAILED -> download, append to failed_urls: {download_url}")
Expand Down