Skip to content

Commit

Permalink
tumblr_backup: Stop if API responses stop making forward progress
Browse files Browse the repository at this point in the history
Sometimes, at least when backing up likes, the API can get stuck
endlessly returning the same set of posts instead of returning an empty
list. Inspect the response's _links.next.query_params and stop if the
offset/before value fails to change between consecutive responses.

Fixes bbolli#217
  • Loading branch information
cebtenzzre committed Sep 25, 2020
1 parent 01d3528 commit be6bfb2
Showing 1 changed file with 15 additions and 3 deletions.
18 changes: 15 additions & 3 deletions tumblr_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ def _backup(posts):
# Get the JSON entries from the API, which we can only do for MAX_POSTS posts at once.
# Posts "arrive" in reverse chronological order. Post #0 is the most recent one.
i = options.skip
last_next_offset = None
while True:
# find the upper bound
log(account, "Getting posts %d to %d (of %d expected)\r" % (i, i + MAX_POSTS - 1, count_estimate))
Expand All @@ -596,9 +597,20 @@ def _backup(posts):
continue

posts = _get_content(soup)
# `_backup(posts)` can be empty even when `posts` is not if we don't backup reblogged posts
if not posts or not _backup(posts):
log(account, "Backing up posts found empty set of posts, finishing\r")
if not posts:
log(account, "Found empty set of posts, finishing\r")
break

next_params = soup['response']['_links']['next']['query_params']
next_offset = next_params.get('offset') or next_params.get('before')
if next_offset is not None:
if next_offset == last_next_offset:
log(account, "Found same API response twice, finishing\r")
break
last_next_offset = next_offset

if not _backup(posts):
log(account, "Found last requested post, finishing\r")
break

i += MAX_POSTS
Expand Down

0 comments on commit be6bfb2

Please sign in to comment.