From 9310d0032926d35bf6cc5aa941dc0ab65824e6dd Mon Sep 17 00:00:00 2001
From: Wulfre <6633817+Wulfre@users.noreply.github.com>
Date: Sat, 11 Apr 2020 22:18:31 -0400
Subject: [PATCH] Refactored script for new API, properly this time.

---
 .gitignore          |  59 ++--------------
 e621dl.py           | 168 +++++++++++------------------------
 e621dl/constants.py |  28 ++++----
 e621dl/local.py     |  43 ++++-------
 e621dl/remote.py    | 169 +++++---------------------------------
 requirements.txt    |   2 +-
 6 files changed, 105 insertions(+), 364 deletions(-)

diff --git a/.gitignore b/.gitignore
index 788330c..4ed122a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,58 +1,13 @@
-####################################
-# WINDOWS
-####################################
-
-# Thumbnail Cache
-Thumbs.db
-ehthumbs.db
-ehthumbs_vista.db
-
-# Dump
-*.stackdump
-
-# Folder Config
-[Dd]esktop.ini
-
-# Recycle Bin
-$RECYCLE.BIN/
-
-# Installers
-*.cab
-*.msi
-*.msix
-*.msm
-*.msp
-
-# Shortcuts
-*.lnk
-
-####################################
-# VS CODE
-####################################
-
-# User Files
+# VS Code
 .vscode/
 
-####################################
-# PYTHON
-####################################
-
-# Compiled
+# Python
 __pycache__/
 *.pyc
 
-# PyInstaller
-*.ico
-*.manifest
-*.spec
-build/
-dist/
-
-####################################
-# OTHER
-####################################
-
-# Repo Specific
+# Repo
 downloads/
-build.bat
-*config.*
+config.yaml
+
+# Remove before committing
+old/
diff --git a/e621dl.py b/e621dl.py
index 175fa46..deece7a 100755
--- a/e621dl.py
+++ b/e621dl.py
@@ -1,134 +1,68 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 
-# Internal Imports
 import os
-from distutils.version import StrictVersion
-from fnmatch import fnmatch
-
-# Personal Imports
+import httpx
 from e621dl import constants
 from e621dl import local
 from e621dl import remote
 
-# This block will only be read if e621dl.py is directly executed as a script. Not if it is imported.
 if __name__ == '__main__':
-    # Create the requests session that will be used throughout the run.
-    with remote.requests_retry_session() as session:
-        # Set the user-agent. Requirements are specified at https://e621.net/wiki_pages/2425#Basics.
-        session.headers['User-Agent'] = f"e621dl.py/{constants.VERSION} (by Wulfre)"
-
-        # Check if a new version is released on github. If so, notify the user.
-        if StrictVersion(constants.VERSION) < StrictVersion(remote.get_github_release(session)):
-            print('A NEW VERSION OF e621dl IS AVAILABLE ON GITHUB AT https://github.com/Wulfre/e621dl/releases/latest.')
-
-        print(f"[i] Running e621dl version {constants.VERSION}.")
-
-        print('')
-        print("[i] Parsing config...")
-
-        config = local.get_config()
-
-        # Initialize the lists that will be used to filter posts.
-        searches = []
-
-        # Initialize last_id.
-        last_id = None
-
-        # Initialize login information.
-        login = {
-            'username': config['login'].get('username'),
-            'api_key': config['login'].get('api_key')
-        }
-
-        if login['username'] or login['api_key'] == None:
-            print('[i] No login detected. Some posts may be hidden and unable to be downloaded.')
-
-        # Initialize user configured options in case any are missing.
-        default_days = config['default_search'].get('days', 1)
-        default_score = config['default_search'].get('min_score', -0x7F_FF_FF_FF)
-        default_favs = config['default_search'].get('min_favs', 0)
-        default_ratings = config['default_search'].get('ratings', ['s'])
-
-        #blacklist = [remote.get_tag_alias(tag.lower(), session) for tag in config['blacklist']]
-        blacklist = config['blacklist']
-
-        for key, value in config['searches'].items():
-            # Get the tags that will be searched for. Tags are aliased to their acknowledged names.
-            #section_tags = [remote.get_tag_alias(tag.lower(), session) for tag in value['tags']]
-            section_tags = value['tags']
-
-            # Replace options that are specified by the user.
-            section_date = local.get_date(value.get('days', default_days))
-            section_score = value.get('min_score', default_score)
-            section_favs = value.get('min_favs', default_favs)
-            section_ratings = value.get('ratings', default_ratings)
-
-            # Append the final values that will be used for the specific section to the list of searches.
-            # Note section_tags is a list within a list.
-            searches.append({
-                'directory': key,
-                'tags': section_tags,
-                'ratings': section_ratings,
-                'min_score': section_score,
-                'min_favs': section_favs,
-                'earliest_date': section_date
-            })
-
-        print('')
-        print("[i] Checking for partial downloads...")
-        remote.finish_partial_downloads(session)
-
+    print(f"[i] Running e621dl version {constants.VERSION}.")
+
+    print("[i] Getting config...")
+
+    config = local.get_config()
+    blacklist = config.get('blacklist') or []
+    search_defaults = config.get('search_defaults') or {}
+
+    searches = []
+    for key, value in (config.get('searches') or {}).items():
+        if len(value.get('tags')) > constants.MAX_SEARCH_TAGS:
+            print(f"[i] Too many tags in search '{key}'. Tags beyond the first {constants.MAX_SEARCH_TAGS} will be discarded.")
+            value['tags'] = value['tags'][:constants.MAX_SEARCH_TAGS]
+
+        searches.append({
+            'directory': key,
+            'tags': value.get('tags'),
+            'start_date': local.get_start_date(value.get('days', search_defaults.get('days', 1))),
+            'min_score': value.get('min_score', search_defaults.get('min_score', 0)),
+            'min_fav_count': value.get('min_fav_count', search_defaults.get('min_fav_count', 0)),
+            'allowed_ratings': value.get('allowed_ratings', search_defaults.get('allowed_ratings', ['s']))
+        })
+
+    with httpx.Client(
+        headers = {'user-agent': f"e621dl.py/{constants.VERSION} (by Wulfre)"},
+        auth = (config.get('auth').get('username'), config.get('auth').get('api_key')) if config.get('auth').get('api_key') is not None else None
+    ) as client:
         for search in searches:
-            print('')
-
-            # Creates the string to be sent to the API.
-            # Currently only 38 items can be sent directly so the rest are discarded to be filtered out later.
-            if len(search['tags']) > constants.MAX_TAGS:
-                search_string = ' '.join(search['tags'][:constants.MAX_TAGS])
-            else:
-                search_string = ' '.join(search['tags'])
+            print(f"[i] Getting posts for search '{search['directory']}'.")
 
-            # Sets up a loop that will continue indefinitely until the last post of a search has been found.
+            last_id = None
             while True:
-                print("[i] Getting posts...")
-                results = remote.get_posts(search_string, search['earliest_date'], last_id, login, session)['posts']
-
-                # Gets the id of the last post found in the search so that the search can continue.
-                try:
-                    last_id = results[-1]['id']
-                except IndexError:
-                    last_id = None
-                    print('[i] No more posts for current search.')
-
-                for post in results:
-                    path = local.make_path(search['directory'], post['id'], post['file']['ext'])
-                    tags = [x for y in post['tags'].values() for x in y]
+                posts = remote.get_posts(client, ' '.join(search['tags']), search['start_date'], last_id)
+
+                for post in posts:
+                    path = local.make_path(search.get('directory'), post.get('id'), post.get('file').get('ext'))
 
                     if os.path.isfile(path):
-                        print(f"[✗] Post {post['id']} was already downloaded.")
-                    elif post['file']['url'] == None:
-                        print(f"[✗] Post {post['id']} was skipped for being hidden to guest users.")
-                    elif post['rating'] not in search['ratings']:
-                        print(f"[✗] Post {post['id']} was skipped for missing a requested rating.")
-                    # Using fnmatch allows for wildcards to be properly filtered.
-                    elif [x for x in tags if any(fnmatch(x, y) for y in blacklist)]:
-                        print(f"[✗] Post {post['id']} was skipped for having a blacklisted tag.")
-                    elif not set(search['tags'][(constants.MAX_TAGS - 1):]).issubset(tags):
-                        print(f"[✗] Post {post['id']} was skipped for missing a requested tag.")
-                    elif int(post['score']['total']) < search['min_score']:
-                        print(f"[✗] Post {post['id']} was skipped for having a low score.")
-                    elif int(post['fav_count']) < search['min_favs']:
-                        print(f"[✗] Post {post['id']} was skipped for having a low favorite count.")
+                        print(f"[i] Post {post.get('id')} was already downloaded.")
+                    elif post.get('file').get('url') is None:
+                        print(f"[✗] Post {post.get('id')} was skipped for being hidden to guests.")
+                    elif post.get('rating') not in search.get('allowed_ratings'):
+                        print(f"[✗] Post {post.get('id')} was skipped for having a mismatched rating.")
+                    elif any(x in [x for y in post.get('tags').values() for x in y] for x in blacklist):
+                        print(f"[✗] Post {post.get('id')} was skipped for having a blacklisted tag.")
+                    elif post.get('score').get('total') < search.get('min_score'):
+                        print(f"[✗] Post {post.get('id')} was skipped for having a low score.")
+                    elif post.get('fav_count') < search.get('min_fav_count'):
+                        print(f"[✗] Post {post.get('id')} was skipped for having a low favorite count.")
                     else:
-                        print(f"[✓] Post {post['id']} is being downloaded.")
-                        remote.download_post(post['file']['url'], path, session)
+                        print(f"[✓] Post {post.get('id')} is being downloaded.")
+                        remote.download_post(client, post.get('file').get('url'), path)
 
-                # Break while loop. End program.
-                if last_id == None:
+                last_id = posts[-1].get('id') if posts else None
+                if last_id is None:
                     break
 
-    # End program.
-    print('')
-    input("[✓] All searches complete. Press ENTER to exit...")
+    print('[i] All searches complete.')
     raise SystemExit
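Note on the merge logic above: each search section falls back to `search_defaults`, which in turn falls back to hard-coded defaults, via chained dict lookups. A minimal standalone sketch of that fallback behavior (the config values here are made up for illustration, not taken from the shipped default config):

    config = {
        'search_defaults': {'days': 7, 'min_score': 10},
        'searches': {
            'cats': {'tags': ['cat'], 'min_score': 25},  # overrides min_score only
            'dogs': {'tags': ['dog']},                   # inherits every default
        },
    }

    search_defaults = config.get('search_defaults') or {}

    for name, search in (config.get('searches') or {}).items():
        # chained fallback: per-search value -> search_defaults -> hard-coded default
        print({
            'directory': name,
            'tags': search['tags'],
            'days': search.get('days', search_defaults.get('days', 1)),
            'min_score': search.get('min_score', search_defaults.get('min_score', 0)),
        })
    # {'directory': 'cats', 'tags': ['cat'], 'days': 7, 'min_score': 25}
    # {'directory': 'dogs', 'tags': ['dog'], 'days': 7, 'min_score': 10}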
diff --git a/e621dl/constants.py b/e621dl/constants.py
index 88e866a..226c6ef 100644
--- a/e621dl/constants.py
+++ b/e621dl/constants.py
@@ -1,22 +1,24 @@
 VERSION = '5.0.0'
-MAX_RESULTS = 320
-MAX_TAGS = 38
+MAX_SEARCH_RESULTS = 320
+MAX_SEARCH_TAGS = 38
+MAX_REQUESTS_PER_SECOND = 1
 
 PARTIAL_DOWNLOAD_EXT = 'request'
 
-DEFAULT_CONFIG_TEXT = '''login:
+DEFAULT_CONFIG_TEXT = '''auth:
     username:
     api_key:
 
-default_search:
+# Note that if you included your auth above, then your account blacklist will already be applied.
+blacklist:
+
+search_defaults:
     days: 1
     min_score: 0
-    min_favs: 0
-    ratings:
+    min_fav_count: 0
+    allowed_ratings:
         - s
 
-blacklist:
-
 searches:
     cats:
         tags:
@@ -33,11 +35,11 @@
 # dogs:
 #     days: 30
 #     min_score: 10
-#     min_favs: 10
-#     ratings:
-#         -s
-#         -q
-#         -e
+#     min_fav_count: 10
+#     allowed_ratings:
+#         - s
+#         - q
+#         - e
 #     tags:
 #         - dog
 #         - brown_fur'''
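The default config above is plain YAML, and keys that are left blank (`username`, `api_key`, `blacklist`) parse to `None` rather than to an empty string or list, which is why the main script guards its lookups with `or []` / `is not None`. A quick check of the parsed shape, assuming PyYAML and the package layout in this patch:

    import yaml
    from e621dl import constants

    config = yaml.safe_load(constants.DEFAULT_CONFIG_TEXT)
    print(config['auth'])                                # {'username': None, 'api_key': None}
    print(config['blacklist'])                           # None, not []
    print(config['search_defaults']['allowed_ratings'])  # ['s']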
diff --git a/e621dl/local.py b/e621dl/local.py
index fa9aecf..879537e 100644
--- a/e621dl/local.py
+++ b/e621dl/local.py
@@ -1,45 +1,30 @@
-# Internal Imports
-import datetime
 import os
-
-# Personal Imports
-from e621dl import constants
-
-# Vendor Imports
+from datetime import date
 import yaml
+from e621dl import constants
 def make_config():
-    with open('config.yaml', 'wt', encoding = 'utf-8') as outfile:
-        outfile.write(constants.DEFAULT_CONFIG_TEXT)
-        print("[i] New default config file created. Please add tag groups to this file.'")
+    with open('config.yaml', 'wt', encoding = 'utf-8') as file:
+        file.write(constants.DEFAULT_CONFIG_TEXT)
+        print('[i] New default config file created. Please add tag groups to this file.')
 
     raise SystemExit
 
 def get_config():
     if not os.path.isfile('config.yaml'):
-        print("[!] No config file found.")
+        print('[!] No config file found.')
         make_config()
 
-    with open('config.yaml', 'rt', encoding = 'utf-8') as infile:
-        config = yaml.load(infile, Loader = yaml.SafeLoader)
-
-    return config
-
-def get_date(days_to_check):
-    ordinal_check_date = datetime.date.today().toordinal() - (days_to_check - 1)
-
-    if ordinal_check_date < 1:
-        ordinal_check_date = 1
-    elif ordinal_check_date > datetime.date.today().toordinal():
-        ordinal_check_date = datetime.date.today().toordinal()
-
-    return datetime.date.fromordinal(ordinal_check_date).strftime('%Y-%m-%d')
+    with open('config.yaml', 'rt', encoding = 'utf-8') as file:
+        return yaml.load(file, Loader = yaml.SafeLoader)
+
+def get_start_date(days_to_check):
+    return date.fromordinal(max(date.today().toordinal() - (days_to_check - 1), 1)).strftime('%Y-%m-%d')
 
-def substitute_illegals(char):
-    illegals = ['\\', ':', '*', '?', '\"', '<', '>', '|', ' ']
-    return '_' if char in illegals else char
+def substitute_illegal_chars(char):
+    return '_' if char in ['\\', ':', '*', '?', '\"', '<', '>', '|', ' '] else char
 
 def make_path(dir_name, filename, ext):
-    clean_dir_name = ''.join([substitute_illegals(char) for char in dir_name]).lower()
+    clean_dir_name = ''.join([substitute_illegal_chars(char) for char in dir_name])
 
     if not os.path.isdir(f"downloads/{clean_dir_name}"):
         os.makedirs(f"downloads/{clean_dir_name}")
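For reference, `get_start_date` counts back from today inclusively, so a `days_to_check` of 1 yields today's date, and the ordinal is clamped so it never drops below day 1. A standalone sketch that copies the one-liner above:

    from datetime import date

    def get_start_date(days_to_check):
        # days_to_check counts back from today inclusively, clamped at ordinal 1
        return date.fromordinal(max(date.today().toordinal() - (days_to_check - 1), 1)).strftime('%Y-%m-%d')

    print(get_start_date(1))   # today's date, e.g. '2020-04-11'
    print(get_start_date(30))  # the date 29 days ago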
diff --git a/e621dl/remote.py b/e621dl/remote.py
index edcb332..2675f8e 100644
--- a/e621dl/remote.py
+++ b/e621dl/remote.py
@@ -1,160 +1,25 @@
-# Internal Imports
-import os
 from time import sleep
-from timeit import default_timer
-from shutil import copyfileobj
-
-# Personal Imports
 from e621dl import constants
-from e621dl import local
-
-# Vendor Imports
-import requests
-from requests.adapters import HTTPAdapter
-from requests.packages.urllib3.util.retry import Retry
 
-def requests_retry_session(
-    retries = 5,
-    backoff_factor = 0.3,
-    status_forcelist = (500, 502, 504),
-    session = None,
-):
-    session = session or requests.Session()
-    retry = Retry(
-        total = retries,
-        read = retries,
-        connect = retries,
-        backoff_factor = backoff_factor,
-        status_forcelist = status_forcelist,
-        method_whitelist = frozenset(['GET', 'POST'])
+def get_posts(client, search_string, start_date, last_id):
+    response = client.get(
+        url = 'https://e621.net/posts.json',
+        params = {
+            'limit': constants.MAX_SEARCH_RESULTS,
+            'tags': f"{search_string} date:>={start_date} {'id:<' + str(last_id) if last_id else ''}"
+        }
     )
-    adapter = HTTPAdapter(max_retries = retry)
-    session.mount('http://', adapter)
-    session.mount('https://', adapter)
-    return session
-
-def delayed_get(url, payload, session):
-    # Take time before and after getting the requests response.
-    start = default_timer()
-    with session.get(url, data = payload) as response:
-        elapsed = default_timer() - start
-
-        # If the response took less than 1 second
-        # (a hard limit of 2 requests are allowed per second as per the e621 API)
-        # Wait for the rest of the 1 second.
-        if elapsed < 1:
-            sleep(1 - elapsed)
-
-        return response
-
-def get_github_release(session):
-    url = 'https://api.github.com/repos/wulfre/e621dl/releases/latest'
-
-    with session.get(url) as response:
-        response.raise_for_status()
-
-        return response.json()['tag_name'].strip('v')
-
-def get_posts(search_string, earliest_date, last_id, login, session):
-    url = 'https://e621.net/posts.json'
-    payload = {
-        'limit': constants.MAX_RESULTS,
-        'tags': f"date:>={earliest_date} {search_string}",
-        'login': login['username'],
-        'api_key': login['api_key']
-    }
-
-    if last_id:
-        payload.update(tags = f"id:<{last_id} date:>={earliest_date} {search_string}")
-
-    with delayed_get(url, payload, session) as response:
-        response.raise_for_status()
-
-        return response.json()
-
-def get_tag_alias(user_tag, session):
-    prefix = ''
-
-    if ':' in user_tag:
-        print(f"[!] It is not possible to check if {user_tag} is valid.")
-        return user_tag
-
-    if user_tag[0] == '~':
-        prefix = '~'
-        user_tag = user_tag[1:]
-
-    if user_tag[0] == '-':
-        prefix = '-'
-        user_tag = user_tag[1:]
+    response.raise_for_status()
 
-    url = 'https://e621.net/tag/index.json'
-    payload = {'name': user_tag}
+    if response.elapsed.total_seconds() < 1:
+        sleep(1 - response.elapsed.total_seconds())
 
-    with delayed_get(url, payload, session) as response:
-        response.raise_for_status()
-
-        results = response.json()
-
-        if '*' in user_tag and results:
-            print(f"[✓] The tag {user_tag} is valid.")
-            return user_tag
-
-        for tag in results:
-            if user_tag == tag['name']:
-                print(f"[✓] The tag {prefix}{user_tag} is valid.")
-                return f"{prefix}{user_tag}"
-
-    url = 'https://e621.net/tag_alias/index.json'
-    payload = {'approved': 'true', 'query': user_tag}
+    return response.json().get('posts')
 
-    with delayed_get(url, payload, session) as response:
+def download_post(client, url, path):
+    with client.stream('GET', url) as response:
         response.raise_for_status()
-
-        results = response.json()
-
-        for tag in results:
-            if user_tag == tag['name']:
-                url = 'https://e621.net/tag/show.json'
-                payload = {'id': tag['alias_id']}
-
-                with delayed_get(url, payload, session) as response:
-                    response.raise_for_status()
-                    results = response.json()
-
-                    print(f"[✓] The tag {prefix}{user_tag} was changed to {prefix}{results['name']}.")
-
-                    return f"{prefix}{results['name']}"
-
-    print(f"[!] The tag {prefix}{user_tag} is spelled incorrectly or does not exist.")
-    return ''
-
-def download_post(url, path, session):
-    if f".{constants.PARTIAL_DOWNLOAD_EXT}" not in path:
-        path += f".{constants.PARTIAL_DOWNLOAD_EXT}"
-
-    # Creates file if it does not exist so that os.path.getsize does not raise an exception.
-    try:
-        open(path, 'x')
-    except FileExistsError:
-        pass
-
-    header = {'Range': f"bytes={os.path.getsize(path)}-"}
-    with session.get(url, stream = True, headers = header) as response:
-        if response.ok:
-            with open(path, 'ab') as outfile:
-                copyfileobj(response.raw, outfile)
-
-            os.rename(path, path.replace(f".{constants.PARTIAL_DOWNLOAD_EXT}", ''))
-
-        else:
-            print(f"[!] The downoad URL {url} is not available. Error code: {response.status_code}.")
-
-def finish_partial_downloads(session):
-    for root, dirs, files in os.walk('downloads/'):
-        for file in files:
-            if file.endswith(constants.PARTIAL_DOWNLOAD_EXT):
-                print(f"[!] Partial download {file} found.")
-
-                path = os.path.join(root, file)
-                post_id = int(file.split('.')[0])
-                url = get_posts(f"id:{post_id}", 0, post_id + 1, session)['posts'][0]['file']['url']
-                download_post(url, path, session)
+
+        with open(path, 'wb') as file:
+            for chunk in response.iter_bytes():
+                file.write(chunk)
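The `get_posts`/`download_post` pair above is all a caller needs for paging: pass the lowest post id seen so far and the API returns the next batch of older posts until a page comes back empty, while the elapsed-time check keeps requests at or under one per second. A minimal driver loop, assuming the package is importable and the API reachable (the search string and user-agent are placeholders):

    import httpx
    from e621dl import remote

    with httpx.Client(headers = {'user-agent': 'e621dl-example/0.0 (by Wulfre)'}) as client:
        last_id = None
        while True:
            # each call returns up to MAX_SEARCH_RESULTS posts older than last_id
            posts = remote.get_posts(client, 'cat', '2020-04-01', last_id)
            if not posts:
                break
            last_id = posts[-1]['id']
            print(f"fetched {len(posts)} posts, down to id {last_id}")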
diff --git a/requirements.txt b/requirements.txt
index ae1f79e..ed96feb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-requests
 pyyaml
+httpx
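One behavioral difference hidden in this dependency swap: the old requests session retried failed GETs through urllib3's Retry with backoff on 500/502/504, while a bare httpx.Client never retries. If that resilience matters, httpx can at least retry failed connection attempts at the transport level; a rough sketch (connect-error retries only, not a full substitute for the old status-code backoff):

    import httpx

    # retries here covers connection failures, not HTTP error responses
    transport = httpx.HTTPTransport(retries = 5)
    with httpx.Client(transport = transport, headers = {'user-agent': 'e621dl-example/0.0 (by Wulfre)'}) as client:
        response = client.get('https://e621.net/posts.json', params = {'limit': 1})
        print(response.status_code)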