This repository has been archived by the owner on Feb 12, 2024. It is now read-only.


Refactored script for new API, properly this time.
Wulfre committed Apr 12, 2020
1 parent a169e6a commit 9310d00
Showing 6 changed files with 105 additions and 364 deletions.
59 changes: 7 additions & 52 deletions .gitignore
@@ -1,58 +1,13 @@
####################################
# WINDOWS
####################################

# Thumbnail Cache
Thumbs.db
ehthumbs.db
ehthumbs_vista.db

# Dump
*.stackdump

# Folder Config
[Dd]esktop.ini

# Recycle Bin
$RECYCLE.BIN/

# Installers
*.cab
*.msi
*.msix
*.msm
*.msp

# Shortcuts
*.lnk

####################################
# VS CODE
####################################

# User Files
# VS Code
.vscode/

####################################
# PYTHON
####################################

# Compiled
# Python
__pycache__/
*.pyc

# PyInstaller
*.ico
*.manifest
*.spec
build/
dist/

####################################
# OTHER
####################################

# Repo Specific
# Repo
downloads/
build.bat
*config.*
config.yaml

# Remove before committing
old/
168 changes: 51 additions & 117 deletions e621dl.py
@@ -1,134 +1,68 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#!/usr/bin/env python3

# Internal Imports
import os
from distutils.version import StrictVersion
from fnmatch import fnmatch

# Personal Imports
import httpx
from e621dl import constants
from e621dl import local
from e621dl import remote

# This block will only be read if e621dl.py is directly executed as a script. Not if it is imported.
if __name__ == '__main__':
# Create the requests session that will be used throughout the run.
with remote.requests_retry_session() as session:
# Set the user-agent. Requirements are specified at https://e621.net/wiki_pages/2425#Basics.
session.headers['User-Agent'] = f"e621dl.py/{constants.VERSION} (by Wulfre)"

# Check if a new version is released on github. If so, notify the user.
if StrictVersion(constants.VERSION) < StrictVersion(remote.get_github_release(session)):
print('A NEW VERSION OF e621dl IS AVAILABLE ON GITHUB AT https://github.com/Wulfre/e621dl/releases/latest.')

print(f"[i] Running e621dl version {constants.VERSION}.")

print('')
print("[i] Parsing config...")

config = local.get_config()

# Initialize the lists that will be used to filter posts.
searches = []

# Initialize last_id.
last_id = None

# Initialize login information.
login = {
'username': config['login'].get('username'),
'api_key': config['login'].get('api_key')
}

if login['username'] or login['api_key'] == None:
print('[i] No login detected. Some posts may be hidden and unable to be downloaded.')

# Initialize user configured options in case any are missing.
default_days = config['default_search'].get('days', 1)
default_score = config['default_search'].get('min_score', -0x7F_FF_FF_FF)
default_favs = config['default_search'].get('min_favs', 0)
default_ratings = config['default_search'].get('ratings', ['s'])

#blacklist = [remote.get_tag_alias(tag.lower(), session) for tag in config['blacklist']]
blacklist = config['blacklist']

for key, value in config['searches'].items():
# Get the tags that will be searched for. Tags are aliased to their acknowledged names.
#section_tags = [remote.get_tag_alias(tag.lower(), session) for tag in value['tags']]
section_tags = value['tags']

# Replace options that are specified by the user.
section_date = local.get_date(value.get('days', default_days))
section_score = value.get('min_score', default_score)
section_favs = value.get('min_favs', default_favs)
section_ratings = value.get('ratings', default_ratings)

# Append the final values that will be used for the specific section to the list of searches.
# Note section_tags is a list within a list.
searches.append({
'directory': key,
'tags': section_tags,
'ratings': section_ratings,
'min_score': section_score,
'min_favs': section_favs,
'earliest_date': section_date
})

print('')
print("[i] Checking for partial downloads...")
remote.finish_partial_downloads(session)

print(f"[i] Running e621dl version {constants.VERSION}.")

print("[i] Getting config...")

config = local.get_config()
blacklist = config.get('blacklist', [])
search_defaults = config.get('search_defaults')

searches = []
for key, value in config.get('searches').items():
if len(value.get('tags')) > constants.MAX_SEARCH_TAGS:
print(f"[i] Too many tags in search '{key}'. Tags after {constants.MAX_SEARCH_TAGS} will be discarded.")
value['tags'] = value['tags'][:constants.MAX_SEARCH_TAGS]

searches.append({
'directory': key,
'tags': value.get('tags'),
'start_date': local.get_start_date(value.get('days', search_defaults.get('days', 1))),
'min_score': value.get('min_score', search_defaults.get('min_score', 0)),
'min_fav_count': value.get('min_fav_count', search_defaults.get('min_fav_count', 0)),
'allowed_ratings': value.get('allowed_ratings', search_defaults.get('allowed_ratings', ['s']))
})

with httpx.Client(
headers = {'user-agent': f"e621dl.py/{constants.VERSION} (by Wulfre)"},
auth = (config.get('auth').get('username'), config.get('auth').get('api_key')) if config.get('auth').get('api_key') is not None else None
) as client:
for search in searches:
print('')

# Creates the string to be sent to the API.
# Currently only 38 items can be sent directly so the rest are discarded to be filtered out later.
if len(search['tags']) > constants.MAX_TAGS:
search_string = ' '.join(search['tags'][:constants.MAX_TAGS])
else:
search_string = ' '.join(search['tags'])
print(f"[i] Getting posts for search '{search['directory']}'.")

# Sets up a loop that will continue indefinitely until the last post of a search has been found.
last_id = None
while True:
print("[i] Getting posts...")
results = remote.get_posts(search_string, search['earliest_date'], last_id, login, session)['posts']

# Gets the id of the last post found in the search so that the search can continue.
try:
last_id = results[-1]['id']
except IndexError:
last_id = None
print('[i] No more posts for current search.')

for post in results:
path = local.make_path(search['directory'], post['id'], post['file']['ext'])
tags = [x for y in post['tags'].values() for x in y]
posts = remote.get_posts(client, ' '.join(search['tags']), search['start_date'], last_id)

for post in posts:
path = local.make_path(search.get('directory'), post.get('id'), post.get('file').get('ext'))

if os.path.isfile(path):
print(f"[✗] Post {post['id']} was already downloaded.")
elif post['file']['url'] == None:
print(f"[✗] Post {post['id']} was skipped for being hidden to guest users.")
elif post['rating'] not in search['ratings']:
print(f"[✗] Post {post['id']} was skipped for missing a requested rating.")
# Using fnmatch allows for wildcards to be properly filtered.
elif [x for x in tags if any(fnmatch(x, y) for y in blacklist)]:
print(f"[✗] Post {post['id']} was skipped for having a blacklisted tag.")
elif not set(search['tags'][(constants.MAX_TAGS - 1):]).issubset(tags):
print(f"[✗] Post {post['id']} was skipped for missing a requested tag.")
elif int(post['score']['total']) < search['min_score']:
print(f"[✗] Post {post['id']} was skipped for having a low score.")
elif int(post['fav_count']) < search['min_favs']:
print(f"[✗] Post {post['id']} was skipped for having a low favorite count.")
print(f"[i] Post {post.get('id')} was already downloaded.")
elif post.get('file').get('url') is None:
print(f"[✗] Post {post.get('id')} was skipped for being hidden to guests.")
elif post.get('rating') not in search.get('allowed_ratings'):
print(f"[✗] Post {post.get('id')} was skipped for having a mismatched rating.")
elif any(x in [x for y in post.get('tags').values() for x in y] for x in blacklist):
print(f"[✗] Post {post.get('id')} was skipped for having a blacklisted tag.")
elif post.get('score').get('total') < search.get('min_score'):
print(f"[✗] Post {post.get('id')} was skipped for having a low score.")
elif post.get('fav_count') < search.get('min_fav_count'):
print(f"[✗] Post {post.get('id')} was skipped for having a low favorite count.")
else:
print(f"[✓] Post {post['id']} is being downloaded.")
remote.download_post(post['file']['url'], path, session)
print(f"[✓] Post {post.get('id')} is being downloaded.")
remote.download_post(client, post.get('file').get('url'), path)

# Break while loop. End program.
if last_id == None:
last_id = posts[-1].get('id') if posts else None
if last_id is None:
break

# End program.
print('')
input("[✓] All searches complete. Press ENTER to exit...")
print('[i] All searches complete.')
raise SystemExit
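
For readers following the refactor, here is a minimal, self-contained sketch of the search loop that the new e621dl.py implements. It is an illustration under stated assumptions, not the project's actual remote module: remote.get_posts is not part of this commit, so a hypothetical get_posts helper is stubbed in against e621's public /posts.json endpoint, the start date is assumed to be sent as the date:>= metatag, and pagination is assumed to use page=b<last_id> to walk backwards through post ids, which is what the last_id handling above implies. The blacklist check keeps the fnmatch wildcard matching from the removed code.

# Minimal sketch of the refactored search loop; hypothetical helper names,
# only httpx and the standard library are required.
from fnmatch import fnmatch

import httpx

VERSION = '5.0.0'
MAX_SEARCH_RESULTS = 320

def get_posts(client, tag_string, start_date, last_id):
    # Hypothetical stand-in for remote.get_posts(); assumes the public
    # /posts.json endpoint, the date:>= metatag, and page=b<id> pagination.
    params = {'tags': f"{tag_string} date:>={start_date}", 'limit': MAX_SEARCH_RESULTS}

    if last_id is not None:
        params['page'] = f"b{last_id}"  # only return posts with an id below last_id

    response = client.get('https://e621.net/posts.json', params = params)
    response.raise_for_status()
    return response.json()['posts']

blacklist = ['watermark', 'sketch*']  # fnmatch patterns, so wildcards keep working

with httpx.Client(headers = {'user-agent': f"e621dl.py/{VERSION} (by Wulfre)"}) as client:
    last_id = None

    while True:
        posts = get_posts(client, 'cat rating:s', '2020-04-01', last_id)

        for post in posts:
            tags = [x for y in post['tags'].values() for x in y]

            if any(fnmatch(tag, pattern) for tag in tags for pattern in blacklist):
                continue  # blacklisted tag found, skip the post

            print(f"[✓] Post {post['id']} would be downloaded.")

        last_id = posts[-1]['id'] if posts else None
        if last_id is None:
            break
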
28 changes: 15 additions & 13 deletions e621dl/constants.py
@@ -1,22 +1,24 @@
VERSION = '5.0.0'

MAX_RESULTS = 320
MAX_TAGS = 38
MAX_SEARCH_RESULTS = 320
MAX_SEARCH_TAGS = 38
MAX_REQUESTS_PER_SECOND = 1
PARTIAL_DOWNLOAD_EXT = 'request'

DEFAULT_CONFIG_TEXT = '''login:
DEFAULT_CONFIG_TEXT = '''auth:
username:
api_key:
default_search:
# Note that if you included your auth above, then your account blacklist will already be applied.
blacklist:
search_defaults:
days: 1
min_score: 0
min_favs: 0
ratings:
min_fav_count: 0
allowed_ratings:
- s
blacklist:
searches:
cats:
tags:
@@ -33,11 +35,11 @@
# dogs:
# days: 30
# min_score: 10
# min_favs: 10
# ratings:
# -s
# -q
# -e
# min_fav_count: 10
# allowed_ratings:
# - s
# - q
# - e
# tags:
# - dog
# - brown_fur'''
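
As a usage note for the new config shape above, here is a short sketch, assuming only PyYAML, that parses a config shaped like DEFAULT_CONFIG_TEXT (written with the nesting its keys imply) and applies the value.get(..., search_defaults.get(...)) fallback pattern used in e621dl.py. The inline values are the defaults from the text above; everything else is illustrative.

# Sketch: parse a config shaped like DEFAULT_CONFIG_TEXT and apply search_defaults.
import yaml

config_text = '''
auth:
    username:
    api_key:
blacklist:
search_defaults:
    days: 1
    min_score: 0
    min_fav_count: 0
    allowed_ratings:
        - s
searches:
    cats:
        tags:
            - cat
            - cute
'''

config = yaml.safe_load(config_text)
search_defaults = config.get('search_defaults')

for name, value in config.get('searches').items():
    days = value.get('days', search_defaults.get('days', 1))
    ratings = value.get('allowed_ratings', search_defaults.get('allowed_ratings', ['s']))
    print(name, days, ratings)  # cats 1 ['s']
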
43 changes: 14 additions & 29 deletions e621dl/local.py
@@ -1,45 +1,30 @@
# Internal Imports
import datetime
import os

# Personal Imports
from e621dl import constants

# Vendor Imports
from datetime import date
import yaml
from e621dl import constants

def make_config():
with open('config.yaml', 'wt', encoding = 'utf-8') as outfile:
outfile.write(constants.DEFAULT_CONFIG_TEXT)
print("[i] New default config file created. Please add tag groups to this file.'")
with open('config.yaml', 'wt', encoding = 'utf-8') as file:
file.write(constants.DEFAULT_CONFIG_TEXT)
print('[i] New default config file created. Please add tag groups to this file.')
raise SystemExit

def get_config():
if not os.path.isfile('config.yaml'):
print("[!] No config file found.")
print('[!] No config file found.')
make_config()

with open('config.yaml', 'rt', encoding = 'utf-8') as infile:
config = yaml.load(infile, Loader = yaml.SafeLoader)

return config

def get_date(days_to_check):
ordinal_check_date = datetime.date.today().toordinal() - (days_to_check - 1)

if ordinal_check_date < 1:
ordinal_check_date = 1
elif ordinal_check_date > datetime.date.today().toordinal():
ordinal_check_date = datetime.date.today().toordinal()

return datetime.date.fromordinal(ordinal_check_date).strftime('%Y-%m-%d')
with open('config.yaml', 'rt', encoding = 'utf-8') as file:
return yaml.load(file, Loader = yaml.SafeLoader)

def get_start_date(days_to_check):
return date.fromordinal(max(date.today().toordinal() - (days_to_check - 1), 1)).strftime('%Y-%m-%d')

def substitute_illegals(char):
illegals = ['\\', ':', '*', '?', '\"', '<', '>', '|', ' ']
return '_' if char in illegals else char
def substitute_illegal_chars(char):
return '_' if char in ['\\', ':', '*', '?', '\"', '<', '>', '|', ' '] else char

def make_path(dir_name, filename, ext):
clean_dir_name = ''.join([substitute_illegals(char) for char in dir_name]).lower()
clean_dir_name = ''.join([substitute_illegal_chars(char) for char in dir_name])

if not os.path.isdir(f"downloads/{clean_dir_name}"):
os.makedirs(f"downloads/{clean_dir_name}")
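
To make the behaviour of the rewritten helpers in local.py concrete, here is a small worked example; the functions are copied from the diff above, and the expected values in the comments assume the script runs on 2020-04-12, the commit date.

# Worked example for get_start_date() and substitute_illegal_chars() from local.py.
from datetime import date

def get_start_date(days_to_check):
    # Same body as above: step back (days - 1) days, clamped to ordinal day 1.
    return date.fromordinal(max(date.today().toordinal() - (days_to_check - 1), 1)).strftime('%Y-%m-%d')

def substitute_illegal_chars(char):
    return '_' if char in ['\\', ':', '*', '?', '\"', '<', '>', '|', ' '] else char

# Assuming today is 2020-04-12:
print(get_start_date(1))    # 2020-04-12 -- a 1-day window means today only
print(get_start_date(30))   # 2020-03-14 -- 30 days, counting today
print(''.join(substitute_illegal_chars(c) for c in 'cats: cute'))  # cats__cute
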
