Skip to content

Commit

Permalink
2.017 refine regex
Browse files Browse the repository at this point in the history
  • Loading branch information
fffonion committed Feb 2, 2017
1 parent ab76afe commit cd43f41
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 20 deletions.
12 changes: 10 additions & 2 deletions xeHentai/const.py
Expand Up @@ -5,6 +5,7 @@
# fffonion <fffonion@gmail.com>

import os
import re
import sys
import locale

Expand All @@ -15,8 +16,8 @@
CODEPAGE = locale.getdefaultlocale()[1]
ANDROID = 'ANDROID_ARGUMENT' in os.environ

__version__ = 2.016
DEVELOPMENT = False
__version__ = 2.017
DEVELOPMENT = True

SCRIPT_NAME = "xeHentai"

Expand All @@ -31,6 +32,13 @@

DUMMY_FILENAME = "-dummy-"

# Shared, pre-compiled patterns. They are written as raw strings so that
# regex escapes such as \d and \? reach the regex engine untouched instead of
# being parsed as (invalid) Python string escapes, which emits a
# DeprecationWarning/SyntaxWarning on modern interpreters. The pattern text is
# unchanged.

# Gallery index URL: captures the numeric id and the path segment after it.
RE_INDEX = re.compile(r'.+/(\d+)/([^\/]+)/*')
# Picture page URL: captures a 10-hex-digit hash and the number after the dash.
RE_GALLERY = re.compile(r'/([a-f0-9]{10})/[^\-]+\-(\d+)')
# Captures the 40-hex-digit hash that follows "/h/".
RE_IMGHASH = re.compile(r'/h/([a-f0-9]{40})')
# Full-image link: captures the gid and page query values.
# NOTE(review): the '.' in "fullimg.php" is unescaped and matches any character.
RE_FULLIMG = re.compile(r'fullimg.php\?gid=([a-z0-9]+)&page=(\d+)&key=')

# Pattern *string* (not compiled) for the site scheme and host; meant to be
# interpolated into larger patterns with the % operator.
RESTR_SITE = r"https*://(?:[g\.]*e\-|ex)hentai\.org"

XEH_STATE_RUNNING = 0
XEH_STATE_SOFT_EXIT = 1 # wait until current task finish and exit
XEH_STATE_FULL_EXIT = 2 # finish current task stage and exit
Expand Down
2 changes: 1 addition & 1 deletion xeHentai/core.py
Expand Up @@ -107,7 +107,7 @@ def add_task(self, url, cfg_dict = {}):
self._all_tasks[t.guid].cleanup()
return 0, t.guid
self._all_tasks[t.guid] = t
if not re.match("^https*://(g\.e\-|ex)hentai\.org/[^/]+/\d+/[^/]+/*#*$", url):
if not re.match("^%s/[^/]+/\d+/[^/]+/*#*$" % RESTR_SITE, url):
t.set_fail(ERR_URL_NOT_RECOGNIZED)
elif not self.has_login and re.match("^https*://exhentai\.org", url):
t.set_fail(ERR_CANT_DOWNLOAD_EXH)
Expand Down
6 changes: 3 additions & 3 deletions xeHentai/filters.py
Expand Up @@ -43,7 +43,7 @@ def flt_metadata(r, suc, fail):
return re.findall("The ban expires in (.+)", r.text)[0]
meta = {}
# print(r.text)
# sample_hash = re.findall('<a href="https*://(?:g.e-hentai|exhentai).org/./([a-f0-9]{10})/\d+\-\d+"><img', r.text)
# sample_hash = re.findall('<a href="%s/./([a-f0-9]{10})/\d+\-\d+"><img' % RESTR_SITE, r.text)
# meta['sample_hash'] = sample_hash
# meta['resampled'] = {}
meta['gjname'] = util.htmlescape(re.findall('="gj">(.*?)</h1>', r.text)[0])
Expand All @@ -59,7 +59,7 @@ def flt_metadata(r, suc, fail):

suc(meta)
# _ = re.findall(
# 'https*://(g\.e\-|ex)hentai\.org/[^/]+/(\d+)/[^/]+/\?p=\d*" onclick="return false"(.*?)</a>',
# '%s/[^/]+/(\d+)/[^/]+/\?p=\d*" onclick="return false"(.*?)</a>' % RESTR_SITE,
# r.text)
# meta['pagecount'] = 1 if len(_) <= 1 else int(pagecount[-2])

Expand Down Expand Up @@ -97,7 +97,7 @@ def flt_pageurl(r, suc, fail):
# input gallery response
# add per image urls if suc; finish task if fail
picpage = re.findall(
'<a href="(https*://(?:g.e-hentai|exhentai).org/./[a-f0-9]{10}/\d+\-\d+)"><img[^>]+blank.gif',
'<a href="(%s/./[a-f0-9]{10}/\d+\-\d+)"><img alt="\d+" title="Page' % RESTR_SITE,
r.text)
if not picpage:
fail(ERR_NO_PAGEURL_FOUND)
Expand Down
23 changes: 9 additions & 14 deletions xeHentai/task.py
Expand Up @@ -19,16 +19,11 @@
else:
from Queue import Queue, Empty

index_re = re.compile('.+/(\d+)/([^\/]+)/*')
gallery_re = re.compile('/([a-f0-9]{10})/[^\-]+\-(\d+)')
imghash_re = re.compile('/h/([a-f0-9]{40})')
fullimg_re = re.compile('fullimg.php\?gid=([a-z0-9]+)&page=(\d+)&key=')

class Task(object):
def __init__(self, url, cfgdict):
self.url = url
if url:
_ = index_re.findall(url)
_ = RE_INDEX.findall(url)
if _:
self.gid, self.sethash = _[0]
self.failcode = 0
Expand Down Expand Up @@ -65,7 +60,7 @@ def set_fail(self, code):
self.meta = {}

def migrate_exhentai(self):
_ = re.findall("(?:https*://g\.e\-hentai\.org)(.+)", self.url)
_ = re.findall("(?:%s)(.+)" % RESTR_SITE, self.url)
if not _:
return False
self.url = "https://exhentai.org%s" % _[0]
Expand All @@ -86,7 +81,7 @@ def migrate_exhentai(self):
# del self.meta['sample_hash']

def base_url(self):
return re.findall("(https*://(?:g\.e\-|ex)hentai\.org)", self.url)[0]
return re.findall(RESTR_SITE, self.url)[0]

# def get_picpage_url(self, pichash):
# # if the file was resized, this url does not work
Expand All @@ -101,7 +96,7 @@ def set_reload_url(self, imgurl, reload_url, fname):
fpath = self.get_fpath()
old_fid = self.get_fname(imgurl)[0]
old_f = os.path.join(fpath, self.get_fidpad(old_fid))
this_fid = int(gallery_re.findall(reload_url)[0][1])
this_fid = int(RE_GALLERY.findall(reload_url)[0][1])
this_f = os.path.join(fpath, self.get_fidpad(this_fid))
self._f_lock.acquire()
if os.path.exists(old_f):
Expand Down Expand Up @@ -154,7 +149,7 @@ def queue_wrapper(self, callback, pichash = None, url = None):
# if fid not in self._flist_done:
# callback(self.get_picpage_url(pichash))
# elif url:
fhash, fid = gallery_re.findall(url)[0]
fhash, fid = RE_GALLERY.findall(url)[0]
# if fhash not in self.meta['filelist']:
# self.meta['resampled'][fhash] = int(fid)
# self.has_ori = True]
Expand All @@ -173,7 +168,7 @@ def save_file(self, imgurl, redirect_url, binary):
if _: # change it if it's a full image
fname = _[0]
self.reload_map[imgurl][1] = fname
_, fid = gallery_re.findall(pageurl)[0]
_, fid = RE_GALLERY.findall(pageurl)[0]

fn = os.path.join(fpath, self.get_fidpad(int(fid)))
if os.path.exists(fn) and os.stat(fn).st_size > 0:
Expand All @@ -195,7 +190,7 @@ def save_file(self, imgurl, redirect_url, binary):

def get_fname(self, imgurl):
pageurl, fname = self.reload_map[imgurl]
_, fid = gallery_re.findall(pageurl)[0]
_, fid = RE_GALLERY.findall(pageurl)[0]
return int(fid), fname

def get_fpath(self):
Expand Down Expand Up @@ -239,7 +234,7 @@ def rename_fname(self):
try:
os.rename(fname_ori, fname_to)
except Exception as ex:
error_list.append(os.path.split(fname_ori)[1], os.path.split(fname_to)[1], str(ex))
error_list.append((os.path.split(fname_ori)[1], os.path.split(fname_to)[1], str(ex)))
cnt += 1
if cnt == self.meta['total']:
with open(os.path.join(fpath, ".xehdone"), "w"):
Expand Down Expand Up @@ -269,7 +264,7 @@ def from_dict(self, j):
[getattr(self, k).put(e, False) for e in j[k]]
else:
setattr(self, k, j[k])
_ = index_re.findall(self.url)
_ = RE_INDEX.findall(self.url)
if _:
self.gid, self.sethash = _[0]
return self
Expand Down

0 comments on commit cd43f41

Please sign in to comment.