From d7ec0664d94b22552729eadef84a5dcaf7319aed Mon Sep 17 00:00:00 2001 From: Fake-Name Date: Wed, 15 Jul 2020 02:06:10 -0700 Subject: [PATCH] Fix https://github.com/fake-name/xA-Scraper/issues/98 Handle a few patreon corner-cases. --- xascraper/modules/fa/faScrape.py | 2 +- xascraper/modules/patreon/patreonScrape.py | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/xascraper/modules/fa/faScrape.py b/xascraper/modules/fa/faScrape.py index 5770d3b..551f417 100644 --- a/xascraper/modules/fa/faScrape.py +++ b/xascraper/modules/fa/faScrape.py @@ -68,7 +68,7 @@ def getCookie(self): if not solver: self.log.error("No captcha solver configured (or no solver with a non-zero balance)! Cannot continue!") - return "Login Failed" + return False, "Login Failed" login_pg = self.wg.getpage('https://www.furaffinity.net/login/?mode=imagecaptcha') diff --git a/xascraper/modules/patreon/patreonScrape.py b/xascraper/modules/patreon/patreonScrape.py index 4c6ca98..7eb10ef 100644 --- a/xascraper/modules/patreon/patreonScrape.py +++ b/xascraper/modules/patreon/patreonScrape.py @@ -375,7 +375,7 @@ def save_json(self, aname, itemid, filecontent): os.makedirs(fqpath) fqpath = os.path.join(fqpath, 'itemid-{id}.pyson'.format(id=itemid)) with open(fqpath, "wb") as fp: - fstr = pprint.pformat(filecontent) + # fstr = pprint.pformat(filecontent) fp.write(fstr.encode("utf-8")) def save_image(self, aname, pid, fname, furl): @@ -483,6 +483,23 @@ def _get_art_post(self, postId, artistName): "?include=media" ) + if 'status' in post and post['status'] == '404': + self.log.warning("Post is not found!") + fail = { + 'status' : '' + } + return fail + + + if not 'data' in post: + self.log.warning("No 'data' member in post!") + pprint.pprint(post) + fail = { + 'status' : '' + } + return fail + + post_content = post['data'] post_info = post_content['attributes'] @@ -513,7 +530,7 @@ def _get_art_post(self, postId, artistName): raise exceptions.CannotAccessException("You can't view that content!") # if not 'content' in post_info: - pprint.pprint(post_content) + # pprint.pprint(post_content) ret = { 'page_desc' : post_info['content'],