Skip to content
Permalink
Browse files

Replace network-based test with admin/status endpoint

- Turns out the network test was failing because there was a test to prevent that from happening :) Moved that into a "/admin/status" page; we won't get notified, but I think it's ok for now
  • Loading branch information
cdrini committed Jan 10, 2020
1 parent 5260851 commit dfa0b0a54301a1b359353a60d87c67972e7a12f7
@@ -14,7 +14,7 @@
from openlibrary.catalog.add_book import load
from openlibrary import accounts

# Root of the public BetterWorldBooks site (used for HTML scraping).
BETTERWORLDBOOKS_BASE_URL = 'https://betterworldbooks.com'
# Older name for the same URL; still referenced elsewhere (e.g. by the tests),
# so it is kept as an alias rather than a second copy of the literal.
BWB_URL = BETTERWORLDBOOKS_BASE_URL
# Product API endpoint; the ISBN is appended directly to this string.
BETTERWORLDBOOKS_API_URL = 'https://products.betterworldbooks.com/service.aspx?ItemId='
# The '{}' is filled with the affiliate id at import time; the trailing '%s'
# is left in place for a later substitution (presumably the book id -- confirm
# against callers).
BWB_AFFILIATE_LINK = 'http://www.anrdoezrs.net/links/{}/type/dlg/http://www.betterworldbooks.com/-id-%s'.format(h.affiliate_id('betterworldbooks'))
# Matches a full date written as four digits, dash, two digits, dash, two digits.
# Raw string so that '\d' is a regex escape rather than an (invalid) string escape.
AMAZON_FULL_DATE_RE = re.compile(r'\d{4}-\d\d-\d\d')
@@ -282,28 +282,25 @@ def get_betterworldbooks_metadata(isbn, thirdparty=False):
return _get_betterworldbooks_thirdparty_metadata(isbn)
return metadata
except Exception:
return {}
return betterworldbooks_fmt(isbn)

def _get_betterworldbooks_thirdparty_metadata(isbn):
    """Scrapes metadata from betterworldbooks website in the case the
    Product API returns no result (i.e. includes 3rd party vendor inventory)

    :param str isbn: Unnormalised ISBN10 or ISBN13
    :return: Metadata for the cheapest BWB listing found on the product page.
    :rtype: dict
    :raises IndexError: if the product page contains no recognisable listing.
        This is deliberate: callers decide how to degrade, and a status check
        can tell "scraper broken" apart from "book has a price".
    """
    url = '%s/product/detail/-%s' % (BETTERWORLDBOOKS_BASE_URL, isbn)
    content = requests.get(url).text
    # Each match is a (condition, price) pair pulled from the listing markup.
    listings = re.findall(
        r'data-condition="(New|Used).*data-price="([0-9.]+)"', content)
    results = [
        betterworldbooks_fmt(isbn, qlt=condition.lower(), price=price)
        for condition, price in listings
    ]
    if not results:
        raise IndexError('No BetterWorldBooks listings found at %s' % url)
    # Compare as Decimal so that e.g. "10.00" sorts after "9.99".
    cheapest = sorted(results, key=lambda i: Decimal(i['price_amt']))[0]
    return cheapest

def _get_betterworldbooks_metadata(isbn):
"""Returns price and other metadata (currently minimal)
@@ -316,8 +313,7 @@ def _get_betterworldbooks_metadata(isbn):

url = BETTERWORLDBOOKS_API_URL + isbn
try:
responses = urllib2.urlopen(url).read()
product_url = re.findall("<DetailURLPage>\$(.+)</DetailURLPage>", response)
response = requests.get(url).content
new_qty = re.findall("<TotalNew>([0-9]+)</TotalNew>", response)
new_price = re.findall("<LowestNewPrice>\$([0-9.]+)</LowestNewPrice>", response)
used_price = re.findall("<LowestUsedPrice>\$([0-9.]+)</LowestUsedPrice>", response)
@@ -362,5 +358,20 @@ def betterworldbooks_fmt(isbn, qlt=None, price=None):
}


def check_bwb_scraper_status():
    """
    Check if the bwb scraper is still working; since it's checking
    HTML, we want to know if it's stopped working.

    Fetches the BetterWorldBooks home page, takes the first ISBN found in
    its markup and runs the third-party scraper against that ISBN.

    :return: ``(passing, details)``. ``passing`` is True when the scraped
        data contains a ``price_amt`` key; ``details`` is the scraped data
        as indented JSON, or a message describing what went wrong.
    :rtype: bool, str
    """
    # Pull an (available) book from the betterworldbooks home page
    content = requests.get(BETTERWORLDBOOKS_BASE_URL).text
    # re.search returns None when nothing matches; indexing the result of
    # re.findall with [0] would raise IndexError before the check below ran.
    match = re.search(r'isbn1="([0-9]+)"', content)
    if match is None:
        return False, 'ISBN missing from %s' % BETTERWORLDBOOKS_BASE_URL
    isbn = match.group(1)
    try:
        data = _get_betterworldbooks_thirdparty_metadata(isbn)
    except Exception as e:
        # This function exists to report scraper breakage, so a scraper
        # error is a failing status, not a reason to error the status page.
        return False, 'Scraper failed for ISBN %s: %r' % (isbn, e)
    return 'price_amt' in data, simplejson.dumps(data, indent=4 * ' ')


# Memoized wrapper around _get_betterworldbooks_metadata, built with
# cache.memcache_memoize under the key prefix given below.
# NOTE(review): timeout presumably is the cache entry lifetime in seconds
# (dateutil.HALF_DAY_SECS) -- confirm against memcache_memoize.
cached_get_betterworldbooks_metadata = cache.memcache_memoize(
_get_betterworldbooks_metadata, "upstream.code._get_betterworldbooks_metadata", timeout=dateutil.HALF_DAY_SECS)
@@ -691,11 +691,19 @@ def GET(self):
return f.read()

class sponsorship_stats:
    """Admin page that renders the sponsorship summary."""

    def GET(self):
        """Render the "admin/sponsorship" template with the current summary."""
        # Imported here, as in the original, rather than at module level.
        from openlibrary.core.sponsorships import summary
        sponsorship_summary = summary()
        return render_template("admin/sponsorship", sponsorship_summary)

class status:
    """Admin page listing the outcome of each status check.

    Currently the only check is the BetterWorldBooks scraper.
    """

    def GET(self):
        """Run every check and render the "admin/status" template.

        The template receives a list of ``(label, check_result)`` pairs.
        """
        from openlibrary.core.vendors import check_bwb_scraper_status
        bwb_result = check_bwb_scraper_status()
        checks = [('BetterWorldBooks Scraper', bwb_result)]
        return render_template("admin/status", checks)


def setup():
register_admin_page('/admin/git-pull', gitpull, label='git-pull')
register_admin_page('/admin/reload', reload, label='Reload Templates')
@@ -723,6 +731,7 @@ def setup():
register_admin_page('/admin/imports/(\d\d\d\d-\d\d-\d\d)', imports_by_date, label="")
register_admin_page('/admin/spamwords', spamwords, label="")
register_admin_page('/admin/sponsorship', sponsorship_stats, label="Sponsorship")
register_admin_page('/admin/status', status, label="Status")

import mem

@@ -11,6 +11,7 @@
<div class="superNav">
$:link("/admin", _("Admin Center"))
| $:link("/admin/people", _("People"))
| $:link("/admin/sponsorship", _("Sponsorship"))
| $:link("/admin/loans", _("Loans"))
| $:link("/admin/waitinglists", _("Waiting Lists"))
| $:link("/admin/block", _("Block IPs"))
@@ -21,4 +22,6 @@
$:link("/admin/graphs", _("Graphs"))
| $:link("/admin/inspect/store", _("Inspect store"))
| $:link("/admin/inspect/memcache", _("Inspect memcache"))
<strong style="padding: 0px 5px"></strong>
$:link("/admin/status", _("Status"))
</div>
@@ -0,0 +1,18 @@
$def with (statuses)
$# :param list[(str, (bool, str))] statuses: one (name, (passing, details)) entry per check

$ _x = ctx.setdefault('bodyid', 'admin')
$ _x = ctx.setdefault('usergroup', 'admin')

<div id="contentHead">
    $:render_template("admin/menu")
    <h1>Admin Status</h1>
</div>

<div id="contentBody" class="page-admin--status">
    $for name, (passing, details) in statuses:
        <details>
            <summary> $cond(passing, '✔', '❌') <b>$name</b></summary>
            $# details can contain text derived from a scraped third-party page,
            $# so it is emitted with HTML escaping ($details) rather than raw ($:details).
            <pre>$details</pre>
        </details>
</div>
@@ -1,9 +1,6 @@
import pytest
import re
import requests
from openlibrary.core.vendors import (
split_amazon_title, clean_amazon_metadata_for_load,
_get_betterworldbooks_thirdparty_metadata, BWB_URL,
betterworldbooks_fmt)

def test_clean_amazon_metadata_for_load_non_ISBN():
@@ -85,13 +82,9 @@ def test_clean_amazon_metadata_for_load_subtitle():
assert result.get('full_title') == 'Killers of the Flower Moon : The Osage Murders and the Birth of the FBI'
#TODO: test for, and implement languages

def test_get_betterworldbooks_thirdparty_metadata():
    """Live check: scrape a price for the first ISBN on the BWB home page.

    Requires network access to the BetterWorldBooks site.
    """
    import urllib2
    homepage = urllib2.urlopen(url=BWB_URL).read()
    found_isbns = re.findall('isbn1=\"([0-9]+)\"', homepage)
    isbn = found_isbns[0]
    assert isbn
    scraped = _get_betterworldbooks_thirdparty_metadata(isbn)
    assert scraped.get('price_amt')

def test_betterworldbooks_fmt():
isbn = '9780393062274'
bad_data = betterworldbooks_fmt(isbn)
assert bad_data.get('isbn') == isbn
assert bad_data.get('price') is None

0 comments on commit dfa0b0a

Please sign in to comment.
You can’t perform that action at this time.