Skip to content
Permalink
Browse files

fixes search tests, moves canonicalize to models, fixes random book

  • Loading branch information...
mekarpeles authored and root committed May 23, 2019
1 parent 5bfa0a3 commit a57de4d99e655c21be2da96c0a3c0ff6d965126b
@@ -83,7 +83,7 @@ def setup(config):
config_ia_loan_api_developer_key = config.get('ia_loan_api_developer_key')
config_internal_tests_api_key = config.get('internal_tests_api_key')
config_http_request_timeout = config.get('http_request_timeout')


def get_availability(key, ids):
url = '%s?%s=%s' % (config_ia_availability_api_v2_url, key, ','.join(ids))
@@ -28,7 +28,7 @@
from . import loanstats
from . import waitinglist
from . import lending
from . import search


def _get_ol_base_url():
# Anand Oct 2013
@@ -203,13 +203,41 @@ class Edition(Thing):
"""Class to represent /type/edition objects in OL.
"""

@staticmethod
def canonicalize(edition):
    """Fill in derived display fields on ``edition`` and return it.

    Three attributes are set in place:

    * ``ocaid`` -- the edition's own ``ocaid``; otherwise its ``ia``
      value (first element when ``ia`` is a list); otherwise the
      identifier found on ``edition.availability``.
    * ``authors`` -- ``web.storage(key=..., name=...)`` entries built
      from ``get_authors()`` of the first work, or of the edition
      itself when it has no work.
    * ``cover_url`` -- medium ('M') cover of the edition, else of the
      work, else the archive.org image service URL for ``ocaid``,
      else the default book icon.
    """
    primary_work = edition.works and edition.works[0]

    # Use ocaid as canonical internet archive identifier; each fallback
    # is only consulted when the previous candidate is falsy.
    ocaid = edition.get('ocaid')
    if not ocaid:
        if isinstance(edition.ia, list):
            ocaid = edition.get('ia') and edition.ia[0]
        else:
            ocaid = edition.ia
    if not ocaid:
        ocaid = edition.availability and edition.availability.identifier
    edition.ocaid = ocaid

    # Ensure author is set (the work's authors win over the edition's)
    author_source = primary_work or edition
    edition.authors = [
        web.storage(key=author.key, name=author.name or None)
        for author in author_source.get_authors()
    ]

    # Get bookcover from edition, or work, IA fallback, or default.
    # Only the first document that has a cover is consulted.
    cover_url = None
    for doc in (edition, primary_work):
        if doc and doc.get_cover():
            cover_url = doc.get_cover().url('M')
            break
    if not cover_url and edition.ocaid:
        cover_url = 'https://archive.org/services/img/%s' % edition.ocaid
    edition.cover_url = cover_url or '/images/icons/avatar_book.png'
    return edition

@staticmethod
def get_random_available():
    """Uses archive.org AdvancedSearch API to find a random available
    edition on Open Library

    Returns the first entry of the ``editions`` list produced by
    ``search.editions_by_ia_query`` (asked for one result, sorted
    randomly), or None when that list is missing or empty.
    """
    # Function-local import: it must run before any use of `search`,
    # because the import makes `search` a local name in this function.
    # NOTE(review): presumably local to avoid a circular import with the
    # search module, which imports Edition -- confirm.
    from . import search
    results = search.editions_by_ia_query(limit=1, sorts=['random'])
    editions = results.get('editions')
    if editions:
        return editions[0]
    return None

@@ -364,10 +392,6 @@ def is_lendable_book(self):
"""
return self.in_borrowable_collection()

@staticmethod
def random_available():
return search.EditionSearch.random_available()

def get_ia_download_link(self, suffix):
"""Returns IA download link for given suffix.
The suffix is usually one of '.pdf', '.epub', '.mobi', '_djvu.txt'
@@ -9,13 +9,14 @@
from infogami.utils import delegate

from openlibrary.utils import dateutil
from openlibrary.core.models import Edition
from openlibrary.core.helpers import get_coverstore_url
from openlibrary.core.helpers import bookreader_host
from openlibrary.core.lending import config_http_request_timeout


PRESET_QUERIES = {
'preset:modern': 'languageSorter:"English" AND (year:"2013" OR year:"2014" OR year:"2015")',
'preset:modern': 'languageSorter:"English" AND (year:"2014" OR year:"2015")',
'preset:thrillers': '(creator:"Clancy, Tom" OR creator:"King, Stephen" OR creator:"Clive Cussler" OR creator:("Cussler, Clive") OR creator:("Dean Koontz") OR creator:("Koontz, Dean") OR creator:("Higgins, Jack")) AND !publisher:"Pleasantville, N.Y. : Reader\'s Digest Association" AND languageSorter:"English"',
'preset:children': '(creator:("parish, Peggy") OR creator:("avi") OR title:("goosebumps") OR creator:("Dahl, Roald") OR creator:("ahlberg, allan") OR creator:("Seuss, Dr") OR creator:("Carle, Eric") OR creator:("Pilkey, Dav"))',
'preset:comics': '(subject:"comics" OR creator:("Gary Larson") OR creator:("Larson, Gary") OR creator:("Charles M Schulz") OR creator:("Schulz, Charles M") OR creator:("Jim Davis") OR creator:("Davis, Jim") OR creator:("Bill Watterson") OR creator:("Watterson, Bill") OR creator:("Lee, Stan"))',
@@ -55,14 +56,14 @@ def editions_by_ia_query(query='', sorts=None, page=1, limit=None):
url = _compose_advancedsearch_url(**params)
response = _request(url)
items = response.get('docs', [])
work2item = _index_item_by_distinct_work(items)
item_index = _index_items_by_ocaid(items)
editions = [
_add_availability_to_edition(
edition.canonicalize, work2item).dict()
for edition in web.ctx.site.get_many([
Edition.canonicalize(_add_availability_to_edition(
edition, item_index)).dict() for edition
in web.ctx.site.get_many([
'/books/%s' % item['openlibrary_edition']
for item in work2item.values()
]) if _item_matching_edition(edition, work2item)
for item in item_index.values()
])
]

return {
@@ -117,7 +118,7 @@ def _clean_params(q='', sorts='', page=1, limit=MAX_IA_RESULTS):
params = {
'q': q,
'page': page,
'limit': min(limit, MAX_IA_RESULTS),
'rows': min(limit, MAX_IA_RESULTS),
'sort[]': sorts, # broken for encoding of + -> %2B
'fl[]': RETURN_FIELDS,
'output': 'json'
@@ -156,38 +157,24 @@ def _request(url):
except Exception as e:
return []

def _index_item_by_distinct_work(items):
"""Filter duplicate editions (items with the same work)
to ensure a single edition (item) per work
"""
return dict(('/works/%s' % item['openlibrary_work'], item)
for item in items if item.get('openlibrary_work'))

def _item_matching_edition(edition, work2item):
"""An edition may belong to multiple works, especially if those works
were merged duplicates. This method tells us which work is listed in the
work2item mapping.
"""
return edition.works and next((
work2item.get(work.key) for work in edition.works if work.key in work2item
), None)
def _index_items_by_ocaid(items):
return dict((item['identifier'], item) for item in items)

def _add_availability_to_edition(edition, item_index):
    """
    To avoid a 2nd network call to `lending.add_availability`
    reconstruct availability info ad-hoc from archive.org
    advancedsearch results

    `edition` must expose `ocaid` and support item assignment;
    `item_index` maps archive.org identifiers to search result items.
    The edition gets an `availability` dict and is returned.

    The status is 'borrow_<value>' (lower-cased) when the item has a
    truthy AVAILABILITY_STATUS field, otherwise 'open'. Missing
    identifier / edition / work fields default to ''.

    Raises KeyError when `edition.ocaid` is not a key of `item_index`.
    XXX needs to be more robust if not item
    """
    # Items are looked up by the edition's ocaid, which is how
    # `item_index` is keyed.
    item = item_index[edition.ocaid]
    availability_status = (
        ('borrow_%s' % item[AVAILABILITY_STATUS].lower())
        if item.get(AVAILABILITY_STATUS) else 'open')
    edition['availability'] = {
        'status': availability_status,
        'identifier': item.get('identifier', ''),
        'openlibrary_edition': item.get('openlibrary_edition', ''),
        'openlibrary_work': item.get('openlibrary_work', '')
    }
    return edition

@@ -20,7 +20,7 @@
$else:
$ cover_url = '%s/b/ia/%s-M.jpg?default=%s%s' % (cover_host, ocaid, fallback_cover, ocaid)
$ img_attr = 'data-lazy' if lazy_cover else 'src'
$ byline = ' by ' + ', '.join([author.name for author in book.authors]) if book.get('authors') else ''
$ byline = ' by ' + ', '.join([author.get('name', '') for author in book.authors]) if book.get('authors') else ''

<div class="book carousel__item">
<div class="book-cover">
@@ -57,6 +57,7 @@ <h2 class="home-h2">
<script type="text/javascript">
window.q.push(function() {
var addWork = function(work) {
// XXX byline needed in title
var ocaid = work.availability.identifier;
var availabilityTypes = $:(availability_types);
var availability = work.availability.status;
@@ -1,8 +1,6 @@
$def with (page)

$if page.type.key == '/type/work' and page.edition_count == 1:
$ edit_url = page.get_one_edition().get_url(suffix="/edit") or page.url(suffix="/edit")
$elif page.type.key in ["/type/work", "/type/edition", "/type/author"]:
$if page.type.key in ["/type/work", "/type/edition", "/type/author"]:
$ edit_url = page.url(suffix="/edit")
$else:
$ edit_url = page.key + "?m=edit"
@@ -336,6 +336,8 @@ def populate_edition_data(self, edition, identifier):
"""
edition['ocaid'] = identifier
edition['source_records'] = "ia:" + identifier
# XXX Maybe this should use cover_t.jpg instead of title.jpg?
# https://archive.org/download/eustacediamonds00trol_1/page/cover_t.jpg
edition['cover'] = "{0}/download/{1}/{1}/page/title.jpg".format(IA_BASE_URL, identifier)
return edition

@@ -23,13 +23,12 @@ def get_editions_by_ia_query(query='', sorts=None, page=1, limit=None,
@public
def cached_random_readable_works():
    """Return up to 60 readable works drawn from a memcache-backed list.

    The list produced by `random_readable_works` is memoized under the
    "carousel.classics" key for 15 minutes. A falsy cache result is
    treated as an empty list. The result is passed through `storify`.
    """
    # cache 2k classic works in memcache for 15 minutes
    works = cache.memcache_memoize(
        random_readable_works, "carousel.classics",
        timeout=15*dateutil.MINUTE_SECS)() or []
    # sample results to appear random; smaller lists are returned whole
    # because random.sample cannot draw more items than it is given
    return storify(
        random.sample(works, 60) if len(works) > 60 else works
    )


def setup():
@@ -200,7 +200,7 @@ class random_book(delegate.page):
def GET(self):
    """Redirect to a random available edition, or to the home page
    when no edition could be found.
    """
    ed = Edition.get_random_available()
    if ed:
        # The edition is read by key lookup rather than attribute
        # access: the search helper hands editions back as `.dict()`
        # results, which have no `.key` attribute.
        raise web.seeother(ed['key'])
    raise web.seeother("/")

class addbook(delegate.page):
@@ -33,14 +33,13 @@ def get_homepage():
"home/index", stats=stats,
blog_posts=get_blog_feeds()
)
page.v2 = True
page.v2 = True
return dict(page)

# when homepage is cached, home/index.html template doesn't
# run ctx.setdefault to set the bodyid so we must do so here:
delegate.context.setdefault('bodyid', 'home')
return web.template.TemplateResult(
get_homepage() or # XXX delete this line! Testing
cache.memcache_memoize(
get_homepage, "home.homepage", timeout=5 * dateutil.MINUTE_SECS)())

@@ -7,7 +7,7 @@ const Carousel = {
* loadMore params for lazy-loading more pages of results on the fly
* @param {string} selector to bind carousel
* @param {string} a - f are number of items to render at different mobile breakpoints
* @param {{
* @param {{
* url:string endpoint for fetching additional results
* getItems:function which extracts item values out of the API response
* addItem:function which consumes an item and returns html to add as a new slide
@@ -111,7 +111,7 @@ const Carousel = {

// update the current page or offset within the URL
url.searchParams.set(loadMore.pageMode, loadMore.page);

$.ajax({
url: url,
type: 'GET',
@@ -3,16 +3,20 @@
import sys
import web

from infogami.utils.view import render_template
#from infogami.utils.view import render_template
#from openlibrary.app import render_template
from infogami.utils import template, context
from infogami.utils.macro import codemacros, macrostore, load_macros
from openlibrary.i18n import gettext
from openlibrary.core.admin import Stats
from bs4 import BeautifulSoup

import six

from openlibrary.core import cache
from openlibrary import core
from openlibrary.plugins.openlibrary import home
from openlibrary.plugins.openlibrary import carousels
from openlibrary.core.models import Edition

class MockDoc(dict):
def __init__(self, _id, *largs, **kargs):
@@ -53,7 +57,7 @@ def test_stats_template(self, render_template):
html = six.text_type(render_template("home/stats"))
assert html == ""

def test_home_template(self, render_template, mock_site):
def test_home_template(self, render_template, mock_site, monkeypatch):
docs = [MockDoc(_id=datetime.datetime.now().strftime("counts-%Y-%m-%d"),
human_edits=1, bot_edits=1, lists=1,
visitors=1, loans=1, members=1,
@@ -73,25 +77,13 @@ def test_home_template(self, render_template, mock_site):
subjects = Stats(docs, "subjects", "total_subjects"))

mock_site.quicksave("/people/foo/lists/OL1L", "/type/list")

def spoofed_generic_carousel(*args, **kwargs):
return [{
"work": None,
"key": "/books/OL1M",
"url": "/books/OL1M",
"title": "The Great Book",
"authors": [web.storage({
"key": "/authors/OL1A",
"name": "Some Author"
})],
"read_url": "http://archive.org/stream/foo",
"borrow_url": "/books/OL1M/foo/borrow",
"inlibrary_borrow_url": "/books/OL1M/foo/borrow",
"cover_url": ""
}]
web.template.Template.globals['cached_random_readable_works'] = lambda: []
web.template.Template.globals['get_editions_by_ia_query'] = lambda: {}
load_macros('openlibrary', lazy=True)
html = six.text_type(render_template("home/index", stats=stats, test=True))
headers = ["Books We Love", "Recently Returned", "Kids",
"Thrillers", "New Arrivals", "Classic Books", "Textbooks"]
headers = ["Classic Books", "Books We Love", "Recently Returned",
"Recently Added", "Textbooks", "Kids",
"Authors Alliance &amp; MIT Press"]
for h in headers:
assert h in html

@@ -107,23 +99,26 @@ def test_all(self, mock_site, mock_ia):
def test_authors(self, mock_site, mock_ia):
a1 = mock_site.quicksave("/authors/OL1A", "/type/author", name="A1")
a2 = mock_site.quicksave("/authors/OL2A", "/type/author", name="A2")
work = mock_site.quicksave("/works/OL1W", "/type/work", title="Foo", authors=[{"author": {"key": "/authors/OL2A"}}])
work = mock_site.quicksave("/works/OL1W", "/type/work", title="Foo",
authors=[{"author": {"key": "/authors/OL2A"}}])

book = mock_site.quicksave("/books/OL1M", "/type/edition", title="Foo")

import ipdb
ipdb.set_trace()

assert book.canonicalize['authors'] == []
assert Edition.canonicalize(book)['authors'] == []

# when there is no work and authors, the authors field must be picked from the book
book = mock_site.quicksave("/books/OL1M", "/type/edition", title="Foo", authors=[{"key": "/authors/OL1A"}])
assert book.canonicalize['authors'] == [{"key": "/authors/OL1A", "name": "A1"}]
# when there is no work and authors, the authors field must be
# picked from the book
book = mock_site.quicksave(
"/books/OL1M", "/type/edition", title="Foo",
authors=[{"key": "/authors/OL1A"}])
assert (Edition.canonicalize(book)['authors'] ==
[{"key": "/authors/OL1A", "name": "A1"}])

# when there is work, the authors field must be picked from the work
book = mock_site.quicksave("/books/OL1M", "/type/edition",
title="Foo",
authors=[{"key": "/authors/OL1A"}],
works=[{"key": "/works/OL1W"}]
)
assert book.canonicalize['authors'] == [{"key": "/authors/OL2A", "name": "A2"}]
assert (Edition.canonicalize(book)['authors'] ==
[{"key": "/authors/OL2A", "name": "A2"}])

0 comments on commit a57de4d

Please sign in to comment.
You can’t perform that action at this time.