Permalink
Browse files

Merge pull request #1023 from internetarchive/razzius-master

Redesign of Open Library Fulltext Search
  • Loading branch information...
mekarpeles committed Jul 13, 2018
2 parents 85316ed + 1ef4b75 commit b943059e063403629442302658ca8819ef2166c0
@@ -194,6 +194,9 @@ plugin_recaptcha:
public_key: ""
private_key: ""
plugin_inside:
search_endpoint: "https://be-api.us.archive.org/fts/v1/search"
affiliate_ids:
# Amazon is a book source, not just an affiliate, so we make its affiliate
# tag generally available
@@ -202,4 +205,4 @@ affiliate_ids:
internal_tests_api_key: '8oPd1tx747YH374ohs48ZO5s2Nt1r9yD'
ia_availability_api_url: 'https://archive.org/services/loans/beta/loan/index.php' # to be deprecated in favor of _v1 below
ia_availability_api_v1_url: 'https://archive.org/services/loans/beta/loan/index.php'
ia_availability_api_v2_url: 'https://archive.org/services/availability/'
ia_availability_api_v2_url: 'https://archive.org/services/availability/'
@@ -0,0 +1,23 @@
$def with (q, doc=None)
$# Full-text search excerpt block for a single search hit.
$#   q   -- the user's search query; appended to the reader link's "#search/" fragment.
$#   doc -- dict-like search hit; this macro reads its 'fields', 'highlight' and
$#          'availability' entries. May be omitted, in which case nothing hit-specific renders.
$# A missing hit is treated as empty so the .get() chains below cannot fail on None.
$ doc = doc or {}
$ ia = doc.get('fields', {}).get('identifier', [''])[0]
$ status = doc.get('availability', {}).get('status')
$ snippets = doc.get('highlight', {}).get('text', [''])
$# Angle brackets in the raw snippet text are swapped for guillemet entities before the
$# {{{ ... }}} match markers are turned into highlight markup, so only that markup is emitted raw.
$if snippets:
    <section class="fulltext-excerpts">
    $for snippet in snippets:
        $if snippet:
            <div class="fulltext-excerpt">
                &hellip;$:(snippet.replace("<", "&laquo;").replace(">", "&raquo;").replace("{{{", "<span class='highlight'><strong>").replace("}}}", "</strong></span>"))&hellip;
            </div>
    </section>
$# The two statuses are mutually exclusive; any other status renders no link.
$if status == 'open':
    <p class="center"><a href="https://archive.org/stream/$(ia)?ref=ol&access=1#search/$(q)">See All Results</a></p>
$elif status == 'borrow_available':
    <p class="center">Borrow &amp; <a href="https://archive.org/stream/$(ia)?ref=ol&access=1#search/$(q)">See All Results</a></p>
@@ -1,4 +1,4 @@
$def with (doc, decorations=None, cta=True, availability=None, user=None)
$def with (doc, decorations=None, cta=True, availability=None, user=None, extra=None)
$ is_work = doc.get('type', {}).get('key') == '/type/work'
$ book_url = doc.url() if is_work else doc.key
@@ -10,7 +10,7 @@
$elif doc.get('cover_edition_key'):
$ cover = get_coverstore_url() + "/b/olid/%s-M.jpg" % doc.cover_edition_key
$elif doc.get('ocaid'):
$ cover = "//archive.org/download/%s/page/cover_w60_h60.jpg" % dog.get('ocaid')
$ cover = "//archive.org/download/%s/page/cover_w60_h60.jpg" % doc.get('ocaid')
$else:
$ cover = "/images/icons/avatar_book-sm.png"
<a href="$book_url"><img itemprop="image" src="$cover" height="70" alt="Cover of: $doc.title$(': ' + doc.subtitle if doc.get('subtitle', None) else '')" title="Cover of: $doc.title$(': ' + doc.subtitle if doc.get('subtitle', None) else '')"/></a>
@@ -22,6 +22,8 @@
<h3 class="booktitle">
<a itemprop="name" href="$(book_url)"
class="results">$doc.title$(': ' + doc.subtitle if doc.get('subtitle', None) else '')</a>
$if doc.get('publish_date'):
($(doc['publish_date']))
</h3>
</span>
<span class="bookauthor">by
@@ -41,6 +43,12 @@ <h3 class="booktitle">
$else:
$doc.first_publish_year
</span>
$if extra:
<div class="serp-extras">
$:extra
</div>
</span>
<div class="searchResultItemCTA">
@@ -2,6 +2,7 @@
from infogami.utils.view import render_template, public
from infogami import config
from lxml import etree
from openlibrary.core.lending import get_availability_of_ocaids
from openlibrary.utils import escape_bracket
import logging
import re, web, urllib, urllib2, urlparse, simplejson, httplib
@@ -20,30 +21,26 @@ def quote_snippet(self, snippet):
def GET(self):
def get_results(q, offset=0, limit=100):
q = escape_q(q)
results = inside_search_select({'q': q, 'from': offset, 'size': limit})
# If there is any error in getting the response, return the error
if 'error' in results:
return results
# TODO: This chunk *seems* like it's not achieving anything -- try removing. If all good,
# can collapse `if 'error' in results` condition above.
# ekey_doc = {}
# for doc in results['hits']['hits']:
# ia = doc['fields']['identifier'][0]
# q = {'type': '/type/edition', 'ocaid': ia}
# ekeys = web.ctx.site.things(q)
# if not ekeys:
# del q['ocaid']
# q['source_records'] = 'ia:' + ia
# ekeys = web.ctx.site.things(q)
# if ekeys:
# ekey_doc[ekeys[0]] = doc
# editions = web.ctx.site.get_many(ekey_doc.keys())
# for e in editions:
# ekey_doc[e['key']]['edition'] = e
return results
ia_results = inside_search_select({
'q': escape_q(q), 'from': offset,
'size': limit, 'olonly': 'true'
})
if 'error' not in ia_results and ia_results['hits']:
hits = ia_results['hits'].get('hits', [])
ocaids = [hit['fields'].get('identifier', [''])[0] for hit in hits]
availability = get_availability_of_ocaids(ocaids)
if 'error' in availability:
return []
editions = web.ctx.site.get_many([
'/books/%s' % availability[ocaid].get('openlibrary_edition')
for ocaid in availability
if availability[ocaid].get('openlibrary_edition')])
for ed in editions:
idx = ocaids.index(ed.ocaid)
ia_results['hits']['hits'][idx]['edition'] = ed
ia_results['hits']['hits'][idx]['availability'] = availability[ed.ocaid]
return ia_results
def inside_search_select(params):
if not hasattr(config, 'plugin_inside'):
@@ -107,7 +104,9 @@ def read_from_archive(ia):
if len(v):
item[k] = [i.text for i in v if i.text]
return item
return render_template('search/inside.tmpl', get_results, self.quote_snippet, editions_from_ia, read_from_archive)
page = render_template('search/inside.tmpl', get_results, self.quote_snippet, editions_from_ia, read_from_archive)
page.v2 = True
return page
class snippets(delegate.page):
path = '/search/inside/(.+)'
@@ -368,7 +368,8 @@ $().ready(function(){
'author': 'authors',
'subject': 'subjects',
'all': 'all',
'advanced': 'advancedsearch'
'advanced': 'advancedsearch',
'text': 'inside'
};
var composeSearchUrl = function(q, json, limit, options) {
@@ -10,7 +10,6 @@
<li><a href="/account/create">$_("Sign up")</a></li>
<li><a href="/books/add">$_("Add a Book")</a></li>
<li><a href="https://archive.org/search.php?sin=TXT">$_("Full-Text Search on Archive.org")</a></li>
<li><a href="/random">$_("Random Book")</a></li>
<li><a href="/recentchanges">$_("Recent Community Edits")</a></li>
<li><a href="/advancedsearch">$_("Advanced Search")</a></li>
@@ -40,6 +39,7 @@
<option value='author'>$_("Author")</option>
<option value='subject'>$_("Subject")</option>
<option value='advanced'>$_("Advanced")</option>
<option value='text'>$_("Text")</option>
</select>
</label>
</div>
@@ -87,7 +87,6 @@
<div class="navigation-dropdown-component">
<ul class="dropdown-menu more-menu-options">
<li><a href="/books/add">$_("Add a Book")</a></li>
<li><a href="https://archive.org/search.php?sin=TXT">$_("Full-Text Search on Archive.org")</a></li>
<li><a href="/random">$_("Random Book")</a></li>
<li><a href="/advancedsearch">$_("Advanced Search")</a></li>
<li><a href="/recentchanges">$_("Recent Community Edits")</a></li>
@@ -39,7 +39,7 @@ <h1>Advanced Search</h1>
</fieldset>
<input type="submit" class="generic-button generic-button-primary" value="Search">
<div class="searchPlus">
<a href="https://archive.org/search.php?sin=TXT">Full Text Search</a>?
<a href="/search/inside">Full Text Search</a>?
</div>
</form>
</div>
@@ -2,7 +2,6 @@
$ q = query_param('q')
$ results_per_page = 20
$ no_snippet = set(['printdisabled', 'lendinglibrary', 'browserlending'])
$ page = query_param('page')
$if page:
$ page = int(page)
@@ -19,73 +18,43 @@
$ search_start = time()
$ results = get_results(q, offset=offset, limit=results_per_page)
$ search_secs = time() - search_start
$if 'error' not in results and results['hits']:
$if results and 'error' not in results and results['hits']:
$ hits = results['hits'].get('hits', [])
$ num_found = results['hits'].get('total', 0)
<div id="contentHead">
<h1><span class="word-corner-badge">BETA</span>$_("Search Inside")</h1>
$if q:
$if num_found:
<p class="sansserif darkgreen collapse"><strong>$commify(num_found) hit$("s" if num_found != 1 else "")</strong></p>
$else:
<p class="sansserif red collapse"><strong>No hits</strong></span></p>
<h1>$_("Search Inside")</h1>
</div>
<div id="contentBody">
<div class="section">
<form class="siteSearch searchInsideForm olform" action="">
<input type="text" class="larger" name="q" size="100" style="width: 505px;" value="$q"/>
<input type="submit" class="large" value="$_('Search')"/>
</form>
</div>
$if q and 'error' in results:
<div class="searchResultsError">$results['error']</div>
$if q and 'error' not in results:
<div id="searchResults">
<p>Search took $("%.2f" % search_secs) seconds</p>
<ul id="siteSearch">
$for doc in hits:
$ doc_fields = doc.get('fields', {})
$ ia = doc_fields.get('identifier', [''])[0]
<li>
$ title = doc_fields.get('meta_title', ['no title for: ' + ia])[0]
$ authors = doc_fields.get('meta_creator', [])
$ collection = set(doc_fields.get('meta_collection', []))
$ cover = "//archive.org/download/%s/page/cover_thumb.jpg" % ia
$ url = "//archive.org/details/" + ia
<span class="bookcover"><a href="$url"><img src="$cover" /></a></span>
<span class="details">
<span class="resultTitle">
<h3 class="booktitle sansserif"><a href="$url" class="results">$title</a></h3>
<span class="bookauthor">by
$if authors:
$', '.join(authors)
$else:
<em>unknown author</em>
</span>
<span class="resultPublisher">
<a href="//archive.org/details/$ia">View on archive.org</a>
$ page_nums = doc_fields.get('page_num', [])
$if page_nums:
&bull; page $(', '.join([str(num) for num in page_nums]))
</span>
</span>
</span>
<span class="actions">
$if not (no_snippet & collection):
<a href="//archive.org/stream/$ia?ref=ol#search/$q" title="$_('Open in online Book Reader. Downloads available in ePub, DAISY, PDF, TXT formats from main book page')">
<span class="image read"></span>
<span class="label">$_("Read")</span>
</a>
</span>
<div class="clearfix"></div>
<span class="snippet">
&hellip;$:quote_snippet(doc.get('highlight', {}).get('text', [''])[0])&hellip;
</span>
</li>
</ul>
$:macros.Pager(page, num_found, results_per_page)
<form class="siteSearch searchInsideForm" action="/search/inside">
<input type="text" class="larger" name="q" value="$q"/>
<input type="submit" class="generic-button" value="$_('Search')">
</form>
$if q:
$if 'error' in results:
<div class="searchResultsError">$results['error']</div>
$if not num_found:
<p class="sansserif red collapse">No hits for: <strong>$q</strong></span></p>
$else:
<p class="search-results-stats">About $commify(num_found) result$("s" if num_found != 1 else "") ($("%.2f" % search_secs) seconds)</p>
<div id="searchResults">
<ul id="siteSearch">
$ loans = ctx.user.get_loans() if ctx.user else []
$ waiting_loans = ctx.user.get_waitinglist() if ctx.user else []
$ user = {'loans': loans, 'waitlists': waiting_loans}
$for doc in hits:
$if doc.get('edition'):
$ snippet = macros.FulltextSnippet(q, doc=doc)
$:macros.SearchResultsWork(doc['edition'], availability=doc['availability'], user=user, extra=snippet)
</ul>
$:macros.Pager(page, num_found, results_per_page)
</div>
</div>
Oops, something went wrong.

0 comments on commit b943059

Please sign in to comment.