Skip to content

Commit

Permalink
*goodreads_api.py:
Browse files Browse the repository at this point in the history
- autocomplete_api: generic function to use the autocomplete API from goodreads.com.  The API is located at "https://www.goodreads.com/book/auto_complete?format=json&q=XXXXXXXXXXX" where q is a keyword.  It works particularly well with amazon or isbn identifiers.  Example: curl "https://www.goodreads.com/book/auto_complete?format=json&q=B00R3I7OTM" will return a book description with bookId being the goodreads_id equivalent to the asin B00R3I7OTM.

- get_goodreads_id_from_autocomplete: specialized method for the goodreads plugin.  Takes a list of identifiers, in order of importance (for the end-user), and uses the autocomplete_api to find the equivalent goodreads_id.  The list of identifiers is in the form of a literal list, stored in the plugin's prefs.  For example, ['goodreads', 'amazon_ca', 'amazon', 'isbn'].  In this case, the plugin, while trying to search-with-identifiers, will first use a goodreads identifier if present (no request made in this case).  If it's not available, the autocomplete_api is called with the amazon_ca identifier for a match.  If not, the amazon identifier is used, and finally the isbn identifier (if any of those identifiers are available).

*config.py:

- basic changes to allow the users to store their identifier preference.  I had to make minor modifications to integrate the new prefs section seamlessly; it entails loading all the defaults and then updating that dictionary with the actual prefs.

*__init__.py:

- Made the necessary changes in the identify method to call autocomplete_api with all cases of "searches by identifier".  It simplifies the process somewhat.
  • Loading branch information
botmtl committed Jul 28, 2017
1 parent c704b4e commit 0dc49e7
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 47 deletions.
60 changes: 22 additions & 38 deletions __init__.py
Expand Up @@ -18,13 +18,14 @@
from calibre.ebooks.metadata.sources.base import Source, fixcase, fixauthors
from calibre.utils.icu import lower
from calibre.utils.cleantext import clean_ascii_chars
from calibre_plugins.goodreads.goodreads_api import get_goodreads_id_from_autocomplete

class Goodreads(Source):

name = 'Goodreads'
description = _('Downloads metadata and covers from Goodreads')
author = 'Grant Drake with updates by David Forrester'
version = (1, 1, 12)
version = (1, 1, 13)
minimum_calibre_version = (0, 8, 0)

capabilities = frozenset(['identify', 'cover'])
Expand All @@ -50,13 +51,10 @@ def get_book_url(self, identifiers):
return ('goodreads', goodreads_id,
'%s/book/show/%s' % (Goodreads.BASE_URL, goodreads_id))

def create_query(self, log, title=None, authors=None, identifiers={}):
def create_query(self, log, title=None, authors=None):

isbn = check_isbn(identifiers.get('isbn', None))
q = ''
if isbn is not None:
q = 'search_type=books&search[query]=' + isbn
elif title or authors:
if title or authors:
tokens = []
title_tokens = list(self.get_title_tokens(title,
strip_joiners=False, strip_subtitle=True))
Expand Down Expand Up @@ -104,44 +102,34 @@ def identify(self, log, result_queue, abort, title=None, authors=None,
match is found with identifiers.
'''
matches = []
if not identifiers: identifiers={}
# Unlike the other metadata sources, if we have a goodreads id then we
# do not need to fire a "search" at Goodreads.com. Instead we will be
# able to go straight to the URL for that book.
goodreads_id = identifiers.get('goodreads', None)
isbn = check_isbn(identifiers.get('isbn', None))
# By using the autocomplete api, the previous comment is true for any book
# having an identifier that is either goodreads_id, isbn or amazon
log.level = 4
goodreads_id = get_goodreads_id_from_autocomplete(identifiers, timeout, log)
br = self.browser
if goodreads_id:
matches.append('%s/book/show/%s-aaaa' % (Goodreads.BASE_URL, goodreads_id))
else:
query = self.create_query(log, title=title, authors=authors,
identifiers=identifiers)

if not matches:
query = self.create_query(log, title=title, authors=authors)
if query is None:
log.error('Insufficient metadata to construct query')
return
try:
log.info('Querying: %s' % query)
response = br.open_novisit(query, timeout=timeout)
if isbn:
# Check whether we got redirected to a book page for ISBN searches.
# If we did, will use the url.
# If we didn't then treat it as no matches on Goodreads
location = response.geturl()
if '/book/show/' in location:
log.info('ISBN match location: %r' % location)
matches.append(location)
except Exception as e:
err = 'Failed to make identify query: %r' % query
log.exception(err)
return as_unicode(e)

# For ISBN based searches we have already done everything we need to
# So anything from this point below is for title/author based searches.
if not isbn:
log.info('no isbn')
try:
raw = response.read().strip()
#open('E:\\t.html', 'wb').write(raw)
raw = raw.decode('utf-8', errors='replace')
location = response.geturl()
if '/book/show/' in location:
log.info('ISBN match location: %r' % location)
matches.append(location)
if not raw:
log.error('Failed to get raw result for query: %r' % query)
return
Expand All @@ -151,20 +139,17 @@ def identify(self, log, result_queue, abort, title=None, authors=None,
msg = 'Failed to parse goodreads page for query: %r' % query
log.exception(msg)
return msg
# Now grab the first value from the search results, provided the
# title and authors appear to be for the same book
self._parse_search_results(log, title, authors, root, matches, timeout)
except Exception as e:
err = 'Failed to make identify query: %r' % query
log.exception(err)
return as_unicode(e)

if abort.is_set():
return

if not matches:
if identifiers and title and authors:
log.info('No matches found with identifiers, retrying using only'
' title and authors')
return self.identify(log, result_queue, abort, title=title,
authors=authors, timeout=timeout)
log.error('No matches found with query: %r' % query)
log.error(u'No matches found.')
return

from calibre_plugins.goodreads.worker import Worker
Expand Down Expand Up @@ -296,8 +281,7 @@ def ismatch(title):
log.info('Choosing the first audio edition as no others found.')
matches.append(first_non_valid)

def download_cover(self, log, result_queue, abort,
title=None, authors=None, identifiers={}, timeout=30):
def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
log.info('No cached cover found, running identify')
Expand Down
29 changes: 20 additions & 9 deletions config.py
Expand Up @@ -3,6 +3,8 @@
from __future__ import (unicode_literals, division, absolute_import,
print_function)

from PyQt5.QtWidgets import QLineEdit

__license__ = 'GPL v3'
__copyright__ = '2011, Grant Drake <grant.drake@gmail.com>'
__docformat__ = 'restructuredtext en'
Expand All @@ -13,12 +15,12 @@
from PyQt5 import Qt as QtGui
from PyQt5.Qt import (QTableWidgetItem, QVBoxLayout, Qt, QGroupBox, QTableWidget,
QCheckBox, QAbstractItemView, QHBoxLayout, QIcon,
QInputDialog)
QInputDialog, QTextEdit)
except ImportError:
from PyQt4 import QtGui
from PyQt4.Qt import (QTableWidgetItem, QVBoxLayout, Qt, QGroupBox, QTableWidget,
QCheckBox, QAbstractItemView, QHBoxLayout, QIcon,
QInputDialog)
QInputDialog, QTextEdit)
from calibre.gui2 import get_current_db, question_dialog, error_dialog
from calibre.gui2.complete2 import EditWithComplete
from calibre.gui2.metadata.config import ConfigWidget as DefaultConfigWidget
Expand All @@ -30,6 +32,7 @@
KEY_GET_ALL_AUTHORS = 'getAllAuthors'
KEY_GET_EDITIONS = 'getEditions'
KEY_GENRE_MAPPINGS = 'genreMappings'
KEY_IDENTIFIER_ORDER = 'getIdentifierOrder'

DEFAULT_GENRE_MAPPINGS = {
'Anthologies': ['Anthologies'],
Expand Down Expand Up @@ -59,7 +62,7 @@
'Horror': ['Horror'],
'Comedy': ['Humour'],
'Humor': ['Humour'],
'Health': ['Health'],
#'Health': ['Health'], #duplicate (5 lines up)
'Inspirational': ['Inspirational'],
'Sequential Art > Manga': ['Manga'],
'Modern': ['Modern'],
Expand Down Expand Up @@ -94,14 +97,14 @@
DEFAULT_STORE_VALUES = {
KEY_GET_EDITIONS: False,
KEY_GET_ALL_AUTHORS: False,
KEY_GENRE_MAPPINGS: copy.deepcopy(DEFAULT_GENRE_MAPPINGS)
KEY_GENRE_MAPPINGS: copy.deepcopy(DEFAULT_GENRE_MAPPINGS),
KEY_IDENTIFIER_ORDER: "['goodreads', 'isbn', 'amazon', 'amazon_fr','amazon_de','amazon_uk','amazon_it','amazon_jp','amazon_es','amazon_br','amazon_nl','amazon_cn','amazon_ca', 'mobi-asin']"
}

# This is where all preferences for this plugin will be stored
plugin_prefs = JSONConfig('plugins/Goodreads')

# Set defaults
plugin_prefs.defaults[STORE_NAME] = DEFAULT_STORE_VALUES
plugin_prefs = { STORE_NAME:{ } }
plugin_prefs[STORE_NAME].update(DEFAULT_STORE_VALUES)
plugin_prefs[STORE_NAME].update(JSONConfig('config/goodreads'))


class GenreTagMappingsTableWidget(QTableWidget):
Expand Down Expand Up @@ -251,7 +254,14 @@ def __init__(self, plugin):
'e.g. "A (Editor), B (Series Editor)" will return author A\n')
self.all_authors_checkbox.setChecked(c[KEY_GET_ALL_AUTHORS])
other_group_box_layout.addWidget(self.all_authors_checkbox)

self.identifier_order = QLineEdit('Identifiers used in "search by identifier":',self)
self.identifier_order.setToolTip('The order influences greatly the result. For example, [''amazon'', ''goodreads'', ''isbn'']\n'
'will return Kindle Edition results before any other (provided your books have an'
'amazon book id that is of that type. If that is not successful, the search then'
'tries with the goodreads_id that is already associated with this book, if any. '
'Failing that, the isbn will be used to find a corresponding goodreads_id.')
self.identifier_order.setText(c[KEY_IDENTIFIER_ORDER])
other_group_box_layout.addWidget(self.identifier_order)
self.edit_table.populate_table(c[KEY_GENRE_MAPPINGS])

def commit(self):
Expand All @@ -260,6 +270,7 @@ def commit(self):
new_prefs[KEY_GET_EDITIONS] = self.get_editions_checkbox.checkState() == Qt.Checked
new_prefs[KEY_GET_ALL_AUTHORS] = self.all_authors_checkbox.checkState() == Qt.Checked
new_prefs[KEY_GENRE_MAPPINGS] = self.edit_table.get_data()
new_prefs[KEY_IDENTIFIER_ORDER] = self.identifier_order.text()
plugin_prefs[STORE_NAME] = new_prefs

def add_mapping(self):
Expand Down
68 changes: 68 additions & 0 deletions goodreads_api.py
@@ -0,0 +1,68 @@
from ast import literal_eval
from calibre_plugins.goodreads.config import plugin_prefs, STORE_NAME, KEY_IDENTIFIER_ORDER
from calibre.utils.logging import ThreadSafeLog

def get_goodreads_id_from_autocomplete(identifiers, timeout, log):
    # type: (dict, int, ThreadSafeLog) -> unicode or None
    """
    Resolve a goodreads id from a book's identifiers, honouring the
    user-configured identifier priority.

    The priority is the order of the key names stored (as a literal list,
    e.g. "['goodreads', 'amazon_ca', 'isbn']") in
    plugin_prefs[STORE_NAME][KEY_IDENTIFIER_ORDER].

    :param identifiers: dict: available identifiers for this book; only the
        ones named in the configured order are considered, in that order
    :param timeout: int: timeout handed to autocomplete_api
    :param log: ThreadSafeLog: logging utility
    :rtype: unicode or None
    :return: the goodreads id of the first identifier that yields one, or
        None when the configuration is unusable or nothing matches
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3

    if not identifiers:
        identifiers = {}

    try:
        # key_order is stored as a literal list of keys,
        # e.g. ['goodreads', 'amazon_ca', 'isbn']
        key_order = literal_eval(plugin_prefs[STORE_NAME][KEY_IDENTIFIER_ORDER])
        # Keep the candidates in a list of (name, value) pairs: a plain dict
        # does not preserve insertion order on Python 2, which would silently
        # discard the priority the user configured.
        candidates = [
            (text_type(identifier_name), text_type(identifiers[identifier_name]))
            for identifier_name in key_order
            if identifier_name in identifiers
        ]
    except (KeyError, TypeError, ValueError, SyntaxError):
        # Missing pref, non-literal text, non-iterable value, unhashable
        # entry, or an identifier that cannot be converted to text.
        log.exception('The plugin configuration is bad, and you should feel bad.')
        return None

    log.info('Identifier keys will be used in this order to find an equivalent goodread_id:', candidates)

    for identifier_name, identifier_value in candidates:
        # A goodreads identifier is already the answer; no request needed.
        if identifier_name == 'goodreads':
            return identifier_value
        try:
            result = autocomplete_api(identifier_value, timeout, log)
        except Exception as e:
            # Best effort: a timeout or network/JSON failure on one
            # identifier must not prevent trying the next one.
            log.error('autocomplete lookup failed for ', {identifier_name: identifier_value}, ': ', e)
            continue
        if not result:
            # The API had no match for this identifier.
            continue
        goodreads_id = result.get('bookId')
        if goodreads_id:
            log.info('autocomplete found a match for ', {identifier_name: identifier_value}, ' ==> ', {'goodreads': goodreads_id})
            return goodreads_id
    return None


def autocomplete_api(search_terms, timeout, log):
    # type: (unicode, int, ThreadSafeLog) -> dict or None
    """
    Query the goodreads.com autocomplete endpoint and return its first hit.

    :param search_terms: unicode: search term(s), e.g. an ISBN or an ASIN
    :param timeout: int: urlopen will raise an exception
        (caught in get_goodreads_id_from_autocomplete) after this time
    :param log: ThreadSafeLog: logging utility
    :return: dict or None: a dictionary representing the first book found by
        the api, or None when there is nothing to search for or no match.
    """
    import json
    from contextlib import closing
    try:  # Python 2
        from urllib2 import urlopen
        from urllib import quote
    except ImportError:  # Python 3
        from urllib.request import urlopen
        from urllib.parse import quote

    # Guard before calling .strip(): None has no such method, and a blank
    # query cannot match anything, so skip the network round-trip.
    if search_terms is None:
        return None
    search_terms = search_terms.strip()
    if not search_terms:
        return None

    autocomplete_api_url = "https://www.goodreads.com/book/auto_complete?format=json&q="
    log.info('autocomplete url:', autocomplete_api_url, search_terms)

    # Percent-encode the term so spaces, '&', '#' or non-ASCII characters
    # cannot corrupt the query string. Encoding to UTF-8 bytes first keeps
    # quote() safe for unicode input on Python 2.
    if not isinstance(search_terms, bytes):
        search_terms = search_terms.encode('utf-8')
    url = autocomplete_api_url + quote(search_terms)

    # closing() releases the connection even if read() raises; the Python 2
    # urlopen result is not a context manager on its own.
    with closing(urlopen(url, timeout=timeout)) as connection:
        response = connection.read()
    if not response:
        return None

    result = json.loads(response.decode('utf-8'))
    # The endpoint answers with a JSON array of books; anything else (or an
    # empty array) means no usable match.
    if isinstance(result, list) and result:
        return result[0]
    return None

0 comments on commit 0dc49e7

Please sign in to comment.