*goodreads_api.py:

- autocomplete_api: generic function to use the autocomplete api from goodreads.com. The API is located at "https://www.goodreads.com/book/auto_complete?format=json&q=XXXXXXXXXXX" where q is a keyword. It works particularly well with amazon or isbn identifiers. Example: curl "https://www.goodreads.com/book/auto_complete?format=json&q=B00R3I7OTM" will return a book description with bookId being the goodreads_id equivalent to the asin B00R3I7OTM. - get_goodreads_id_from_autocomplete: specialized method for the goodreads plugin. Takes a list of identifiers, in order of importance (for the end-user), and uses the autocomplete_api to find the equivalent goodreads_id. The list of identifiers is in the form of a literal list, stored in the plugin's prefs. For example, ['goodreads', 'amazon_ca', 'amazon', 'isbn']. In this case, the plugin, while trying to search-with-identifiers, will first use a goodreads identifier if present (no request is made in this case). If it's not available, the autocomplete_api is called with the amazon_ca identifier for a match. If that fails, the amazon identifier is used, and finally the isbn identifier (provided those identifiers are available). *config.py: - basic changes to allow the users to store their identifier preference. I had to make minor modifications to integrate the new prefs section seamlessly; it entails loading all the defaults and then updating that dictionary with the actual prefs. *__init__.py: - Made the necessary changes in the identify method to call autocomplete_api in all cases of "searches by identifier". It simplifies the process somewhat.
botmtl · Jul 28, 2017 · 0dc49e7 · 0dc49e7
1 parent c704b4e
commit 0dc49e7
Show file tree

Hide file tree

Showing 3 changed files with 110 additions and 47 deletions.
diff --git a/__init__.py b/__init__.py
@@ -18,13 +18,14 @@
 from calibre.ebooks.metadata.sources.base import Source, fixcase, fixauthors
 from calibre.utils.icu import lower
 from calibre.utils.cleantext import clean_ascii_chars
+from calibre_plugins.goodreads.goodreads_api import get_goodreads_id_from_autocomplete
 
 class Goodreads(Source):
 
     name = 'Goodreads'
     description = _('Downloads metadata and covers from Goodreads')
     author = 'Grant Drake with updates by David Forrester'
-    version = (1, 1, 12)
+    version = (1, 1, 13)
     minimum_calibre_version = (0, 8, 0)
 
     capabilities = frozenset(['identify', 'cover'])
@@ -50,13 +51,10 @@ def get_book_url(self, identifiers):
             return ('goodreads', goodreads_id,
                     '%s/book/show/%s' % (Goodreads.BASE_URL, goodreads_id))
 
-    def create_query(self, log, title=None, authors=None, identifiers={}):
+    def create_query(self, log, title=None, authors=None):
 
-        isbn = check_isbn(identifiers.get('isbn', None))
         q = ''
-        if isbn is not None:
-            q = 'search_type=books&search[query]=' + isbn
-        elif title or authors:
+        if title or authors:
             tokens = []
             title_tokens = list(self.get_title_tokens(title,
                                 strip_joiners=False, strip_subtitle=True))
@@ -104,44 +102,34 @@ def identify(self, log, result_queue, abort, title=None, authors=None,
         match is found with identifiers.
         '''
         matches = []
+        if not identifiers: identifiers={}
         # Unlike the other metadata sources, if we have a goodreads id then we
         # do not need to fire a "search" at Goodreads.com. Instead we will be
         # able to go straight to the URL for that book.
-        goodreads_id = identifiers.get('goodreads', None)
-        isbn = check_isbn(identifiers.get('isbn', None))
+        # By using the autocomplete api, the previous comment is true for any book
+        # having an identifier that is either goodreads_id, isbn or amazon
+        log.level = 4
+        goodreads_id = get_goodreads_id_from_autocomplete(identifiers, timeout, log)
         br = self.browser
         if goodreads_id:
             matches.append('%s/book/show/%s-aaaa' % (Goodreads.BASE_URL, goodreads_id))
-        else:
-            query = self.create_query(log, title=title, authors=authors,
-                    identifiers=identifiers)
+
+        if not matches:
+            query = self.create_query(log, title=title, authors=authors)
             if query is None:
                 log.error('Insufficient metadata to construct query')
                 return
             try:
                 log.info('Querying: %s' % query)
                 response = br.open_novisit(query, timeout=timeout)
-                if isbn:
-                    # Check whether we got redirected to a book page for ISBN searches.
-                    # If we did, will use the url.
-                    # If we didn't then treat it as no matches on Goodreads
-                    location = response.geturl()
-                    if '/book/show/' in location:
-                        log.info('ISBN match location: %r' % location)
-                        matches.append(location)
-            except Exception as e:
-                err = 'Failed to make identify query: %r' % query
-                log.exception(err)
-                return as_unicode(e)
-
-            # For ISBN based searches we have already done everything we need to
-            # So anything from this point below is for title/author based searches.
-            if not isbn:
-                log.info('no isbn')
                 try:
                     raw = response.read().strip()
                     #open('E:\\t.html', 'wb').write(raw)
                     raw = raw.decode('utf-8', errors='replace')
+                    location = response.geturl()
+                    if '/book/show/' in location:
+                        log.info('ISBN match location: %r' % location)
+                        matches.append(location)
                     if not raw:
                         log.error('Failed to get raw result for query: %r' % query)
                         return
@@ -151,20 +139,17 @@ def identify(self, log, result_queue, abort, title=None, authors=None,
                     msg = 'Failed to parse goodreads page for query: %r' % query
                     log.exception(msg)
                     return msg
-                # Now grab the first value from the search results, provided the
-                # title and authors appear to be for the same book
                 self._parse_search_results(log, title, authors, root, matches, timeout)
+            except Exception as e:
+                err = 'Failed to make identify query: %r' % query
+                log.exception(err)
+                return as_unicode(e)
 
         if abort.is_set():
             return
 
         if not matches:
-            if identifiers and title and authors:
-                log.info('No matches found with identifiers, retrying using only'
-                        ' title and authors')
-                return self.identify(log, result_queue, abort, title=title,
-                        authors=authors, timeout=timeout)
-            log.error('No matches found with query: %r' % query)
+            log.error(u'No matches found.')
             return
 
         from calibre_plugins.goodreads.worker import Worker
@@ -296,8 +281,7 @@ def ismatch(title):
             log.info('Choosing the first audio edition as no others found.')
             matches.append(first_non_valid)
 
-    def download_cover(self, log, result_queue, abort,
-            title=None, authors=None, identifiers={}, timeout=30):
+    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
         cached_url = self.get_cached_cover_url(identifiers)
         if cached_url is None:
             log.info('No cached cover found, running identify')

diff --git a/config.py b/config.py
@@ -3,6 +3,8 @@
 from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
 
+from PyQt5.QtWidgets import QLineEdit
+
 __license__   = 'GPL v3'
 __copyright__ = '2011, Grant Drake <grant.drake@gmail.com>'
 __docformat__ = 'restructuredtext en'
@@ -13,12 +15,12 @@
     from PyQt5 import Qt as QtGui
     from PyQt5.Qt import (QTableWidgetItem, QVBoxLayout, Qt, QGroupBox, QTableWidget,
                           QCheckBox, QAbstractItemView, QHBoxLayout, QIcon,
-                          QInputDialog)
+                          QInputDialog, QTextEdit)
 except ImportError:
     from PyQt4 import QtGui
     from PyQt4.Qt import (QTableWidgetItem, QVBoxLayout, Qt, QGroupBox, QTableWidget,
                           QCheckBox, QAbstractItemView, QHBoxLayout, QIcon,
-                          QInputDialog)
+                          QInputDialog, QTextEdit)
 from calibre.gui2 import get_current_db, question_dialog, error_dialog
 from calibre.gui2.complete2 import EditWithComplete
 from calibre.gui2.metadata.config import ConfigWidget as DefaultConfigWidget
@@ -30,6 +32,7 @@
 KEY_GET_ALL_AUTHORS = 'getAllAuthors'
 KEY_GET_EDITIONS = 'getEditions'
 KEY_GENRE_MAPPINGS = 'genreMappings'
+KEY_IDENTIFIER_ORDER = 'getIdentifierOrder'
 
 DEFAULT_GENRE_MAPPINGS = {
                 'Anthologies': ['Anthologies'],
@@ -59,7 +62,7 @@
                 'Horror': ['Horror'],
                 'Comedy': ['Humour'],
                 'Humor': ['Humour'],
-                'Health': ['Health'],
+                #'Health': ['Health'],  #duplicate (5 lines up)
                 'Inspirational': ['Inspirational'],
                 'Sequential Art > Manga': ['Manga'],
                 'Modern': ['Modern'],
@@ -94,14 +97,14 @@
 DEFAULT_STORE_VALUES = {
     KEY_GET_EDITIONS: False,
     KEY_GET_ALL_AUTHORS: False,
-    KEY_GENRE_MAPPINGS: copy.deepcopy(DEFAULT_GENRE_MAPPINGS)
+    KEY_GENRE_MAPPINGS: copy.deepcopy(DEFAULT_GENRE_MAPPINGS),
+    KEY_IDENTIFIER_ORDER: "['goodreads', 'isbn', 'amazon', 'amazon_fr','amazon_de','amazon_uk','amazon_it','amazon_jp','amazon_es','amazon_br','amazon_nl','amazon_cn','amazon_ca', 'mobi-asin']"
 }
 
 # This is where all preferences for this plugin will be stored
-plugin_prefs = JSONConfig('plugins/Goodreads')
-
-# Set defaults
-plugin_prefs.defaults[STORE_NAME] = DEFAULT_STORE_VALUES
+plugin_prefs = { STORE_NAME:{ } }
+plugin_prefs[STORE_NAME].update(DEFAULT_STORE_VALUES)
+plugin_prefs[STORE_NAME].update(JSONConfig('config/goodreads'))
 
 
 class GenreTagMappingsTableWidget(QTableWidget):
@@ -251,7 +254,14 @@ def __init__(self, plugin):
                                               'e.g. "A (Editor), B (Series Editor)" will return author A\n')
         self.all_authors_checkbox.setChecked(c[KEY_GET_ALL_AUTHORS])
         other_group_box_layout.addWidget(self.all_authors_checkbox)
-
+        self.identifier_order = QLineEdit('Identifiers used in "search by identifier":',self)
+        self.identifier_order.setToolTip('The order influences greatly the result.  For example, [''amazon'', ''goodreads'', ''isbn'']\n'
+                                         'will return Kindle Edition results before any other (provided your books have an'
+                                         'amazon book id that is of that type.  If that is not successful, the search then'
+                                         'tries with the goodreads_id that is already associated with this book, if any.  '
+                                         'Failing that, the isbn will be used to find a corresponding goodreads_id.')
+        self.identifier_order.setText(c[KEY_IDENTIFIER_ORDER])
+        other_group_box_layout.addWidget(self.identifier_order)
         self.edit_table.populate_table(c[KEY_GENRE_MAPPINGS])
 
     def commit(self):
@@ -260,6 +270,7 @@ def commit(self):
         new_prefs[KEY_GET_EDITIONS] = self.get_editions_checkbox.checkState() == Qt.Checked
         new_prefs[KEY_GET_ALL_AUTHORS] = self.all_authors_checkbox.checkState() == Qt.Checked
         new_prefs[KEY_GENRE_MAPPINGS] = self.edit_table.get_data()
+        new_prefs[KEY_IDENTIFIER_ORDER] = self.identifier_order.text()
         plugin_prefs[STORE_NAME] = new_prefs
 
     def add_mapping(self):

diff --git a/goodreads_api.py b/goodreads_api.py
@@ -0,0 +1,68 @@
+from ast import literal_eval
+from calibre_plugins.goodreads.config import plugin_prefs, STORE_NAME, KEY_IDENTIFIER_ORDER
+from calibre.utils.logging import ThreadSafeLog
+
+def get_goodreads_id_from_autocomplete(identifiers, timeout, log):
+    # type: (dict, int, ThreadSafeLog) -> Optional[unicode]
+    """
+    Find a goodreads id for a book, trying its identifiers in the user's preferred order.
+
+    The priority is the literal list stored (as a string) in
+    plugin_prefs[STORE_NAME][KEY_IDENTIFIER_ORDER], for example
+    "['goodreads', 'amazon_ca', 'isbn']". A 'goodreads' identifier is returned
+    as-is without any request; every other identifier value is passed to
+    autocomplete_api until one of the results carries a 'bookId'.
+
+    :param identifiers: dict: available identifiers for this book (None or empty is allowed)
+    :param timeout: int: timeout handed through to autocomplete_api
+    :param log: ThreadSafeLog: logging utility
+    :rtype: unicode or None
+    :return: unicode or None: a goodreads id, or None when the stored preference
+    cannot be parsed or no identifier produces a match
+    """
+    if not identifiers:
+        identifiers = {}
+    try:
+        # key_order is stored as a literal list of keys ['goodreads', 'amazon_ca', 'isbn']
+        key_order = literal_eval(plugin_prefs[STORE_NAME][KEY_IDENTIFIER_ORDER])
+        if not isinstance(key_order, (list, tuple)):
+            raise ValueError('identifier order must be a list, got %r' % (key_order,))
+        # Keep (name, value) pairs in a list, NOT a dict: a dict does not
+        # preserve the order of key_order, and that order is the whole point
+        # of the preference. Identifiers that are missing or empty are skipped.
+        candidates = [(unicode(identifier_name), unicode(identifiers[identifier_name]))
+                      for identifier_name in key_order
+                      if identifiers.get(identifier_name)]
+        log.info('Identifier keys will be used in this order to find an equivalent goodread_id:', candidates)
+    except Exception:
+        # Unparsable preference string, missing prefs key, non-list value, ...
+        log.exception('The plugin configuration is bad, and you should feel bad.')
+        return None
+
+    # for every key candidate, highest priority first
+    for identifier_name, identifier_value in candidates:
+        # a goodreads id is already what we are looking for: no request needed
+        if identifier_name == 'goodreads':
+            return identifier_value
+        try:
+            result = autocomplete_api(identifier_value, timeout, log)
+        except Exception:
+            # Best effort: a failure for one identifier (most likely a timeout)
+            # must not prevent the lower-priority identifiers from being tried.
+            log.exception('autocomplete lookup failed for ', {identifier_name: identifier_value})
+            continue
+        # autocomplete_api returns None when the api found nothing
+        goodreads_id = result.get('bookId') if isinstance(result, dict) else None
+        if goodreads_id:
+            log.info('autocomplete found a match for ', {identifier_name:identifier_value}, ' ==> ', {'goodreads':goodreads_id})
+            return goodreads_id
+    return None
+
+
+def autocomplete_api(search_terms, timeout, log):
+    # type: (unicode, int, ThreadSafeLog) -> Optional[dict]
+    """
+    Query the goodreads.com autocomplete api and return the first book it reports.
+
+    :param search_terms: unicode: search term(s); None, empty or whitespace-only
+    terms return None without any request being made
+    :param timeout: int: urlopen will raise an exception after this time;
+    network errors are not caught here and propagate to the caller
+    :param log: ThreadSafeLog: logging utility
+    :return: dict or None: a dictionary representing the first book found by
+    the api, or None when the response is empty or is not a non-empty list
+    """
+    from contextlib import closing
+    from urllib import quote
+    from urllib2 import urlopen
+    import json
+
+    # The None check has to come BEFORE .strip(), otherwise None raises
+    # AttributeError and the check can never be reached.
+    if search_terms is None:
+        return None
+    search_terms = search_terms.strip()
+    if not search_terms:
+        return None
+
+    autocomplete_api_url = "https://www.goodreads.com/book/auto_complete?format=json&q="
+    log.info('autocomplete url:', autocomplete_api_url, search_terms)
+    # Percent-encode the term so spaces, '&', '#', non-ascii characters, etc.
+    # cannot corrupt the query string. Python 2's quote() wants a byte string.
+    if isinstance(search_terms, unicode):
+        search_terms = search_terms.encode('utf-8')
+    url = autocomplete_api_url + quote(search_terms, safe='')
+    # closing() guarantees the connection is released even if read() fails
+    with closing(urlopen(url, timeout=timeout)) as connection:
+        response = connection.read()
+    if response:
+        result = json.loads(response)
+        # only a non-empty JSON list has a "first book" to return
+        if isinstance(result, list) and result:
+            return result[0]
+    return None