In [29]:
import requests
import xmltodict

In [30]:
def create_book(title, author, isbn):
    """Bundle a book's title, author and ISBN into a plain dictionary.

    The returned dictionary has the keys 'title', 'author' and 'isbn',
    in that order.
    """
    return dict(title=title, author=author, isbn=isbn)

# Sample books used by the examples, written as (title, author, isbn) triples
# and turned into dictionaries with create_book.
my_books = [
    create_book(title, author, isbn)
    for title, author, isbn in (
        ('Test Driven Development: By Example', 'Kent Beck', '0321146530'),
        ('Bloodchild and Other Stories', 'Octavia E. Butler', '1583226982'),
        ('The Machinery of Life', 'David S. Goodsell', '0387849246'),
        ('Ulysses', 'James Joyce', '1494405490'),
    )
]

In [31]:
# Goodreads developer key, appended to every request URL.
# NOTE(review): the key is hard-coded in the source; consider loading it from
# configuration or the environment before sharing this file.
key_goodreads = 'SEX1X3AqNPEW9w5U8AfUA'

# Base URL of the search endpoint with the key already applied.
base_url_goodreads = 'https://www.goodreads.com/search.xml?key={}'.format(key_goodreads)

In [32]:
def request_goodreads_books_by_title(title):
    """Search Goodreads for books matching ``title`` and return the response.

    Builds the search URL from ``base_url_goodreads``, logs it through
    ``log_url`` and performs a GET request with ``requests``.

    Args:
        title: Book title to search for; spaces are encoded as ``%20``.

    Returns:
        The ``requests`` response object of the search call (not parsed).

    NOTE(review): ``log_url`` is not defined in the visible part of this
    file; it is assumed to be provided elsewhere.
    """
    # The search endpoint restricts the searched field with
    # "search[field]=<title|author|all>"; a bare "search[title]" is not a
    # recognised parameter, so the query was not limited to titles.
    url = base_url_goodreads + '&q=' + encode_white_spaces(title) + '&search[field]=title'
    log_url('goodreads', url)
    return requests.get(url)

In [33]:
def encode_white_spaces(string):
    """Return ``string`` with every space character replaced by ``%20``.

    Only the plain space character is touched; other whitespace and
    special characters are left as they are.
    """
    return '%20'.join(string.split(' '))

In [34]:
# Author and title variables
author = "Clive Cussler"
title = "Sahara"

# White space must be replaced with %20 before the values go into the URL
a = encode_white_spaces(author)
t = encode_white_spaces(title)

# Lookup URL for a single book, identified by author and title
url = 'https://www.goodreads.com/book/title.xml?author=%s&key=%s&title=%s' % (a, key_goodreads, t)

print(url)

https://www.goodreads.com/book/title.xml?author=Clive%20Cussler&key=SEX1X3AqNPEW9w5U8AfUA&title=Sahara


Now we have a URL!  If you open it in a browser, you can see the elements of the XML response presented to you.

You can use "requests" in conjunction with xmltodict to fetch and parse the response.

In [44]:
# Fetch the URL, then parse the response body with xmltodict and keep only
# the 'book' entry nested under 'GoodreadsResponse'.
req = requests.get(url)

r = xmltodict.parse(req.content)['GoodreadsResponse']['book']


Once parsed, the data is a nested dictionary, so you can access it by referring to its elements in the hierarchy.  For example, to print out the ASIN:

In [45]:
# Print the 'asin' entry of the parsed book data
print(r['asin'])

None


You can also copy the parsed data into a dictionary of your own, as shown below, and access elements by referring to them by name.  Here is a small subset of the API data added to a dictionary.

In [46]:
# Copy a handful of fields from the parsed book data into a flat dictionary.
goodreads_data_dictionary = dict(
    isbn=r['isbn'],
    isbn13=r['isbn13'],
    title=r['title'],
    year=r['publication_year'],
    original_publication_year=r['work']['original_publication_year']['#text'],
    publisher=r['publisher'],
    language=r['language_code'],
    description=r['description'],
    pages=r['num_pages'],
)

# Show the whole dictionary
print(goodreads_data_dictionary)

print()

# Show a single entry, falling back to "none" when the key is absent
print(goodreads_data_dictionary.get("isbn", "none"))



{'isbn': '0671906062', 'isbn13': '9780671906061', 'title': 'Treasure / Dragon / Sahara: Clive Cussler Gift Set (Dirk Pitt, #9, #10, #11)', 'year': None, 'original_publication_year': '1996', 'publisher': None, 'language': 'eng', 'description': None, 'pages': None}

0671906062


You can create an object and store dictionary values in it as well.  In the case below, we're creating a class called BookTemplate.  Use the "setAttributes" function to populate an instance of it with dictionary items.

In [47]:
class BookTemplate:
    """Simple attribute container for book data.

    Starts with ``isbn``, ``title`` and ``author`` (all ``None`` by
    default); further attributes can be attached after construction.
    """

    def __init__(self, isbn=None, title=None, author=None):
        self.isbn, self.title, self.author = isbn, title, author

    def jsonSerialize(self):
        """Return the instance's attribute dictionary (not a copy)."""
        return vars(self)

def setAttributes(obj, dict):
    """Copy every key/value pair of ``dict`` onto ``obj`` as attributes.

    Existing attributes with the same name are overwritten; ``obj`` is
    modified in place and nothing is returned.
    """
    for name in dict:
        setattr(obj, name, dict[name])

class AuthorTemplate:
    """Empty attribute container for author data."""

    def jsonSerialize(self):
        """Return the instance's attribute dictionary (not a copy)."""
        return vars(self)
    

You create an instance of the class as shown below, and set the attributes based on the dictionary. 
Then you can access elements of the data dictionary by accessing <instance name>.<element>.  

In the example below, we access "book.isbn" and "book.title"

In [49]:
# Create an empty BookTemplate, then copy every dictionary entry onto it as an attribute
book = BookTemplate()
setAttributes (book, goodreads_data_dictionary)

In [51]:

# The dictionary keys are now available as attributes of the instance
print (book.isbn)
print (book.title)

0671906062
Treasure / Dragon / Sahara: Clive Cussler Gift Set (Dirk Pitt, #9, #10, #11)


Finally, we can handle more complex components of the API.  For example, here is how to parse book shelf information.  

First, we must write a couple of functions to help us parse the data.

In [81]:
def parse_authors(authors):
    """Extract the author name(s) from a parsed ``author`` element.

    A single author mapping yields its 'name' entry. A list of authors is
    handed to ``parse_authors_from_list``, which is not defined in the
    visible part of this file.
    """
    if type(authors) is not list:
        return authors['name']
    return parse_authors_from_list(authors)

def parse_shelves(shelves):
    """Turn parsed ``shelf`` elements into a list of name/count dictionaries.

    Shelf names are passed through ``normalize``; shelves whose normalized
    names are identical are merged by adding their counts. The 'to read'
    and 'currently reading' shelves are left out.

    Args:
        shelves: A list of shelf mappings with '@name' and '@count'
            entries, or a single such mapping.

    Returns:
        The result of ``transform_shelves_dict_to_list``: one
        ``{'name': ..., 'count': ...}`` dictionary per remaining shelf.
        Counts are kept as strings, as delivered in the input.
    """
    # Accept a lone shelf mapping as well as a list, the same way
    # parse_authors accepts a single author.
    if isinstance(shelves, dict):
        shelves = [shelves]

    excluded = ('to read', 'currently reading')  # arbitrarily exclude some shelves
    shelves_and_counts = {}
    for shelf in shelves:
        name = normalize(shelf['@name'])
        if name in excluded:
            continue
        count = shelf['@count']
        if name in shelves_and_counts:
            # Counts are strings, so add them numerically and store the sum
            # back as a string to keep the value type uniform.
            count = str(int(shelves_and_counts[name]) + int(count))
        shelves_and_counts[name] = count
    return transform_shelves_dict_to_list(shelves_and_counts)


def parse_similar_books(similar_books):
    """Reduce each parsed similar-book entry to a flat summary dictionary.

    Returns one dictionary per entry of ``similar_books`` holding title,
    ISBN, rating, cover image (filtered through ``get_image_url``), page
    count, Goodreads link, publication year and the author's name.
    """
    return [
        {
            'title': entry['title'],
            'isbn': entry['isbn'],
            'goodreads_rating': entry['average_rating'],
            'goodreads_book_img': get_image_url(entry['image_url']),
            'pages': entry['num_pages'],
            'goodreads_url': entry['link'],
            'year': entry['publication_year'],
            'author': entry['authors']['author']['name'],  # TODO: Handle multiple authors
        }
        for entry in similar_books
    ]


def transform_shelves_dict_to_list(shelves_and_counts):
    """Convert a ``{name: count}`` mapping into a list of dictionaries.

    Each item of the result has the form ``{'name': name, 'count': count}``
    and the mapping's iteration order is kept.
    """
    return [
        {'name': shelf_name, 'count': shelf_count}
        for shelf_name, shelf_count in shelves_and_counts.items()
    ]


def parse_authors_image(authors):
    """Return the image URL(s) for a parsed ``author`` element.

    A single author mapping yields the '#text' of its 'image_url' entry,
    filtered through ``get_image_url``. A list of authors is delegated to
    ``parse_authors_image_from_list``.
    """
    if type(authors) is not list:
        image_text = authors['image_url']['#text']
        return get_image_url(image_text)
    return parse_authors_image_from_list(authors)


def get_image_url(url):
    """Return ``url`` unless it is missing or a "no photo" placeholder.

    Args:
        url: Image URL string, or ``None`` when no URL is available.

    Returns:
        ``None`` when ``url`` is ``None`` or contains '/nophoto/',
        otherwise ``url`` unchanged.
    """
    # A missing URL used to raise TypeError on the substring test below.
    if url is None or '/nophoto/' in url:
        return None
    return url


def normalize(name):
    """Lower-case ``name`` and turn hyphens and underscores into spaces."""
    separators_to_spaces = str.maketrans('-_', '  ')
    return name.lower().translate(separators_to_spaces)

def similar_books(r):
    """Return the parsed similar books of a book mapping, or ``None``.

    Args:
        r: Parsed book data; the books are read from
            ``r['similar_books']['book']``.

    Returns:
        The list produced by ``parse_similar_books``, or ``None`` when the
        similar-books data is absent or an entry lacks an expected key.
    """
    # The lookup is guarded as well: it previously ran (for the debug print)
    # before the try block, so a missing key escaped the KeyError handler.
    # TypeError covers a 'similar_books' entry that is None.
    try:
        raw_books = r['similar_books']['book']
    except (KeyError, TypeError):
        return None

    print(raw_books)  # debug output kept from the original implementation

    try:
        return parse_similar_books(raw_books)
    except KeyError:
        return None

Then, we can populate a dictionary consisting of all the fields.

In [82]:

# Full set of fields: the simple entries copied straight from the parsed book
# data, plus the nested author, shelf and similar-book data run through the
# helper functions.
goodreads_data_dictionary = dict(
    isbn=r['isbn'],
    isbn13=r['isbn13'],
    title=r['title'],
    year=r['publication_year'],
    original_publication_year=r['work']['original_publication_year']['#text'],
    publisher=r['publisher'],
    language=r['language_code'],
    description=r['description'],
    pages=r['num_pages'],
    author=parse_authors(r['authors']['author']),
    shelves=parse_shelves(r['popular_shelves']['shelf']),
    similar_books=similar_books(r),
    goodreads_author_img=parse_authors_image(r['authors']['author']),
    # THINKME: If we store authors id, need to handle multiple authors case
    # 'goodreads_author_id': r['authors']['author']['id'],
    goodreads_url=r['url'],
    goodreads_book_img=get_image_url(r['image_url']),
    goodreads_rating=r['average_rating'],
    not_found=False,
)

[OrderedDict([('id', '7674'), ('title', 'The Michael Crichton Collection: Airframe / The Lost World / Timeline'), ('title_without_series', 'The Michael Crichton Collection: Airframe / The Lost World / Timeline'), ('link', 'https://www.goodreads.com/book/show/7674.The_Michael_Crichton_Collection'), ('small_image_url', 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1320433409l/7674._SX50_.jpg'), ('image_url', 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1320433409l/7674._SX98_.jpg'), ('num_pages', '0'), ('work', OrderedDict([('id', '7185280')])), ('isbn', '0739340336'), ('isbn13', '9780739340332'), ('average_rating', '4.40'), ('ratings_count', '201'), ('publication_year', '2006'), ('publication_month', '8'), ('publication_day', '29'), ('authors', OrderedDict([('author', OrderedDict([('id', '5194'), ('name', 'Michael Crichton'), ('link', 'https://www.goodreads.com/author/show/5194.Michael_Crichton')]))]))]), OrderedDict([('id', '2794671'),

In [89]:
# As noted above, we can then create an instance of the BookTemplate class and
# copy the dictionary entries onto it as attributes

book = BookTemplate()
setAttributes (book, goodreads_data_dictionary)

In [84]:
# Now we can access the data by referring to the object's attributes,
# e.g. the author value produced by parse_authors

print(book.author)

Clive Cussler


In [85]:
# Shelves are stored as a list of {'name': ..., 'count': ...} dictionaries
print (book.shelves)

[{'name': 'owned', 'count': '4'}, {'name': 'audio wanted', 'count': '3'}, {'name': 'fiction', 'count': '3'}, {'name': 'cussler', 'count': '2'}, {'name': 'audio books owned', 'count': '1'}, {'name': 'wanted', 'count': '1'}, {'name': 'have', 'count': '1'}, {'name': '01 books', 'count': '1'}, {'name': 'server', 'count': '1'}, {'name': 'no thanks', 'count': '1'}, {'name': 'misc', 'count': '1'}, {'name': 'clive cussler', 'count': '1'}, {'name': 'yr 11', 'count': '1'}, {'name': 'cussler pitt', 'count': '1'}, {'name': 'crime', 'count': '1'}, {'name': 'adventure', 'count': '1'}, {'name': 'ebook', 'count': '1'}, {'name': '21 28', 'count': '1'}, {'name': '11 25 2007', 'count': '1'}]


In [86]:
# Similar books are stored as a list of summary dictionaries (or None)
print(book.similar_books)

[{'title': 'The Michael Crichton Collection: Airframe / The Lost World / Timeline', 'isbn': '0739340336', 'goodreads_rating': '4.40', 'goodreads_book_img': 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1320433409l/7674._SX98_.jpg', 'pages': '0', 'goodreads_url': 'https://www.goodreads.com/book/show/7674.The_Michael_Crichton_Collection', 'year': '2006', 'author': 'Michael Crichton'}, {'title': 'Tom Clancy: The Sum Of All Fears, Debt Of Honor, Patriot Games', 'isbn': '0425156516', 'goodreads_rating': '4.40', 'goodreads_book_img': None, 'pages': None, 'goodreads_url': 'https://www.goodreads.com/book/show/2794671-tom-clancy', 'year': None, 'author': 'Tom Clancy'}, {'title': 'The Menacers (Matt Helm, #11)', 'isbn': '0402271726', 'goodreads_rating': '4.40', 'goodreads_book_img': 'https://i.gr-assets.com/images/S/compressed.photo.goodreads.com/books/1298775311l/2737203._SY160_.jpg', 'pages': '192', 'goodreads_url': 'https://www.goodreads.com/book/show/2737203-the-mena

So how do you get this book object to the web page?  You pass the object as a variable from routes.py to the page template, where it can be used something like this:

    var data = [
        {% if book.gbrating is defined and book.gbrating is not none %}
            {name: "average", value: {{ book.avgrating }}, color: "#25283d"},
            {name: "google", value: {{ book.gbrating }}, color: "#f17105"},
        {% endif %}
        {name: "goodreads", value: {{ book.goodreads_rating }}, color: "#e6c229"}
    ];

You can also get author data using similar methods

In [91]:
def request_author_id(title, author):
    """Look up the Goodreads author id via one of the author's books.

    Requests the book with ``request_book`` and, on HTTP 200, extracts the
    id with ``parse_author_id``; any other status code yields ``None``.
    Both helpers are not defined in the visible part of this file.
    """
    response = request_book(author, title)
    if response.status_code != 200:
        return None
    return parse_author_id(response, author)

def build_url_author(id, key=None):
    """Build the Goodreads "author show" URL (XML format) for an author id.

    Args:
        id: Goodreads author id, inserted into the URL path.
        key: Developer key to use; defaults to the module-level
            ``key_goodreads``.

    Returns:
        The request URL as a string.
    """
    # This file defines the key as ``key_goodreads``; the previously used
    # name ``goodreads_key`` is not defined in the visible source.
    if key is None:
        key = key_goodreads
    return 'https://www.goodreads.com/author/show/%s?format=xml&key=%s' % (id, key)
    

def goodreads_get_author_data(name, a_book):
    """Fetch and parse Goodreads data for the author ``name``.

    The author id is resolved through ``request_author_id`` using
    ``a_book``, then the author page is requested and parsed with
    ``parse_response_author``.

    Returns:
        The parsed author dictionary, or ``{'not_found': True}`` when the
        id cannot be resolved or the response status is not 200.

    NOTE(review): ``request`` is not defined in the visible part of this
    file; it is assumed to be provided elsewhere.
    """
    author_id = request_author_id(a_book, name)
    if author_id is not None:
        url = build_url_author(author_id)
        print (url)
        response = request(url)
        if response.status_code == 200:
            return parse_response_author(response)
    return {'not_found': True}
    
    
def parse_response_author(response):
    """Parse a Goodreads author response into a flat dictionary.

    Args:
        response: Response object whose ``text`` holds the XML document.

    Returns:
        A dictionary with the keys 'name', 'about', 'influences',
        'hometown', 'born_date', 'goodreads_link', 'goodreads_img_url',
        'books_dict' and 'books'. 'born_date' is formatted like
        "July 15, 1931", or is ``None`` when no birth date is given.

    Raises:
        ValueError: If 'born_at' is present but not in ``YYYY/MM/DD`` form.
    """
    # Imported locally because the file's import cell does not import datetime.
    from datetime import datetime

    author = xmltodict.parse(response.text)['GoodreadsResponse']['author']

    # Fixing birthdate format. The day number is taken from the parsed date
    # rather than the "%-d" directive, which is platform-specific.
    birthdate = None
    if author['born_at'] is not None:
        born = datetime.strptime(author['born_at'], '%Y/%m/%d')
        birthdate = '%s %d, %d' % (born.strftime('%B'), born.day, born.year)

    return {
        'name': author['name'],
        'about': author['about'],
        'influences': parse_influences_html(author['influences']),
        'hometown': author['hometown'],
        'born_date': birthdate,
        'goodreads_link' : author['link'],
        'goodreads_img_url': get_image_url(author['large_image_url']),
        'books_dict' : parse_authors_books_dict(author['books']['book']),
        'books' : parse_authors_books(author['books']['book'])
    }

def parse_authors_books_dict(books_dict):
    """Build link/date entries for an author's books.

    Books without a 'published' value are skipped.

    Args:
        books_dict: Iterable of parsed book mappings with the entries
            'title', 'title_without_series' and 'published'.

    Returns:
        A list of ``{'content': <html link>, 'start': <published>}``
        dictionaries. The link text is the HTML-escaped book title and the
        link target is the space-encoded, HTML-escaped
        'title_without_series'.
    """
    from html import escape

    b = []
    for book in books_dict:

        # We'd ideally get the original publication date of the book.  Unfortunately this only returns
        # the most recent edition. To get the original date, we'd need to make a separate call to goodreads by isbn and get the original_publication_date

        if not book['published']:
            continue

        t = encode_white_spaces(book['title_without_series'])

        # Titles come from the API response; escape them so quotes, '<' or
        # '&' in a title cannot break or inject markup into the anchor.
        b.append({
            'content': '<a href="' + escape(t, quote=True) + '">' + escape(book['title'], quote=True) + '</a>',
            'start': book['published'],
        })

    return b


def parse_authors_books(books):
    """Reduce each parsed book of an author to a flat detail dictionary.

    Returns one dictionary per entry of ``books`` with ISBN, title, cover
    image (filtered through ``get_image_url``), link, page count,
    publisher, publication date parts, rating data, description and the
    raw 'authors' entry.
    """
    return [
        {
            'isbn' : entry['isbn'],
            'title' : entry['title'],
            'goodreads_image_url' : get_image_url(entry['image_url']),
            'goodreads_link' : entry['link'],
            'pages' : entry['num_pages'],
            'publisher' : entry['publisher'],
            'publication_day' : entry['publication_day'],
            'publication_year' : entry['publication_year'],
            'publication_month' : entry['publication_month'],
            'goodreads_rating' : entry['average_rating'],
            'goodreads_raters' : entry['ratings_count'],
            'description' : entry['description'],
            'authors' : entry['authors'] # TODO: Get authors in a nice format - mostly to know if there are other authors
        }
        for entry in books
    ]


def parse_influences_html(influences_html):
    """Extract the link texts from an HTML string of ``<a>`` elements.

    Returns ``None`` when ``influences_html`` is ``None``. Otherwise the
    string is split on '</a>' and, for every piece that contains a '>',
    the text following the first '>' (up to the next '>') is collected.
    """
    if influences_html is None:
        return None
    pieces = (chunk.split('>') for chunk in influences_html.split('</a>'))
    return [parts[1] for parts in pieces if len(parts) > 1]


def parse_authors_image(authors):
    """Return the image URL(s) for a parsed ``author`` element.

    A single author mapping yields the '#text' of its 'image_url' entry,
    filtered through ``get_image_url``. A list of authors is delegated to
    ``parse_authors_image_from_list``.

    NOTE(review): a function with this name and body is also defined
    earlier in this file; this later definition replaces it.
    """
    if type(authors) is not list:
        image_text = authors['image_url']['#text']
        return get_image_url(image_text)
    return parse_authors_image_from_list(authors)

def parse_authors_image_from_list(authors):
    """Return the image URL of every author in ``authors``.

    Each URL is the '#text' of the author's 'image_url' entry, filtered
    through ``get_image_url``; the order of ``authors`` is kept.
    """
    return [
        get_image_url(entry['image_url']['#text'])
        for entry in authors
    ]