# Open Library

https://openlibrary.org/dev/docs/api/books

In [1]:
# Libraries
import pandas as pd
import requests
import json
import time
import random

#### Example lookup of a single book by ISBN

In [3]:
# ISBN of the book to retrieve (the request below always looks it up under the "ISBN:" bibkey)
book_identifier = '9780439554930'

# Make a GET request to the Open Library Books API; the timeout keeps the cell from hanging forever
response = requests.get(
    f'https://openlibrary.org/api/books?bibkeys=ISBN:{book_identifier}&format=json&jscmd=data',
    timeout=30,
)

# Raise an HTTPError on a 4xx/5xx reply instead of failing later while parsing the body
response.raise_for_status()

# Parse the JSON response (a dict keyed by the requested bibkey)
book_data = response.json()

# Extract the title of the book from the response
book_title = book_data[f'ISBN:{book_identifier}']['title']

print(book_title)

Harry Potter and the Sorcerers Stone


In [4]:
# Different keys and data (random search)

In [5]:
book_data[f'ISBN:{book_identifier}'].keys()  # fields returned for this book

dict_keys(['url', 'key', 'title', 'authors', 'identifiers', 'publishers', 'publish_date', 'subjects', 'subject_places', 'subject_people', 'excerpts', 'links', 'cover'])

In [6]:
book_data[f'ISBN:{book_identifier}']['title']  # title

'Harry Potter and the Sorcerers Stone'

In [7]:
book_data[f'ISBN:{book_identifier}']['authors'][0]['name']  # name of the first listed author

'J. K. Rowling'

In [8]:
book_data[f'ISBN:{book_identifier}']['identifiers']['isbn_13'][0]  # first ISBN-13 identifier

'9780439554930'

In [9]:
book_data[f'ISBN:{book_identifier}']['publish_date']  # publish date (a string in the response)

'2003'

In [10]:
#book_data['ISBN:9780439554930']['subjects']

In [11]:
book_data[f'ISBN:{book_identifier}']['subject_places']

[{'name': 'England', 'url': 'https://openlibrary.org/subjects/place:england'},
 {'name': 'Hogwarts School of Witchcraft and Wizardry',
  'url': 'https://openlibrary.org/subjects/place:hogwarts_school_of_witchcraft_and_wizardry'},
 {'name': '4 Privet Drive',
  'url': 'https://openlibrary.org/subjects/place:4_privet_drive'},
 {'name': 'Diagon Alley',
  'url': 'https://openlibrary.org/subjects/place:diagon_alley'},
 {'name': 'Leaky Cauldron',
  'url': 'https://openlibrary.org/subjects/place:leaky_cauldron'},
 {'name': 'Gringotts Wizarding Bank',
  'url': 'https://openlibrary.org/subjects/place:gringotts_wizarding_bank'},
 {'name': 'Forbidden Forest',
  'url': 'https://openlibrary.org/subjects/place:forbidden_forest'},
 {'name': "King's Cross Station",
  'url': "https://openlibrary.org/subjects/place:king's_cross_station"},
 {'name': 'Platform Nine and Three-quarters',
  'url': 'https://openlibrary.org/subjects/place:platform_nine_and_three-quarters'}]

#### Response keys of a search for 'fiction'

In [91]:
# Set url and query parameters for the API request
base_url = 'https://openlibrary.org/search.json'

params = {'q': 'fiction'}

# Make the API request; the timeout keeps the cell from hanging on a stalled connection
response = requests.get(base_url, params=params, timeout=30)

# Raise an HTTPError on a 4xx/5xx reply instead of trying to decode an error page as JSON
response.raise_for_status()

# Decode the JSON body; `data` is what the inspection cells below index into
data = response.json()
data

{'numFound': 1450694,
 'start': 0,
 'numFoundExact': True,
 'docs': [{'key': '/works/OL21886356W',
   'type': 'work',
   'seed': ['/books/OL29847262M',
    '/books/OL30319264M',
    '/books/OL30325696M',
    '/books/OL30357773M',
    '/books/OL34214743M',
    '/books/OL30334381M',
    '/books/OL29956311M',
    '/books/OL34211537M',
    '/books/OL34263415M',
    '/books/OL30008154M',
    '/books/OL30337819M',
    '/books/OL34130662M',
    '/books/OL36147924M',
    '/books/OL36151761M',
    '/books/OL36602364M',
    '/books/OL36590163M',
    '/books/OL36854011M',
    '/books/OL36872062M',
    '/books/OL36165308M',
    '/books/OL36060391M',
    '/books/OL35052869M',
    '/books/OL34289223M',
    '/books/OL37603261M',
    '/books/OL36170472M',
    '/books/OL34244325M',
    '/books/OL36461059M',
    '/books/OL36984634M',
    '/books/OL36855851M',
    '/books/OL34283889M',
    '/books/OL37646738M',
    '/books/OL36969369M',
    '/books/OL36994828M',
    '/books/OL36470239M',
    '/books/OL36

In [106]:
# NOTE: this lookup raises KeyError — the first search result has no 'subject_key'
# entry (it is absent from the data['docs'][0].keys() listing shown further down).
data['docs'][0]['subject_key']

KeyError: 'subject_key'

In [13]:
# Main keys
data.keys()

dict_keys(['numFound', 'start', 'numFoundExact', 'docs', 'num_found', 'q', 'offset'])

['numFound', 'start', 'numFoundExact', 'docs', 'num_found', 'q', 'offset']

In [19]:
data['numFound']

1450694

In [27]:
data['start']

0

In [30]:
data['numFoundExact']

True

In [34]:
data['num_found']

1450694

In [38]:
data['q'] # the search query echoed back ('fiction' is the value passed as `q` in the request)

'fiction'

In [107]:
data['offset']  # NOTE(review): no output was recorded for this cell — presumably the value is None; confirm

In [33]:
# Data docs
data['docs'][0].keys()

dict_keys(['key', 'type', 'seed', 'title', 'title_suggest', 'title_sort', 'edition_count', 'edition_key', 'publish_date', 'publish_year', 'first_publish_year', 'number_of_pages_median', 'isbn', 'last_modified_i', 'ebook_count_i', 'ebook_access', 'has_fulltext', 'public_scan_b', 'readinglog_count', 'want_to_read_count', 'currently_reading_count', 'already_read_count', 'publisher', 'language', 'author_key', 'author_name', 'author_alternative_name', 'publisher_facet', '_version_', 'author_facet'])

['key', 'type', 'seed', 'title', 'title_suggest', 'title_sort', 'edition_count', 'edition_key', 'publish_date', 'publish_year', 'first_publish_year', 'number_of_pages_median', 'isbn', 'last_modified_i', 'ebook_count_i', 'ebook_access', 'has_fulltext', 'public_scan_b', 'readinglog_count', 'want_to_read_count', 'currently_reading_count', 'already_read_count', 'publisher', 'language', 'author_key', 'author_name', 'author_alternative_name', 'publisher_facet', '_version_', 'author_facet']

In [49]:
data['docs'][0]['title'] # title

'Herland Illustrated'

In [51]:
data['docs'][0]['edition_count'] # editions

104

In [61]:
# data['docs'][0]['edition_key'] # key for each edition
# Show only how many edition keys there are instead of printing the whole list;
# in the recorded run the count (104) equals edition_count.
display(len(data['docs'][0]['edition_key'])) 

104

In [52]:
data['docs'][0]['first_publish_year'] # first_publish_date

2019

In [53]:
data['docs'][0]['publish_year'] # publish_year

[2019, 2020, 2021, 2022]

In [62]:
data['docs'][0]['number_of_pages_median'] # pages

195

In [64]:
#data['docs'][0]['isbn'] # ISBN
# Count the ISBNs instead of listing them; in the recorded run there are 105,
# one more than the 104 editions of this work.
len(data['docs'][0]['isbn'])

105

In [67]:
data['docs'][0]['last_modified_i']

1658517449

In [68]:
data['docs'][0]['ebook_count_i']

0

In [70]:
data['docs'][0]['ebook_access']

'no_ebook'

In [71]:
data['docs'][0]['has_fulltext']

False

In [72]:
data['docs'][0]['public_scan_b']

False

In [73]:
data['docs'][0]['readinglog_count']

6

In [74]:
data['docs'][0]['want_to_read_count']

6

In [75]:
data['docs'][0]['currently_reading_count']

0

In [76]:
data['docs'][0]['already_read_count']

0

In [65]:
data['docs'][0]['publisher']

['Independently Published']

In [66]:
data['docs'][0]['language']

['eng']

In [77]:
data['docs'][0]['author_key']

['OL408063A']

In [78]:
data['docs'][0]['author_name']

['Charlotte Perkins Gilman']

In [79]:
data['docs'][0]['author_alternative_name']

['Gilman, Charlotte Perkins',
 'Charlotte perkins Gilman',
 'Charlotte Perkins Perkins Gilman',
 'Charlotte Perkins Gilman Gilman',
 'Charlotte Perkins Stetson Gilman',
 'CHARLOTTE PERKINS GILMAN',
 'Charlotte Perkins 1860-1935 Gilman',
 'Perkins Gilman',
 'Charlotte P. Gilman',
 'Charlotte Gilman',
 'Gilman, Charlotte Perkins 1860-1935.',
 'Charlotte Perkins, Gilman,',
 'Charlotte, Perkins Gilman',
 'Charlotte, Perkins Stetson Gilman',
 'Charlotte (Perkins) Steton Gilman']

In [85]:
data['docs'][0]['publisher_facet']

['Independently Published']

In [101]:
data['docs'][0]['_version_']

1759765126443958272

In [87]:
data['docs'][0]['author_facet']

['OL408063A Charlotte Perkins Gilman']

In [None]:
def _search_doc_to_record(doc):
    """Build the output record for one search hit; per-work detail fields start as None."""
    year = doc.get('first_publish_year')

    # The 'isbn' list mixes identifier lengths, so pick the first 13-digit entry
    # rather than blindly taking element 0 (which also failed on an empty list).
    isbn_13 = next(
        (
            isbn for isbn in doc.get('isbn') or []
            if isinstance(isbn, str) and len(isbn) == 13 and isbn.isdigit()
        ),
        None,
    )

    return {
        'Title': doc.get('title'),
        'Author': doc.get('author_name'),
        'ISBN_13': isbn_13,
        'Publish_Date': int(year) if year is not None else None,
        'Description': None,
        'Average_Rating': None,
        'Number_of_Pages': None,
        'Price': None,
        'Awards': None,
    }


def _work_detail_fields(work):
    """Extract the optional detail fields from the JSON document fetched for one book key."""
    description = work.get('description')
    # Handle a description wrapped as {'type': ..., 'value': ...} as well as a plain string.
    if isinstance(description, dict):
        description = description.get('value')

    # NOTE(review): 'details', 'items' and 'awards' do not appear in any response
    # shown in this notebook; they are read defensively and may always be None.
    details = work.get('details') or {}
    price = next(
        (item['price'] for item in work.get('items') or [] if 'price' in item),
        None,
    )

    return {
        'Description': description,
        'Average_Rating': details.get('average_rating'),
        'Number_of_Pages': details.get('number_of_pages'),
        'Price': price,
        'Awards': work.get('awards'),
    }


def get_open_library_data(query, max_books=None, session=None, sleep_range=(1, 3)):
    """Collect books matching `query` from the Open Library search API.

    Pages through the search endpoint and, for every hit, fetches
    ``https://openlibrary.org<key>.json`` to fill in the detail fields.

    Parameters
    ----------
    query : str
        Search string sent as the ``q`` parameter.
    max_books : int or None, optional
        Stop after this many books. ``None`` (default) keeps going until the
        reported total is reached or a page comes back empty. A search such as
        'fiction' reports over a million hits, so set this for exploratory runs.
    session : object, optional
        Anything with a ``requests``-compatible ``get(url, params=..., timeout=...)``
        method (e.g. ``requests.Session()``). Defaults to the ``requests`` module.
    sleep_range : tuple(int, int) or None, optional
        Inclusive bounds, in seconds, of the random pause after each book.
        ``None`` disables the pause.

    Returns
    -------
    list of dict or None
        One dict per book with the keys ``Title``, ``Author``, ``ISBN_13``,
        ``Publish_Date``, ``Description``, ``Average_Rating``,
        ``Number_of_Pages``, ``Price`` and ``Awards``; missing values are None.
        Returns None (after printing an error) if a search request does not
        answer with HTTP 200.
    """
    site_url = 'https://openlibrary.org'
    search_url = f'{site_url}/search.json'
    http = requests if session is None else session

    if max_books is not None and max_books <= 0:
        return []

    books = []
    page_num = 1

    while True:
        response = http.get(
            search_url, params={'q': query, 'page': page_num}, timeout=30
        )
        if response.status_code != 200:
            print(f"Error: API request failed with status code {response.status_code}")
            return None

        data = response.json()
        docs = data.get('docs') or []

        # An empty page means there is nothing left to fetch, whatever the
        # reported total says; without this guard the loop could spin forever.
        if not docs:
            break

        for book in docs:
            record = _search_doc_to_record(book)

            # Search keys are site-relative paths such as '/works/OL21886356W',
            # so they must be joined onto the site URL before being requested.
            key = book.get('key')
            if key:
                book_response = http.get(f'{site_url}{key}.json', timeout=30)
                if book_response.status_code == 200:
                    record.update(_work_detail_fields(book_response.json()))

            books.append(record)

            if max_books is not None and len(books) >= max_books:
                return books

            # Random pause so the crawl does not overwhelm the Open Library API
            if sleep_range:
                time.sleep(random.randint(*sleep_range))

        # Stop once everything the API reports has been collected
        total_found = data.get('num_found', data.get('numFound', 0))
        if len(books) >= total_found:
            break

        page_num += 1

    return books
