In [1]:
# Load libraries

import pandas as pd
import re

from olclient.openlibrary import OpenLibrary
# Shared Open Library client; every search and record lookup below is made through `ol`
ol = OpenLibrary()

In [2]:
# Example book search using title and author.
# The match is kept in temp_book. A search can also come back as None (the batch
# loop later in this notebook checks for that); the attribute lookups in the
# next cells would fail in that case.

temp_book = ol.Work.search(title='The Widows of Malabar Hill', author='Sujata Massey')

In [3]:
# The search result is an object whose fields are read as attributes, e.g. the matched title

temp_book.title

'The Widows of Malabar Hill'

In [4]:
# Identifiers recorded for the match, grouped by scheme (olid, isbns, oclc, lccn, ...).
# OL19729016W, listed under 'olid', is the book's Open Library ID

temp_book.identifiers

{'olid': ['OL19729016W'],
 'isbns': [],
 'oclc': ['1004663337', '983148226'],
 'lccn': ['2017021391'],
 'goodreads': [],
 'librarything': []}

In [5]:
# Authors of the match: a list of dicts with 'name' and 'olid' keys.
# OL39452A is the author's Open Library ID

temp_book.authors

[{'name': 'Sujata Massey', 'olid': 'OL39452A'}]

In [6]:
# Use book's Open Library ID (the 'olid' value shown earlier) to access all available information.
# The fetched record is stored in temp_book_record and inspected in the following cells.

temp_book_record = ol.Work.get('OL19729016W')

In [7]:
# Book information includes a list of subjects.
# Not every record has this attribute, which is why the batch loop later in
# this notebook checks for it with hasattr() before reading it.

temp_book_record.subjects

['Detective and mystery stories',
 'Women lawyers',
 'Fiction',
 'Fiction, mystery & detective, historical',
 'Fiction, legal',
 'Widows, fiction',
 'Historical fiction',
 'Mystery & Detective',
 'Traditional British',
 'Historical']

In [8]:
# Book information includes a list of places/settings (plain strings, like the subjects)

temp_book_record.subject_places

['India', 'Mumbai (India)']

In [9]:
# See all available book information.
# print() shows the record's class followed by a dict of all of its fields, so the output is long.

print(temp_book_record)

<class 'olclient.entity_helpers.work.get_work_helper_class.<locals>.Work' {'identifiers': {}, 'olid': 'OL19729016W', '_editions': [], 'description': '"Introducing an extraordinary female lawyer-sleuth in a new historical series set in 1920s Bombay. Perveen Mistry, the daughter of a respected Zoroastrian family, has just joined her father\'s law firm, becoming one of the first female lawyers in India. Armed with a law degree from Oxford, Perveen also has a tragic personal history that makes her especially devoted to championing and protecting women\'s legal rights. Mistry Law has been appointed to execute the will of Mr. Omar Farid, a wealthy Muslim mill owner who has left three widows behind. But as Perveen is going through the paperwork, she notices something strange: all three of the wives have signed over their full inheritance to a charity. What will they live on if they forfeit what their husband left them? Perveen is suspicious, especially since one of the widows has signed her f

In [10]:
# Load example list of book titles and authors in a .csv file.
# The path is relative to the notebook's working directory. The file must provide
# 'Title' and 'Author' columns, because the lookup loop below reads exactly those.

sample_data = pd.read_csv('sample.csv')

In [11]:
# Preview the loaded titles and authors
sample_data

Unnamed: 0,Title,Author
0,The Ghost Map,Steven Johnson
1,The Gods Themselves,Isaac Asimov
2,Running Wild,Lucy Jane Bledsoe
3,Code Name Edelweiss,Stephanie Landsem
4,The Maid,Nita Prose
5,Rebecca,Stephanie Du Maurier


In [13]:
# Container for the information fetched from Open Library: one list per output
# column, all starting out empty. The retrieval loop below appends one value per
# book to each list.
# Note: re-running this cell resets every list to empty.

new_dict = {
    column: []
    for column in ('OL_olid', 'OL_title', 'OL_author_id', 'OL_author', 'OL_subjects')
}

In [14]:
# Loop through each book and retrieve information from Open Library.
# The lists in new_dict are emptied first, so re-running this cell never
# appends a second copy of the rows.


def _first_author_olid(work):
    """Return the Open Library ID of the first author listed on a work record.

    Returns None when the record has no 'authors' attribute, when the author
    list is empty, or when the first entry carries no author key.
    """
    authors = getattr(work, 'authors', None)
    if not authors:                                               #attribute missing, None, or empty list
        return None
    author_key = (authors[0].get('author') or {}).get('key')
    if not author_key:
        return None
    return re.sub(r'/authors/', r'', author_key)                  #strip the '/authors/' prefix, leaving only the ID


def fetch_ol_record(title, author=None):
    """Look up one book on Open Library and return its details as a dict.

    The search first uses title and author together; if that finds nothing
    (or no author is given) it is retried with the title alone.

    Parameters:
        title:  book title to search for.
        author: author name; None or NaN means "search by title only".

    Returns:
        A dict with the same keys as new_dict. Every value that could not be
        retrieved is None; if no book is found at all, every value is None.
    """
    record = dict.fromkeys(new_dict)                              #every column defaults to None

    match = None
    if not pd.isna(author):                                       #skip the combined search when the CSV has no author
        match = ol.Work.search(title=title, author=author)
    if match is None:                                             #title and author combination can't be found
        match = ol.Work.search(title=title)                       #try just the title (sometimes no author listed)
    if match is None:                                             #still nothing: leave "none" in all columns
        return record

    olids = match.identifiers.get('olid') or []
    if isinstance(olids, str):                                    #tolerate a bare ID string instead of a list
        olids = [olids]
    if not olids:                                                 #a match without an ID can't be looked up
        return record
    work_olid = olids[0]                                          #use one ID; never glue several IDs together
    record['OL_olid'] = work_olid

    work = ol.Work.get(work_olid)                                 #get Open Library data associated with that ID
    record['OL_title'] = getattr(work, 'title', None)             #kept so the match can be checked later
    record['OL_subjects'] = getattr(work, 'subjects', None)

    author_olid = _first_author_olid(work)
    if author_olid is not None:
        record['OL_author_id'] = author_olid
        record['OL_author'] = ol.Author.get(author_olid).name     #name associated with author's Open Library ID
    return record


for values in new_dict.values():                                  #start from empty lists on every run
    values.clear()

for title, author in zip(sample_data['Title'], sample_data['Author']):   #for all rows in book dataset, in order
    record = fetch_ol_record(title, author)
    for column, value in record.items():                          #one new value per column, so lists stay equal length
        new_dict[column].append(value)
            

In [15]:
# Turn the collected lists into a DataFrame: each dictionary key becomes a column

new_df = pd.DataFrame(new_dict)

In [16]:
# Inspect the fields retrieved from Open Library, one row per book
new_df

Unnamed: 0,OL_olid,OL_title,OL_author_id,OL_author,OL_subjects
0,OL2668651W,The Ghost Map,OL389304A,Steven Johnson,"[Physicians, Epidemics, Cholera, Medical, Hist..."
1,OL46395W,The Gods Themselves,OL34221A,Isaac Asimov,"[Fiction in English, Hugo Award Winner, award:..."
2,OL20453834W,Running Wild,OL444181A,Lucy Jane Bledsoe,"[Children's fiction, Fathers and daughters, fi..."
3,OL28406392W,Code Name Edelweiss,OL7146199A,Stephanie Landsem,
4,OL24554831W,The Maid,OL9251784A,Nita Prose,"[English literature, nyt:combined-print-and-e-..."
5,OL36633W,Rebecca,OL34047A,Daphne du Maurier,"[Married women, Cornwall (England : County), R..."


In [17]:
# Append OL data to original list of titles and authors, to check if matches are correct.
# axis=1 places the two frames side by side. Rows are paired by index label, and both
# frames carry the same 0..n-1 index here, so row k of each describes the same book.

combined_data = pd.concat([sample_data, new_df], axis = 1)

In [18]:
# Compare Title/Author with OL_title/OL_author row by row; a difference (such as
# the author in row 5) marks a lookup whose match should be checked by hand
combined_data

Unnamed: 0,Title,Author,OL_olid,OL_title,OL_author_id,OL_author,OL_subjects
0,The Ghost Map,Steven Johnson,OL2668651W,The Ghost Map,OL389304A,Steven Johnson,"[Physicians, Epidemics, Cholera, Medical, Hist..."
1,The Gods Themselves,Isaac Asimov,OL46395W,The Gods Themselves,OL34221A,Isaac Asimov,"[Fiction in English, Hugo Award Winner, award:..."
2,Running Wild,Lucy Jane Bledsoe,OL20453834W,Running Wild,OL444181A,Lucy Jane Bledsoe,"[Children's fiction, Fathers and daughters, fi..."
3,Code Name Edelweiss,Stephanie Landsem,OL28406392W,Code Name Edelweiss,OL7146199A,Stephanie Landsem,
4,The Maid,Nita Prose,OL24554831W,The Maid,OL9251784A,Nita Prose,"[English literature, nyt:combined-print-and-e-..."
5,Rebecca,Stephanie Du Maurier,OL36633W,Rebecca,OL34047A,Daphne du Maurier,"[Married women, Cornwall (England : County), R..."


In [20]:
# Export as csv (the path is relative to the notebook's working directory).
# NOTE(review): by default to_csv also writes the row index as an unnamed first
# column; confirm that is wanted, or pass index=False.

combined_data.to_csv("combined_data.csv")

In [21]:
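# Note that not all books have all information available on Open Library:
# for example, 'Code Name Edelweiss' (row 3 above) came back without any subjects,
# so its OL_subjects value is empty.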