In [1]:
import json
import requests

In [14]:
def fetch_book_details(work_title, author_english, timeout=10):
    """Look up a book on Open Library and return its subjects and description.

    Two HTTP requests are made at most: a search by title/author, then a
    follow-up fetch of the record referenced by the first search hit's
    ``key``. The lookup is best-effort: failures are printed and whatever
    was collected so far is returned, so a batch loop never aborts on one
    bad title.

    Args:
        work_title: Title sent as the ``title`` search parameter.
        author_english: Author name sent as the ``author`` search parameter.
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout a single stalled connection would block the
            caller indefinitely.

    Returns:
        dict with two keys:
            ``'subjects'`` - the first hit's ``subject`` value, or ``[]``.
            ``'summary'``  - the record's ``description`` (its ``value`` field
            when the description is an object), or ``''``.
    """
    base_url = "https://openlibrary.org/search.json"
    params = {"title": work_title, "author": author_english}
    details = {'subjects': [], 'summary': ''}

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        if response.status_code != 200:
            print(f"Error fetching data for {work_title}: Status code {response.status_code}")
            return details

        data = response.json()
        # A payload without 'docs' (or with an empty list) means "no match";
        # treat it as such instead of raising KeyError.
        docs = data.get('docs') if isinstance(data, dict) else None
        if not docs:
            return details

        # Assume the first document is the correct one.
        doc = docs[0]
        if 'subject' in doc:
            details['subjects'] = doc['subject']

        # NOTE(review): search hits appear to carry a work key ("/works/...")
        # rather than an edition key - confirm against the Open Library docs.
        record_key = doc.get('key')
        if not record_key:
            return details

        record_response = requests.get(
            f"https://openlibrary.org{record_key}.json", timeout=timeout
        )
        if record_response.status_code != 200:
            print(f"Error fetching description for {work_title}: Status code {record_response.status_code}")
            return details

        record = record_response.json()
        if isinstance(record, dict) and 'description' in record:
            description = record['description']
            # The description is either a plain string or an object whose
            # text lives under 'value'.
            if isinstance(description, dict):
                description = description.get('value', '')
            details['summary'] = description
    except json.JSONDecodeError:
        # Must come before RequestException: recent versions of requests raise
        # a JSON error that is also a RequestException, which would otherwise
        # shadow this branch and print the wrong message.
        print(f"Failed to decode JSON for {work_title} and author {author_english}")
    except requests.exceptions.RequestException as e:
        print(f"Request exception for {work_title} and author {author_english}: {e}")

    return details


In [18]:
# Load the scraped book records into `books`.
# The encoding is pinned so decoding does not depend on the platform's locale
# default; assumes the file is UTF-8 (the JSON norm) - TODO confirm.
with open('./refined_fiction_libwebscraping.json', 'r', encoding='utf-8') as file:
    books = json.load(file)

In [21]:
# Enrich each record in `books` (loaded from the JSON file) in place.
for book in books:
    title, author = book['sourceTitle'], book['authorEnglish']
    details = fetch_book_details(title, author)
    # Merge the returned 'subjects' and 'summary' keys into the record,
    # overwriting any existing values under those keys.
    book.update(details)

In [22]:
# Save the enriched list to a new JSON file.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file must be
# opened as UTF-8 explicitly; the locale default encoding (e.g. cp1252 on
# Windows) can raise UnicodeEncodeError on such text.
with open('updated_books_openlib_0221.json', 'w', encoding='utf-8') as file:
    json.dump(books, file, indent=4, ensure_ascii=False)