In [1]:
import json
import requests

In [2]:
# Function to fetch subjects from Google Books API based on work title
def fetch_subjects_from_google_books(sourceTitle, timeout=10):
    """Look up the categories Google Books lists for a work title.

    Sends an ``intitle:"<title>"`` query to the Google Books volumes endpoint,
    asking only for ``volumeInfo/categories``, and inspects the first result.
    The lookup is best-effort: every failure mode is reported with ``print``
    and mapped to an empty list so a batch run over many titles keeps going.

    Args:
        sourceTitle: Title to search for; it is placed inside the quoted
            ``intitle:`` query string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``categories`` value of the first result's ``volumeInfo``, or an
        empty list when the request fails, the server answers with an error
        status, the body is not valid JSON, or the first result has no
        categories.
    """
    base_url = "https://www.googleapis.com/books/v1/volumes"
    params = {"q": f'intitle:"{sourceTitle}"', "fields": "items(volumeInfo/categories)"}

    # Network problems (DNS, refused connection, timeout) and HTTP error
    # statuses must not abort the caller's loop over all books.
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as error:
        print(f"Request failed for {sourceTitle}: {error}")
        return []

    # Every JSON decode error that response.json() can raise is a ValueError,
    # regardless of the requests version or JSON backend in use.
    try:
        data = response.json()
    except ValueError:
        print(f"Failed to decode JSON for {sourceTitle}")
        return []

    # Walk the payload defensively: any missing or mis-typed level means
    # "no categories" rather than an exception.
    items = data.get('items') if isinstance(data, dict) else None
    if not isinstance(items, list) or not items:
        return []

    first_item = items[0]
    volume_info = first_item.get('volumeInfo') if isinstance(first_item, dict) else None
    if isinstance(volume_info, dict) and 'categories' in volume_info:
        return volume_info['categories']
    return []


In [5]:
def update_json_with_subjects(file_path):
    """Load book records from a JSON file and attach Google Books subjects.

    The file is expected to hold a JSON array of objects. Each record gets a
    ``'subjects'`` key holding whatever ``fetch_subjects_from_google_books``
    returns for the record's ``'sourceTitle'``. Records whose title is missing
    or empty get an empty list and trigger no lookup.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The loaded list of records, each updated in place with ``'subjects'``.
        Nothing is written back to ``file_path``.
    """
    # Read as UTF-8 explicitly; the platform default encoding is not
    # guaranteed to decode non-ASCII titles correctly.
    with open(file_path, 'r', encoding='utf-8') as file:
        books = json.load(file)

    # Fetch and add subjects for each book
    for book in books:
        title = book.get('sourceTitle')
        # A record without a usable title must not abort the whole run, and an
        # empty intitle: query would be a wasted request.
        book['subjects'] = fetch_subjects_from_google_books(title) if title else []

    # Return the updated books data
    return books


In [6]:
def save_books_to_json(books, file_path):
    """Write the book records to a JSON file and print a confirmation.

    Args:
        books: JSON-serialisable data to store (the list of book records).
        file_path: Destination path; an existing file is overwritten.
    """
    # ensure_ascii=False emits non-ASCII characters as-is, so the file must be
    # opened as UTF-8 explicitly rather than with the platform default
    # encoding, which may be unable to represent them.
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(books, file, indent=4, ensure_ascii=False)
    print(f"Updated JSON saved as '{file_path}'.")

In [8]:
# Input JSON file holding the book records; each record's 'sourceTitle' is
# what gets looked up.
file_path = './refined_libwebscraping.json'  # Replace with the path to your JSON file
# Looks up subjects for the records via the Google Books API, so this cell
# needs network access. The result is kept in memory only.
updated_books = update_json_with_subjects(file_path)

In [9]:
# Write the enriched records to 'googlebooks.json' (relative to the working
# directory) instead of overwriting the input file.
save_books_to_json(updated_books, 'googlebooks.json')

Updated JSON saved as 'googlebooks.json'.
