In [2]:
import json
import requests
import os
from dotenv import load_dotenv

In [3]:
# Load the variables defined in the local .env file
load_dotenv()

# Read the Google Books API key from the environment (None when it is not set)
api_key = os.environ.get('GOOGLE_BOOKS_API_KEY')

In [4]:
def load_json_data(filepath):
    """Read a UTF-8 encoded JSON file and return the decoded Python object.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The Python value that the file's top-level JSON value decodes to.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


In [5]:
def save_json_data(data, filepath):
    """Write ``data`` to ``filepath`` as UTF-8 JSON, replacing any existing file.

    Non-ASCII characters are written as-is (not ``\\uXXXX``-escaped) and the
    output is pretty-printed with a four-space indent.

    Args:
        data: JSON-serializable object to store.
        filepath: Destination path.
    """
    dump_options = {'ensure_ascii': False, 'indent': 4}
    with open(filepath, mode='w', encoding='utf-8') as out_file:
        json.dump(data, out_file, **dump_options)

In [6]:
def query_google_books(title, publisher, year, api_key, timeout=10):
    """Query the Google Books volumes endpoint by title, publisher and year.

    The search string is handed to ``requests`` through ``params`` so that it
    is URL-encoded; characters such as ``&``, ``#`` or ``+`` inside a title
    therefore stay part of the search instead of corrupting the URL.

    Args:
        title: Book title, searched as a quoted phrase with ``intitle:``.
        publisher: Publisher name, searched as a quoted phrase with
            ``inpublisher:``.
        year: Publication year, appended as ``year:<value>``.
        api_key: API key sent as the ``key`` query parameter. ``requests``
            omits parameters whose value is ``None``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection blocks the caller forever.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        ``None`` when the status is anything else, when the request fails
        (connection error, timeout), when the body is not valid JSON, or
        when ``title``, ``publisher`` and ``year`` are all missing/blank.
    """
    def _has_value(value):
        # None and blank strings would otherwise be searched as literal text.
        return value is not None and str(value).strip() != ''

    terms = []
    if _has_value(title):
        terms.append(f'intitle:"{title}"')
    if _has_value(publisher):
        terms.append(f'inpublisher:"{publisher}"')
    if _has_value(year):
        # NOTE(review): `year:` is not among the search keywords listed in the
        # Google Books API docs (intitle, inauthor, inpublisher, subject,
        # isbn, lccn, oclc) -- confirm that it actually narrows the results.
        terms.append(f'year:{year}')

    if not terms:
        return None

    # Terms are separated by spaces; URL-encoding turns them into the `+`
    # separators that the hand-built URL used before.
    params = {'q': ' '.join(terms), 'key': api_key}
    try:
        response = requests.get(
            'https://www.googleapis.com/books/v1/volumes',
            params=params,
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError):
        # Treat transport errors and undecodable bodies like any other failed
        # lookup so one bad request does not abort a long batch run.
        return None
    return None


In [17]:
def update_book_records(book_records, api_key):
    """Add ``InfoLink`` and ``ISBNs`` to every record, in place.

    Each record is looked up with ``query_google_books`` using its
    ``sourceTitle``, ``publisher`` and ``year`` fields, and the first returned
    item is taken as the match. Records are mutated; nothing is returned.

    Missing ``publisher``/``year`` fields are passed on as ``None`` instead of
    raising ``KeyError``, and a record without a ``sourceTitle`` is left with
    the default values without issuing a request.

    Args:
        book_records: Iterable of dict records to enrich.
        api_key: API key forwarded to ``query_google_books``.
    """
    for record in book_records:
        # Reset both fields so every record carries them, matched or not.
        record['InfoLink'] = None
        record['ISBNs'] = []

        title = record.get('sourceTitle')
        if not title:
            # No title to search for: keep the defaults and skip the request.
            continue

        response = query_google_books(
            title, record.get('publisher'), record.get('year'), api_key
        )
        items = response.get('items') if response else None
        if not items:
            continue

        # Assuming the first result is the closest match
        book_info = items[0].get('volumeInfo') or {}

        record['InfoLink'] = book_info.get('infoLink')

        # Keep only ISBN identifiers; malformed entries are ignored.
        record['ISBNs'] = [
            identifier['identifier']
            for identifier in book_info.get('industryIdentifiers') or []
            if identifier.get('type') in ('ISBN_10', 'ISBN_13')
            and 'identifier' in identifier
        ]

In [18]:
# Load the source records; update_book_records() later enriches this list in place.
book_records = load_json_data('./Record Feb 28.json')

In [19]:
# Enrich every record in place. This makes at most one Google Books request
# per record, so the run time grows with the number of records.
update_book_records(book_records, api_key)

KeyboardInterrupt: 

In [13]:
# Persist the enriched records.
# NOTE(review): this writes 'updated_books_march_test.json', while the analysis
# cells below read './updated_books_march.json' -- confirm which file is intended.
save_json_data(book_records, 'updated_books_march_test.json')

In [18]:
def analyze_book_records(file_path):
    """Print how many records in a JSON file lack an ``InfoLink`` or ``ISBNs``.

    A record counts as lacking a field when the key is absent or its value is
    falsy (``None``, empty string, empty list).

    Args:
        file_path: Path of a JSON file containing a list of record dicts.

    Returns:
        None. The counts and percentages are printed to standard output.
    """
    # Decode as UTF-8 explicitly: save_json_data() writes UTF-8 with
    # ensure_ascii=False, so relying on the platform default encoding can
    # fail or mis-decode non-ASCII text.
    with open(file_path, 'r', encoding='utf-8') as file:
        book_records = json.load(file)

    total_records = len(book_records)
    no_info_link_count = sum(1 for record in book_records if not record.get('InfoLink'))
    no_isbns_count = sum(1 for record in book_records if not record.get('ISBNs'))

    print(f"Total records: {total_records}")
    if total_records > 0:
        no_info_link_percentage = (no_info_link_count / total_records) * 100
        no_isbns_percentage = (no_isbns_count / total_records) * 100

        print(f"Records without InfoLink: {no_info_link_count} ({no_info_link_percentage:.2f}%)")
        print(f"Records without ISBNs: {no_isbns_count} ({no_isbns_percentage:.2f}%)")
    else:
        # Guarded separately so an empty file never divides by zero.
        print("No records found in the file.")

# Print how many records in the saved file lack an InfoLink or ISBNs.
analyze_book_records('./updated_books_march.json')

Total records: 3292
Records without InfoLink: 2352 (71.45%)
Records without ISBNs: 2542 (77.22%)


In [19]:
def analyze_fiction_book_records(file_path):
    """Print how many Fiction records in a JSON file lack an ``InfoLink`` or ``ISBNs``.

    Only records whose ``genre`` is exactly ``'Fiction'`` are considered. A
    record counts as lacking a field when the key is absent or its value is
    falsy (``None``, empty string, empty list).

    Args:
        file_path: Path of a JSON file containing a list of record dicts.

    Returns:
        None. The counts and percentages are printed to standard output.
    """
    # Decode as UTF-8 explicitly: save_json_data() writes UTF-8 with
    # ensure_ascii=False, so relying on the platform default encoding can
    # fail or mis-decode non-ASCII text.
    with open(file_path, 'r', encoding='utf-8') as file:
        book_records = json.load(file)

    # Filter for fiction records
    fiction_records = [record for record in book_records if record.get('genre') == 'Fiction']

    total_fiction_records = len(fiction_records)
    fiction_no_info_link_count = sum(1 for record in fiction_records if not record.get('InfoLink'))
    fiction_no_isbns_count = sum(1 for record in fiction_records if not record.get('ISBNs'))

    print(f"Total Fiction records: {total_fiction_records}")
    if total_fiction_records > 0:
        no_info_link_percentage = (fiction_no_info_link_count / total_fiction_records) * 100
        no_isbns_percentage = (fiction_no_isbns_count / total_fiction_records) * 100

        print(f"Fiction records without InfoLink: {fiction_no_info_link_count} ({no_info_link_percentage:.2f}%)")
        print(f"Fiction records without ISBNs: {fiction_no_isbns_count} ({no_isbns_percentage:.2f}%)")
    else:
        # Guarded separately so a file without fiction never divides by zero.
        print("No fiction records found.")


# Print the same missing-field counts, restricted to records whose genre is 'Fiction'.
analyze_fiction_book_records('./updated_books_march.json')

Total Fiction records: 2121
Fiction records without InfoLink: 1715 (80.86%)
Fiction records without ISBNs: 1796 (84.68%)
