# In [None]:
import os
import time

import pandas as pd
import requests

# === CONFIG ===
# The ISBNdb key is taken from the ISBNDB_API_KEY environment variable when it
# is set (and non-empty); otherwise the literal below is used so existing runs
# keep working.
# SECURITY: the fallback key is committed in source. Rotate it and remove the
# fallback once the environment variable is configured everywhere.
API_KEY = os.environ.get("ISBNDB_API_KEY") or "61375_c7815d021797a1bbccfa4f6a8b6f83a2"
# Sent with every request; the raw key is passed as the Authorization header.
HEADERS = {"Authorization": API_KEY}
INPUT_FILE = "your_input_dataset.csv"      # e.g., "items_improved_image2.csv"
OUTPUT_FILE = "books_complete.csv"
# Pause (in seconds) after each API request; 0.25 s caps the rate at 4 requests
# per second. NOTE(review): the original comment ties this to a "2000/day"
# plan -- confirm the actual per-second and daily limits of the plan in use.
REQUEST_DELAY = 0.25

# === Load Base Dataset ===
# Read the ISBN column as text. With default type inference pandas parses it
# as a number, which strips leading zeros from ISBN-10s (e.g. 0306406152
# becomes 306406152) and turns the whole column into floats ("...0" suffix
# once stringified) as soon as one cell is blank.
df = pd.read_csv(INPUT_FILE, dtype={'ISBN': str})

# === Ensure Needed Columns Exist ===
# Every column the enrichment step writes to; any that the input file lacks is
# created up front, filled with empty strings.
desired_columns = [
    'Title', 'Author', 'ISBN', 'Publisher', 'Subjects', 'Synopsis',
    'Language', 'Image', 'image_original', 'title_long',
    'Dimensions', 'Pages', 'date_published',
    'msrp', 'binding', 'edition', 'related', 'dewey_decimal'
]

for col in desired_columns:
    if col not in df.columns:
        df[col] = ''

# === Helper Function to Query ISBNdb ===
def fetch_book_data(isbn, timeout=10):
    """Look up one book on ISBNdb and return its metadata.

    This is a best-effort call: failures are reported on stdout and turned
    into an empty result instead of being raised, so one bad lookup does not
    stop a batch run.

    Args:
        isbn: ISBN string appended to the ``/book/`` endpoint URL.
        timeout: Seconds to wait for the server before giving up. Without a
            limit, a stalled connection would block the caller indefinitely.

    Returns:
        The ``"book"`` object of the JSON response as a dict, or an empty
        dict when the request fails, the status code is not 200, or the
        response body does not contain a dict under ``"book"``.
    """
    url = f"https://api.isbndb.com/book/{isbn}"
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors, timeouts, invalid URLs, etc.
        print(f"[ERROR] ISBN {isbn}: {e}")
        return {}

    if response.status_code != 200:
        print(f"[{response.status_code}] Skipped ISBN: {isbn}")
        return {}

    try:
        payload = response.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        print(f"[ERROR] ISBN {isbn}: {e}")
        return {}

    # Guard against a null or unexpectedly shaped payload so callers can
    # always treat the result as a dict.
    book = payload.get("book") if isinstance(payload, dict) else None
    return book if isinstance(book, dict) else {}

# === Enrich Dataset ===
def _normalize_isbn(raw):
    """Return *raw* as a bare ISBN string without hyphens or padding.

    A numeric ISBN column that contains blanks is loaded by pandas as floats,
    so a cell such as 9781234567890.0 would otherwise be sent to the API with
    a trailing ".0". Integer-valued floats are converted to int first.
    """
    if isinstance(raw, float) and raw.is_integer():
        raw = int(raw)
    return str(raw).replace("-", "").strip()


def _join_values(value):
    """Render a list-valued API field as a comma-separated string.

    Returns '' for a missing or null field and stringifies each item, so a
    single malformed record cannot raise TypeError and abort the whole run.
    """
    if not value:
        return ''
    if isinstance(value, (list, tuple)):
        return ", ".join(str(item) for item in value)
    return str(value)


try:
    for idx, row in df.iterrows():
        isbn = _normalize_isbn(row.get('ISBN', ''))
        if len(isbn) < 10:
            # Too short to be an ISBN (also covers blank and NaN cells,
            # which stringify to '' and 'nan'); no request, no delay.
            continue

        book = fetch_book_data(isbn)
        if book:
            # Title keeps the existing value when the API has none; every
            # other column is overwritten with the API value or ''.
            df.at[idx, 'Title'] = book.get('title', row.get('Title', ''))
            df.at[idx, 'Author'] = _join_values(book.get('authors'))
            df.at[idx, 'Publisher'] = book.get('publisher', '')
            df.at[idx, 'Subjects'] = _join_values(book.get('subjects'))
            df.at[idx, 'Synopsis'] = book.get('overview', '')
            df.at[idx, 'Language'] = book.get('language', '')
            # Both image columns are filled from the same 'image' field.
            df.at[idx, 'Image'] = book.get('image', '')
            df.at[idx, 'image_original'] = book.get('image', '')
            df.at[idx, 'title_long'] = book.get('title_long', '')
            df.at[idx, 'Dimensions'] = book.get('dimensions', '')
            df.at[idx, 'Pages'] = book.get('pages', '')
            df.at[idx, 'date_published'] = book.get('date_published', '')
            df.at[idx, 'msrp'] = book.get('msrp', '')
            df.at[idx, 'binding'] = book.get('binding', '')
            df.at[idx, 'edition'] = book.get('edition', '')
            df.at[idx, 'related'] = _join_values(book.get('related_isbns'))
            df.at[idx, 'dewey_decimal'] = book.get('dewey_decimal', '')

        # Throttle after every request, whether or not it returned data.
        time.sleep(REQUEST_DELAY)
finally:
    # === Save Final Enriched Dataset ===
    # Written even if the loop is interrupted or fails part-way, so rows
    # already enriched (and the API quota spent on them) are not lost.
    df.to_csv(OUTPUT_FILE, index=False)

print(f"\n✅ Done! Dataset saved as: {OUTPUT_FILE}")
