In [1]:
import pandas as pd
import requests
import urllib.parse

# How to get books from Goodreads
Go to goodreads.com -> My Books -> Tools -> Import & Export -> Export Library -> Download CSV, then save the downloaded file as `./src/routes/library/book-list.csv`.

In [None]:
# Location of the exported library CSV that this notebook enriches.
book_file = "./src/routes/library/book-list.csv"
# Read ISBNs as text. Parsed as numbers they lose leading zeros (ISBN-10) and,
# when the column also contains blanks, turn into floats such as
# 9780441172719.0, which corrupts both API lookups and image filenames.
df = pd.read_csv(book_file, dtype={"ISBN": str})


In [7]:
def fetch_cover_url_by_isbn(isbn):
    """Look up a cover image URL for ``isbn`` via bookcover.longitood.com.

    Args:
        isbn: ISBN as a string or number. Missing values (``None``, NaN,
            ``pd.NA``, empty or whitespace-only strings) are accepted.

    Returns:
        The ``url`` field of the JSON response, or ``None`` when the ISBN is
        missing, the response status is not 200, the payload has no ``url``
        field, or the request fails (the error is printed, not raised).
    """
    # Test for missing values before truthiness: `not pd.NA` raises TypeError.
    if isbn is None or pd.isna(isbn) or not isbn:
        return None
    # A numeric column containing blanks is parsed as float; drop the
    # spurious ".0" so 9780441172719.0 is looked up as 9780441172719.
    if isinstance(isbn, float) and isbn.is_integer():
        isbn = int(isbn)
    # Unwrap spreadsheet-style text cells such as ="0441172717"
    # (NOTE(review): Goodreads exports appear to use this form - confirm
    # against your file), then remove all whitespace. Hyphens are kept for
    # the API.
    isbn_str = "".join(str(isbn).strip().lstrip("=").strip('"').split())
    if not isbn_str:
        return None
    base_url = "https://bookcover.longitood.com/bookcover"
    # Percent-encode so stray characters cannot alter the request path;
    # digits, letters and hyphens pass through unchanged.
    encoded_isbn = urllib.parse.quote(isbn_str, safe="")
    url = f"{base_url}/{encoded_isbn}"
    try:
        response = requests.get(url, timeout=8)
        # Any non-200 status falls through to the final `return None`.
        if response.status_code == 200:
            data = response.json()
            return data.get('url', None)
    except Exception as e:
        # Best effort: one failed lookup must not abort the whole batch.
        print(f"Error fetching cover for ISBN '{isbn_str}': {e}")
    return None

# Fill in cover URLs that are still missing, keeping any URL already present.
# Every lookup is a network request: if you have a large dataframe, consider
# slicing for testing or rate-limiting your requests.
# A CSV that has no "cover_url" column yet is treated as having no covers at
# all instead of raising KeyError.
existing_covers = df["cover_url"] if "cover_url" in df.columns else [None] * len(df)
# Building a plain list (rather than df.apply(axis=1)) also works when the
# frame is empty, where apply returns a DataFrame that cannot be assigned to a
# single column. fetch_cover_url_by_isbn already returns None for a missing ISBN.
df["cover_url"] = [
    cover if pd.notna(cover) else fetch_cover_url_by_isbn(isbn)
    for cover, isbn in zip(existing_covers, df["ISBN"])
]


In [10]:
import os
import re

# Folder that receives the downloaded cover images; created on first run.
output_dir = "./src/lib/images/book-cover"
os.makedirs(output_dir, exist_ok=True)

def download_cover_image(row):
    """Download the cover for one book row and return its site-relative path.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) with ``'cover_url'`` and
            ``'ISBN'`` entries.

    Returns:
        ``"images/book-cover/<isbn><ext>"`` when the image is already on disk
        or was downloaded successfully. ``None`` when the URL or ISBN is
        missing, the ISBN has no alphanumeric characters, the server answers
        with a non-200 status, or the download fails (failures are printed,
        not raised).
    """
    url = row['cover_url']
    raw_isbn = row['ISBN']
    # Test the raw ISBN: str(NaN) is the truthy string "nan", which would send
    # every ISBN-less book to the same "nan.jpg" file.
    if pd.isna(url) or not url or pd.isna(raw_isbn):
        return None
    # A numeric column containing blanks is parsed as float; drop the
    # spurious ".0" so it does not end up as an extra digit in the filename.
    if isinstance(raw_isbn, float) and raw_isbn.is_integer():
        raw_isbn = int(raw_isbn)
    isbn = str(raw_isbn)
    # Remove non-alphanumeric characters from the ISBN for the filename
    clean_isbn = re.sub(r'[^A-Za-z0-9]', '', isbn)
    if not clean_isbn:
        # Nothing usable remains (e.g. an empty ="" cell); avoid writing ".jpg".
        return None
    # Guess file extension from URL; fallback to jpg
    ext = os.path.splitext(urllib.parse.urlparse(url).path)[1]
    if not ext or ext.lower() not in [".jpg", ".jpeg", ".png", ".webp"]:
        ext = ".jpg"
    filename = f"{clean_isbn}{ext}"
    output_path = os.path.join(output_dir, filename)
    # Always use forward slashes so the stored path does not depend on the
    # operating system the notebook ran on.
    rel_path = f"images/book-cover/{filename}"
    # Download only if not already exists, or if file is <1KiB (broken)
    need_download = not os.path.isfile(output_path) or os.path.getsize(output_path) < 1024
    if need_download:
        # Stream into a temporary file and rename only on success, so an
        # interrupted download never leaves a truncated image that later runs
        # would accept as valid.
        tmp_path = output_path + ".part"
        try:
            # The context manager releases the connection on every exit path.
            with requests.get(url, stream=True, timeout=12) as resp:
                if resp.status_code != 200:
                    print(f"Failed download for ISBN {isbn}: status {resp.status_code}")
                    return None
                with open(tmp_path, "wb") as f:
                    for chunk in resp.iter_content(1024):
                        f.write(chunk)
            os.replace(tmp_path, output_path)
        except Exception as e:
            print(f"Error downloading image for {isbn}: {e}")
            try:
                os.remove(tmp_path)
            except OSError:
                # No partial file was created, or it is already gone.
                pass
            return None
    return rel_path

# Download (or reuse) every book's cover and record its site-relative path.
# Rows are passed one by one exactly as df.apply(axis=1) would pass them, but
# the results are collected in a list so that an empty frame also works
# (apply returns a DataFrame there, which cannot be assigned to one column).
df['cover_path'] = [download_cover_image(book_row) for _, book_row in df.iterrows()]


In [11]:
# Persist the enriched library without the index column.
df.to_csv(
    path_or_buf="src/routes/library/books-db.csv",
    index=False,
)
