# Personal book recommendation system

**May 2025**. I've noticed that I personally enjoy books the most when I read the _most dissimilar book_ next. 

I've thought about doing this as a random pull from my TBR, but my TBR is very lumpy and not uniformly distributed across my interests at all.

In [6]:
import requests
import numpy as np
import pandas as pd


OPEN_LIBRARY_API_URL = "https://openlibrary.org/search.json?q="
OPEN_LIBRARY_HEADERS = {
    "User-Agent": "personal_project/1.0 (angela.ambroz@users.noreply.github.com)"
}

In [44]:
df = pd.read_csv('../secrets/goodreads_data.csv')

df["ISBN"] = df["ISBN"].apply(lambda x: x.split('"')[1])

tbr = df[df['Exclusive Shelf'] == 'to-read'].copy()
len(df), len(tbr)

(3328, 1905)

In [45]:
tbr.sample().T

Unnamed: 0,1197
Book Id,124430
Title,City of Djinns: A Year in Delhi
Author,William Dalrymple
Author l-f,"Dalrymple, William"
Additional Authors,Olivia Fraser
ISBN,0142001007
ISBN13,"=""9780142001004"""
My Rating,0
Average Rating,4.13
Publisher,Penguin Publishing Group


In [46]:
ex = tbr.sample().to_dict(orient='records')[0]
ex

{'Book Id': 22456,
 'Title': 'The Origin of Wealth: Evolution, Complexity, and the Radical Remaking of Economics',
 'Author': 'Eric D. Beinhocker',
 'Author l-f': 'Beinhocker, Eric D.',
 'Additional Authors': nan,
 'ISBN': '157851777X',
 'ISBN13': '="9781578517770"',
 'My Rating': 0,
 'Average Rating': 4.27,
 'Publisher': 'Harvard Business Review Press',
 'Binding': 'Hardcover',
 'Number of Pages': 527.0,
 'Year Published': 2006.0,
 'Original Publication Year': 2006.0,
 'Date Read': nan,
 'Date Added': '2024/03/21',
 'Bookshelves': 'to-read',
 'Bookshelves with positions': 'to-read (#732)',
 'Exclusive Shelf': 'to-read',
 'My Review': nan,
 'Spoiler': nan,
 'Private Notes': nan,
 'Read Count': 0,
 'Owned Copies': 0}

In [47]:
# url = f'https://openlibrary.org/search.json?q=title%3A{ex["Title"]}+AND+first_publish_year%3A{int(ex["Year Published"])}+AND+author%3A{ex["Author"]}'
# response = requests.get(url, headers=OPEN_LIBRARY_HEADERS)
# print(response.text)

In [58]:
import requests
import json
import time
from urllib.parse import quote

def search_google_books(title, author=None, isbn=None):
    """Search Google Books API with different strategies"""
    base_url = "https://www.googleapis.com/books/v1/volumes"
    
    # Build query - try ISBN first if available
    if isbn:
        query = f"isbn:{isbn}"
    elif author:
        query = f"intitle:{title} inauthor:{author}"
    else:
        query = f"intitle:{title}"
    
    params = {
        "q": query,
        "maxResults": 5,  # get a few options
        "key": "AIzaSyCgVeYSHyhX9C0tjVmyjedpJ0pBS2115Vo"  # get this from google cloud console
    }
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        print(f"API error: {e}")
        return None

def extract_book_info(book_data):
    """Extract useful info from google books response"""
    if not book_data or "items" not in book_data:
        return None
    
    book = book_data["items"][0]["volumeInfo"]  # take first result
    
    return {
        "title": book.get("title"),
        "authors": book.get("authors", []),
        "description": book.get("description"),
        "categories": book.get("categories", []),
        "published_date": book.get("publishedDate"),
        "page_count": book.get("pageCount"),
        "average_rating": book.get("averageRating"),
        "language": book.get("language")
    }

# example usage
result = search_google_books(ex["Title"], ex["Author"], ex["ISBN"])
book_info = extract_book_info(result)
print(json.dumps(book_info, indent=2))

{
  "title": "The Origin of Wealth",
  "authors": [
    "Eric D. Beinhocker"
  ],
  "description": "Beinhocker has written this work in order to introduce a broad audience to what he believes is a revolutionary new paradigm in economics and its implications for our understanding of the creation of wealth. He describes how the growing field of complexity theory allows for evolutionary understanding of wealth creation, in which business designs co-evolve with the evolution of technologies and organizational innovations. In addition to giving his audience a tour of this field of complexity economics, he discusses its implications for real-world issues of business.",
  "categories": [
    "Business & Economics"
  ],
  "published_date": "2006",
  "page_count": 556,
  "average_rating": 4,
  "language": "en"
}


In [59]:
ex | book_info

{'Book Id': 22456,
 'Title': 'The Origin of Wealth: Evolution, Complexity, and the Radical Remaking of Economics',
 'Author': 'Eric D. Beinhocker',
 'Author l-f': 'Beinhocker, Eric D.',
 'Additional Authors': nan,
 'ISBN': '157851777X',
 'ISBN13': '="9781578517770"',
 'My Rating': 0,
 'Average Rating': 4.27,
 'Publisher': 'Harvard Business Review Press',
 'Binding': 'Hardcover',
 'Number of Pages': 527.0,
 'Year Published': 2006.0,
 'Original Publication Year': 2006.0,
 'Date Read': nan,
 'Date Added': '2024/03/21',
 'Bookshelves': 'to-read',
 'Bookshelves with positions': 'to-read (#732)',
 'Exclusive Shelf': 'to-read',
 'My Review': nan,
 'Spoiler': nan,
 'Private Notes': nan,
 'Read Count': 0,
 'Owned Copies': 0,
 'title': 'The Origin of Wealth',
 'authors': ['Eric D. Beinhocker'],
 'description': 'Beinhocker has written this work in order to introduce a broad audience to what he believes is a revolutionary new paradigm in economics and its implications for our understanding of the 

# OK, getting everything

In [56]:
import time

tbr_dict = tbr.to_dict(orient='records')

for book in tbr_dict:
    print(f"Now doing {book['Title']}...")
    result = search_google_books(
        book["Title"],
        book["Author"],
        book["ISBN"]
    )
    more_book = extract_book_info(result)
    if more_book:
        found = True
        book = book | more_book
    else:
        print(f"Couldn't find {book['Title']}...") 
        found = False
    book["found"] = found
    # time.wait(1)

tbr_dict

Now doing Valuable Humans in Transit and Other Stories...
Now doing Permutation City...
Now doing Hothouse...
Now doing Queen of Angels (Queen of Angels, #1)...
Now doing Darwin's Radio (Darwin's Radio, #1)...
Now doing Drawdown: The Most Comprehensive Plan Ever Proposed to Reverse Global Warming...
Now doing The AI Con: How to Fight Big Tech's Hype and Create the Future We Want...
Now doing Luminous...
Now doing America, América: A New History of the New World...
Now doing Tunnel 29: The True Story of an Extraordinary Escape Beneath the Berlin Wall...
Now doing The Oppermanns (McNally Editions)...
Now doing The Lottery...
Now doing There Will Come Soft Rains (Tale Blazers)...
Now doing We Have Never Been Woke: The Cultural Contradictions of a New Elite...
Now doing The Case for Degrowth...
Now doing The Case for a Job Guarantee...
Now doing The Case for Medicare for All...
Now doing The Case for Economic Democracy...
Now doing The Case For People's Quantitative Easing...
Now doing The

KeyboardInterrupt: 

In [57]:
tbr_dict

[{'Book Id': 63198504,
  'Title': 'Valuable Humans in Transit and Other Stories',
  'Author': 'qntm',
  'Author l-f': 'qntm, qntm',
  'Additional Authors': 'Sam  Hughes',
  'ISBN': '',
  'ISBN13': '=""',
  'My Rating': 0,
  'Average Rating': 4.3,
  'Publisher': nan,
  'Binding': 'Kindle Edition',
  'Number of Pages': 101.0,
  'Year Published': 2022.0,
  'Original Publication Year': 2022.0,
  'Date Read': nan,
  'Date Added': '2025/05/18',
  'Bookshelves': 'to-read',
  'Bookshelves with positions': 'to-read (#1904)',
  'Exclusive Shelf': 'to-read',
  'My Review': nan,
  'Spoiler': nan,
  'Private Notes': nan,
  'Read Count': 0,
  'Owned Copies': 0},
 {'Book Id': 156784,
  'Title': 'Permutation City',
  'Author': 'Greg Egan',
  'Author l-f': 'Egan, Greg',
  'Additional Authors': nan,
  'ISBN': '006105481X',
  'ISBN13': '="9780061054815"',
  'My Rating': 0,
  'Average Rating': 4.06,
  'Publisher': 'HarperPrism',
  'Binding': 'Mass Market Paperback',
  'Number of Pages': 352.0,
  'Year Pub