In [1]:
from pymongo import MongoClient

# MongoClient connects lazily: constructing it performs no network I/O, so a
# success message printed right after construction would be shown even when
# no server is reachable.
client = MongoClient("mongodb://localhost:27017/")

# Force one cheap round-trip so the message below is only printed once the
# server has actually answered; raises a PyMongo error otherwise.
client.admin.command("ping")

db = client["library_db"]
books_collection = db["books"]

print("Connected to MongoDB")


Connected to MongoDB


In [2]:
# Read back the book_id of every stored book so new records continue the
# existing numbering instead of restarting at 1.
existing_ids = list(books_collection.find({}, {"book_id": 1}))

# Skip documents that have no book_id (or a null one): indexing them directly
# would raise KeyError / make max() fail and abort the whole notebook run.
known_ids = [b["book_id"] for b in existing_ids if b.get("book_id") is not None]

# default=0 makes an empty collection start at book_id 1.
start_id = max(known_ids, default=0) + 1

print("Starting book_id from:", start_id)


Starting book_id from: 1


In [3]:
import requests
import random

SEARCH_URL = "https://openlibrary.org/search.json"
NUM_BOOKS = 50

all_docs = []

# Walk result pages 1 through 5 of the "fiction" search so the random sample
# taken later draws from a wider pool than a single page.
for page in range(1, 6):
    query = {"q": "fiction", "page": page}
    res = requests.get(SEARCH_URL, params=query, timeout=10)
    # Abort on any HTTP error status rather than continuing with partial data.
    res.raise_for_status()
    payload = res.json()
    # A response without a "docs" key contributes nothing.
    all_docs += payload.get("docs", [])

len(all_docs)


500

In [4]:
# random.sample raises ValueError when asked for more items than the pool
# holds, so cap the request at the number of docs actually fetched.
selected_books = random.sample(all_docs, min(NUM_BOOKS, len(all_docs)))
len(selected_books)


50

In [5]:
def get_description(work_key):
    """Fetch the description text of an Open Library work.

    Args:
        work_key: Path-style key of the work (e.g. "/works/OLxxxxW"); it is
            appended to the Open Library host to build the JSON URL.

    Returns:
        The description as a string. The "description" field may be a plain
        string, a dict carrying the text under "value", or a list of either;
        for a list the first entry is used. When the request fails, the
        response is not valid JSON, or no usable text is present, the
        placeholder "No description available." is returned instead.
    """
    fallback = "No description available."

    try:
        url = f"https://openlibrary.org{work_key}.json"
        res = requests.get(url, timeout=10)
        res.raise_for_status()
        data = res.json()
    except (requests.RequestException, ValueError):
        # Best-effort lookup: network/HTTP failures and undecodable bodies fall
        # back to the placeholder. Only these are caught so that Ctrl-C
        # (KeyboardInterrupt) and programming errors are not silently hidden.
        return fallback

    if not isinstance(data, dict):
        return fallback

    desc = data.get("description")

    # Unwrap a list first, since its first entry may itself be a dict.
    if isinstance(desc, list):
        desc = desc[0] if desc else None
    if isinstance(desc, dict):
        desc = desc.get("value")

    # Guarantee a non-blank string for the caller; anything else (None,
    # numbers, empty text) is replaced by the placeholder.
    if isinstance(desc, str) and desc.strip():
        return desc
    return fallback


In [6]:
docs_to_insert = []

for book in selected_books:
    title = book.get("title")
    work_key = book.get("key")  # e.g. /works/OLxxxxW

    # A hit without a title or a work key cannot be stored or looked up.
    if not title or not work_key:
        continue

    description = get_description(work_key)

    # Use the real cover when the search hit carries a cover id, otherwise a
    # generic placeholder image.
    cover_id = book.get("cover_i")
    image_url = (
        f"https://covers.openlibrary.org/b/id/{cover_id}-L.jpg"
        if cover_id
        else "https://via.placeholder.com/128x192?text=No+Cover"
    )

    doc = {
        # Number from the count of accepted docs rather than the loop index,
        # so skipped hits do not leave gaps in the book_id sequence.
        "book_id": start_id + len(docs_to_insert),
        "title": title,
        "description": description,
        "image_url": image_url,
    }

    docs_to_insert.append(doc)

len(docs_to_insert)


50

In [7]:
# Preview the first prepared document before writing anything. Guarded so an
# empty batch shows nothing instead of raising IndexError and halting the run
# before the insert cell can report "No documents to insert".
docs_to_insert[0] if docs_to_insert else None


{'book_id': 1,
 'title': 'The Taming of the Shrew',
 'description': 'This play within a play is a delightful farce about a fortune hunter who marries and tames" the town shrew. The comedy, often produced today because of its accessibility, is one of the plays Shakespeare intended for the general public rather than for the nobility.\r\n\r\n\r\nCliffsComplete combines the full original text of The Taming of the Shrew with a helpful glossary and CliffsNotes-quality commentary into one volume. You will find:A unique pedagogical approach that combines the complete original text with expert commentary following each sceneA descriptive bibliography and historical background on the author, the times, and the work itselfAn improved character map that graphically illustrates the relationships among the charactersSidebar glossaries"',
 'image_url': 'https://covers.openlibrary.org/b/id/7889534-L.jpg'}

In [8]:
# Write the prepared batch in a single call; an empty batch is reported
# instead of being sent to the collection.
if not docs_to_insert:
    print("No documents to insert")
else:
    books_collection.insert_many(docs_to_insert)
    inserted_count = len(docs_to_insert)
    print(f"Inserted {inserted_count} books")


Inserted 50 books
