In [None]:
# Reload module to pick up changes
import importlib
import verse_scraper
importlib.reload(verse_scraper)

from verse_scraper import scrape_verse, VerseData
import json

from verse_scraper import BIBLE_BOOKS, scrape_chapter, scrape_book, scrape_bible, scrape_verse_safe
import time

In [None]:
# Example: fetch every verse of one chapter (1 Peter 1).
# Per the scraper's contract, scraping stops on its own at the first 404
# (i.e. when the chapter has no more verses).

chapter_data = scrape_chapter("1_peter", 1)
print(f"Found {len(chapter_data)} verses in 1 Peter 1")

# Preview only the first three verses, using the first translation of each
for verse in chapter_data[:3]:
    if verse.translations:
        preview = verse.translations[0].text[:80]
    else:
        preview = 'No text'
    print(f"  {verse.reference}: {preview}...")

In [None]:
# Scrape an entire book (be patient - this takes a while!)
# Example: Scrape Philemon (short book with 1 chapter, 25 verses)

def print_progress(verse_data):
    """Progress callback: print the reference of the verse that was just scraped.

    Args:
        verse_data: Object exposing a ``reference`` attribute.
    """
    progress_line = f"  Scraped: {verse_data.reference}"
    print(progress_line)

# Scraping hits the network and takes a while, so this demo is opt-in:
# set RUN_PHILEMON_DEMO = True to run it.
# The reporting lines live inside the same guard because `philemon_data` only
# exists once the scrape has run; leaving them outside raised a NameError on a
# fresh kernel (Restart & Run All).
RUN_PHILEMON_DEMO = False

if RUN_PHILEMON_DEMO:
    philemon_data = scrape_book("philemon", callback=print_progress)
    print(json.dumps([v.to_dict() for v in philemon_data], indent=2, ensure_ascii=False))
    print(f"\nTotal verses in Philemon: {len(philemon_data)}")

In [None]:
# Full Bible scraping would take HOURS (~31,000 verses at ~1.3s each = ~11 hours)
# Here's how to do it with saving progress:

import json
import time
from pathlib import Path

def scrape_with_persistence(output_dir: str = "bible_data"):
    """Scrape every book in BIBLE_BOOKS, saving each one to its own JSON file.

    A book whose ``<book>.json`` already exists in ``output_dir`` is skipped, so
    an interrupted run can be resumed simply by calling the function again.

    Args:
        output_dir: Directory that receives one ``<book>.json`` per book. It is
            created (including missing parent directories) if needed.
    """
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    for book in BIBLE_BOOKS:
        output_file = out_dir / f"{book}.json"

        # Skip if already scraped
        if output_file.exists():
            print(f"Skipping {book} (already exists)")
            continue

        print(f"\n📖 Scraping {book}...")
        start = time.time()

        verses = scrape_book(book, callback=lambda v: print(f"  {v.reference}"))

        # Serialise before touching the disk so that a failing to_dict() cannot
        # leave an empty file behind (which the skip check would then honour).
        payload = [v.to_dict() for v in verses]

        # Write to a temporary sibling and rename it into place: the final file
        # name only ever appears once the JSON is complete, so an interrupted
        # write is never mistaken for a finished book on the next run.
        partial_file = output_file.with_name(output_file.name + ".part")
        try:
            # Explicit UTF-8: ensure_ascii=False emits raw non-ASCII text, which
            # the platform default encoding may be unable to represent.
            with open(partial_file, 'w', encoding='utf-8') as f:
                json.dump(payload, f, indent=2, ensure_ascii=False)
            partial_file.replace(output_file)
        finally:
            # No-op after a successful rename; removes the leftover on failure.
            if partial_file.exists():
                partial_file.unlink()

        elapsed = time.time() - start
        print(f"✅ {book}: {len(verses)} verses in {elapsed:.1f}s")

# A full scrape takes HOURS, so it is opt-in: set RUN_FULL_SCRAPE = True to start it.
# Guarding the call keeps Restart & Run All from silently kicking off the
# multi-hour job.
RUN_FULL_SCRAPE = False

if RUN_FULL_SCRAPE:
    scrape_with_persistence()

In [None]:
# Smoke-test the scraper on a single verse page and print everything it parsed
verse_data = scrape_verse("https://biblehub.com/1_peter/1-1.htm")

section_rule = "=" * 60


def print_section(title):
    """Print a blank line followed by a heading framed by two rules."""
    print()
    print(section_rule)
    print(title)
    print(section_rule)


print(f"Reference: {verse_data.reference}")
print(f"Book: {verse_data.book}, Chapter: {verse_data.chapter}, Verse: {verse_data.verse}")

print_section("TRANSLATIONS")
for translation in verse_data.translations:
    print(f"\n[{translation.version}]")
    print(translation.text)

print_section("GREEK WORDS")
for greek in verse_data.greek_words:
    print(f"\n'{greek.english_word}' -> {greek.word} ({greek.transliteration}) - Strong's {greek.strongs_number}")
    # Part of speech and definition are optional; print them only when present
    if greek.part_of_speech:
        print(f"  Part of Speech: {greek.part_of_speech}")
    if greek.definition:
        print(f"  Definition: {greek.definition}")

print_section(f"CROSS REFERENCES ({len(verse_data.cross_references)} total)")
for cross_ref in verse_data.cross_references:  # every cross-reference, not a sample
    print(f"\n{cross_ref.reference}")
    if cross_ref.text:
        print(f"  {cross_ref.text}")

In [None]:
# Dump the scraped verse as pretty-printed JSON, keeping non-ASCII characters readable
verse_json = json.dumps(verse_data.to_dict(), indent=2, ensure_ascii=False)
print(verse_json)