In [33]:
import requests
from bs4 import BeautifulSoup
import time  #Allows us to pause between downloads to avoid stressing the website.
import pandas as pd

In [34]:
# Scrape configuration: which books to fetch and from which translations.

# Maps each Bible book code used in the URL (e.g. "gen") to the number of
# chapters to download for it.
book_chapters = {
    "gen": 5,
}

# Maps each Bible version name (e.g. "GNBDC") to the version ID that
# Bible.com uses for it in the URL.
versions = {
    "GNBDC": "416",
}

In [35]:
# Define the Function
def scrape_bible_csv(book_chapters, versions, output_path='Genesis GNBDC.csv',
                     request_timeout=30, delay_seconds=1):
    """Scrape Bible chapters from Bible.com and save every verse to a CSV file.

    For each version in ``versions`` and each book in ``book_chapters``, the
    chapters 1..N are downloaded one page at a time, the verses are extracted
    from the HTML, and all collected rows are written to ``output_path``.

    Args:
        book_chapters: dict mapping a book code as used in the URL
            (e.g. ``"gen"``) to the number of chapters to fetch.
        versions: dict mapping a version name (e.g. ``"GNBDC"``) to the
            version ID used in the URL (e.g. ``"416"``).
        output_path: destination CSV file. Defaults to the original
            hard-coded name so existing calls behave the same.
        request_timeout: seconds to wait for each HTTP request before giving
            up, so a stalled connection cannot hang the whole run.
        delay_seconds: pause after every chapter request (successful or not)
            to avoid stressing the website.

    Returns:
        None. The result is the CSV file, with the columns
        Book, Chapter, Verse, Text, Version.
    """
    base_url = "https://www.bible.com/bible/"
    # Fixed column order; also guarantees a header row when nothing was scraped.
    columns = ["Book", "Chapter", "Verse", "Text", "Version"]
    all_verses = []

    for version, version_id in versions.items():
        for book, max_chapter in book_chapters.items():
            for chapter in range(1, max_chapter + 1):
                # e.g. https://www.bible.com/bible/416/gen.1
                url = f"{base_url}{version_id}/{book}.{chapter}"
                print(f"Scraping: {url} ({version})")

                try:
                    response = requests.get(url, timeout=request_timeout)

                    if response.status_code != 200:
                        print(f"Failed to retrieve {book} {chapter} ({version}). Skipping...")
                        continue

                    chapter_verses = _extract_chapter_verses(
                        response.content, book, chapter, version
                    )
                    if not chapter_verses:
                        print(f"No verses found for {book} {chapter} ({version}). Skipping...")
                        continue

                    all_verses.extend(chapter_verses)

                except Exception as e:
                    # Deliberately broad: one bad chapter must not abort a long
                    # scrape and discard the verses collected so far.
                    print(f"Error processing {book} {chapter} ({version}): {e}. Skipping...")

                finally:
                    # Runs on success, on `continue`, and on errors, so failed
                    # requests are rate-limited as well.
                    time.sleep(delay_seconds)

    df = pd.DataFrame(all_verses, columns=columns)
    # 'utf-8-sig' writes a UTF-8 byte-order mark at the start of the file.
    df.to_csv(output_path, index=False, encoding='utf-8-sig')
    print(f"{', '.join(versions)} Bible verses have been saved to '{output_path}'.")


def _extract_chapter_verses(html, book, chapter, version):
    """Parse one chapter page and return a list of verse records (dicts)."""
    # NOTE(review): these class names carry build-hash suffixes and are likely
    # to change when the site is redeployed; confirm them if no verses are found.
    verse_class = "ChapterContent_verse__57FIw"
    label_class = "ChapterContent_label__R2PLt"
    content_class = "ChapterContent_content__RrUqA"

    soup = BeautifulSoup(html, 'html.parser')
    records = []

    for verse in soup.find_all('span', class_=verse_class):
        verse_num_tag = verse.find('span', class_=label_class)
        verse_text_tag = verse.find('span', class_=content_class)

        # Fall back to "?" / "" when a piece is missing.
        # NOTE(review): spans without a label are stored with verse "?";
        # confirm whether those are continuations of the preceding verse.
        verse_num = verse_num_tag.text.strip() if verse_num_tag else "?"
        verse_text = verse_text_tag.text.strip() if verse_text_tag else ""

        # Skip spans that carry no text (or an empty label).
        if verse_num and verse_text:
            records.append({
                "Book": book.upper(),
                "Chapter": chapter,
                "Verse": verse_num,
                "Text": verse_text,
                "Version": version,
            })

    return records

# Run the scraper for the configured books and versions.
scrape_bible_csv(book_chapters=book_chapters, versions=versions)


Scraping: https://www.bible.com/bible/416/gen.1 (GNBDC)
Scraping: https://www.bible.com/bible/416/gen.2 (GNBDC)
Scraping: https://www.bible.com/bible/416/gen.3 (GNBDC)
Scraping: https://www.bible.com/bible/416/gen.4 (GNBDC)
Scraping: https://www.bible.com/bible/416/gen.5 (GNBDC)
GNBDC Bible verses have been saved to 'Genesis GNBDC.csv'.
