In [4]:
import requests
from bs4 import BeautifulSoup
import csv

In [None]:
def fetch_book_ids(base_url, timeout=30):
    """
    Scrape Project Gutenberg book IDs from a listing page.

    Every ``<a href=...>`` whose href contains ``/ebooks/`` and whose last
    path segment consists only of digits contributes that segment as an ID.

    Args:
    base_url: URL of the page to scrape (for example a Project Gutenberg
        browse-by-language page).
    timeout: seconds to wait for the server before giving up.

    Returns:
    List of book ID strings in the order the links appear on the page
    (duplicates are kept). An empty list if the request fails or the
    server answers with a status code other than 200.
    """
    try:
        # Without an explicit timeout, requests may block indefinitely on a
        # server that accepts the connection but never answers.
        response = requests.get(base_url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, ...: report and fall
        # back to the same "no results" value used for bad status codes.
        print(f"Failed to fetch page: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch page, Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    book_ids = []
    for link in soup.find_all('a', href=True):
        href = link['href']
        if '/ebooks/' in href:
            # Book links look like ".../ebooks/<number>"; other /ebooks/
            # links (search pages, bookshelves, ...) end in non-digits.
            book_id = href.split('/')[-1]
            if book_id.isdigit():
                book_ids.append(book_id)

    return book_ids

def save_to_csv(book_ids, output_csv):
    """
    Save book IDs to a CSV file, one ID per row, without duplicates.

    Args:
    book_ids: iterable of book IDs; repeated IDs are written only once,
        at the position of their first occurrence.
    output_csv: path of the CSV file to write. An existing file is
        overwritten.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order, so
    # the file content is reproducible between runs (iterating a set of
    # strings is not, because of hash randomization).
    unique_book_ids = list(dict.fromkeys(book_ids))
    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows([book_id] for book_id in unique_book_ids)
    print(f"CSV file has been saved to {output_csv}.")


def main(base_url, output_csv):
    """
    Run the scraping pipeline: fetch book IDs, then store them as CSV.

    Args:
    base_url: URL of the page handed to fetch_book_ids.
    output_csv: path of the CSV file handed to save_to_csv.

    Nothing is written when no book IDs were collected.
    """
    collected_ids = fetch_book_ids(base_url)
    if not collected_ids:
        # Empty result (failed request or no matching links): skip the
        # CSV step entirely.
        return
    save_to_csv(collected_ids, output_csv)

if __name__ == "__main__":
    # Script entry point: scrape the Chinese-language listing page of
    # Project Gutenberg and store the book IDs found there.
    output_csv = "book_ids.csv"
    base_url = "https://www.gutenberg.org/browse/languages/zh"
    main(base_url=base_url, output_csv=output_csv)


CSV file has been saved to book_ids.csv.
