In [1]:
# Import necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from splinter import Browser

In [2]:
# Base URL of the website
base_url = "http://books.toscrape.com/catalogue/page-{}.html"

In [3]:
# List to store all the book details
all_books = []

In [4]:

# Loop through all 50 pages
for page in range(1, 26):
    # Construct the URL for each page
    i=1
    url = base_url.format(page)
    print(url)
    # Send an HTTP request to the URL
    response = requests.get(url)
    
    # Ensure the request was successful
    if response.status_code == 200:
        # Parse the content of the request with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Step 3: Extract book details
        # Find all book containers on the page
        books = soup.find_all('article', class_='product_pod')
        
        # Loop through each book and extract details
        for book in books:
            title = book.h3.a['title']
            price = book.find('p', class_='price_color').text
            availability = book.find('p', class_='instock availability').text.strip()
            rating = book.p['class'][1]  # The class attribute contains the rating information
            link = "http://books.toscrape.com/catalogue/" + book.h3.a['href']
            print(i, title)
            i+=1
            # Append the extracted details to the list
            all_books.append({
                'Title': title,
                'Price': price,
                'Availability': availability,
                'Rating': rating,
                'Link': link
            })
    
    # Pause for a short period to avoid overwhelming the server
    time.sleep(1)

http://books.toscrape.com/catalogue/page-1.html
1 A Light in the Attic
2 Tipping the Velvet
3 Soumission
4 Sharp Objects
5 Sapiens: A Brief History of Humankind
6 The Requiem Red
7 The Dirty Little Secrets of Getting Your Dream Job
8 The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull
9 The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics
10 The Black Maria
11 Starving Hearts (Triangular Trade Trilogy, #1)
12 Shakespeare's Sonnets
13 Set Me Free
14 Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)
15 Rip it Up and Start Again
16 Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991
17 Olio
18 Mesaerion: The Best Science Fiction Stories 1800-1849
19 Libertarianism for Beginners
20 It's Only the Himalayas
http://books.toscrape.com/catalogue/page-2.html
1 In Her Wake
2 How Music Works
3 Foolproof Preserving: A Guide to Small Batch Jams, Jellies, Pickles, Condiments, and More: A

In [5]:
# Convert the list of books into a DataFrame
books_df = pd.DataFrame(all_books)

In [6]:
# Step 5: Save the DataFrame to a CSV file
books_df.to_csv('books_toscrape.csv', index=False)

In [7]:
books_df

Unnamed: 0,Title,Price,Availability,Rating,Link
0,A Light in the Attic,Â£51.77,In stock,Three,http://books.toscrape.com/catalogue/a-light-in...
1,Tipping the Velvet,Â£53.74,In stock,One,http://books.toscrape.com/catalogue/tipping-th...
2,Soumission,Â£50.10,In stock,One,http://books.toscrape.com/catalogue/soumission...
3,Sharp Objects,Â£47.82,In stock,Four,http://books.toscrape.com/catalogue/sharp-obje...
4,Sapiens: A Brief History of Humankind,Â£54.23,In stock,Five,http://books.toscrape.com/catalogue/sapiens-a-...
...,...,...,...,...,...
495,Unreasonable Hope: Finding Faith in the God Wh...,Â£46.33,In stock,Two,http://books.toscrape.com/catalogue/unreasonab...
496,Under the Tuscan Sun,Â£37.33,In stock,Three,http://books.toscrape.com/catalogue/under-the-...
497,Toddlers Are A**holes: It's Not Your Fault,Â£25.55,In stock,One,http://books.toscrape.com/catalogue/toddlers-a...
498,The Year of Living Biblically: One Man's Humbl...,Â£34.72,In stock,One,http://books.toscrape.com/catalogue/the-year-o...


In [None]:
# Google Books API endpoint
google_books_api = "https://www.googleapis.com/books/v1/volumes"
i=0
book_details = pd.DataFrame()

In [None]:
# Function to fetch book details from Google Books API
def fetch_google_books_details(title,i):
    # Define the parameters for the API request
    print(i , title)
    params = {
        'q': title,
        'maxResults': 1,
        'printType': 'books'
    }
    
    # Send the request to the Google Books API
    response = requests.get(google_books_api, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        if 'items' in data:

            volume_info = data['items'][0]['volumeInfo']
            print(volume_info)
            return {
                'Title':title,
                'Authors': ', '.join(volume_info.get('authors', [])),
                'Publisher': volume_info.get('publisher', ''),
                'Published Date': volume_info.get('publishedDate', ''),
                'Description': volume_info.get('description', ''),
                'ISBN-13': next((identifier['identifier'] for identifier in volume_info.get('industryIdentifiers', []) if identifier['type'] == 'ISBN_13'), '')
            }
    return {}

In [None]:
# Create lists to store the fetched details
titles = []
authors = []
publishers = []
published_dates = []
descriptions = []
isbn13s = []


In [None]:

# Fetch details for each book title

for index, row in books_df.iterrows():
    i+=1
    title = row['Title']
    details = fetch_google_books_details(title,i)
    
    # Append the details to the respective lists
    titles.append(details.get('Title'))
    authors.append(details.get('Authors', ''))
    publishers.append(details.get('Publisher', ''))
    published_dates.append(details.get('Published Date', ''))
    descriptions.append(details.get('Description', ''))
    isbn13s.append(details.get('ISBN-13', ''))
    
    # Pause for a short period to avoid hitting the API rate limit
    time.sleep(1)

In [None]:
# Add the fetched details to the DataFrame
book_details['Title']= titles
book_details['Authors'] = authors
book_details['Publisher'] = publishers
book_details['Published Date'] = published_dates
book_details['Description'] = descriptions
book_details['ISBN-13'] = isbn13s

In [None]:
book_details

In [None]:
# Save the enriched DataFrame to a new CSV file
book_details.to_csv('books_detail.csv', index=False)

print(' DataFrame saved as books_detail.csv')