In [1]:
%pip install requests beautifulsoup4 pandas



In [2]:
import requests
from bs4 import BeautifulSoup
import re

# Maps the word used in the star-rating CSS class to its numeric value.
_RATING_MAP = {'One': 1, 'Two': 2, 'Three': 3, 'Four': 4, 'Five': 5}


def _parse_book(article):
    """Turn one ``<article class="product_pod">`` tag into a book dict.

    Returns:
        ``{'title': str, 'price': float, 'rating': int}``, or ``None`` when the
        title link or a numeric price cannot be found in the article.
    """
    heading = article.h3
    link = heading.a if heading is not None else None
    title = link.get('title') if link is not None else None

    price_tag = article.select_one('.price_color')
    price_match = (
        re.search(r'\d+\.?\d*', price_tag.get_text()) if price_tag is not None else None
    )
    if title is None or price_match is None:
        return None

    # The rating word is one of the element's classes (e.g. "star-rating Three").
    # Looking it up by value instead of by position avoids an IndexError when
    # the element carries a single class; unknown/missing ratings become 0.
    rating_tag = article.select_one('.star-rating')
    rating_classes = rating_tag.get('class', []) if rating_tag is not None else []
    rating = next((_RATING_MAP[c] for c in rating_classes if c in _RATING_MAP), 0)

    return {'title': title, 'price': float(price_match.group()), 'rating': rating}


def scrape_books(base_url, timeout=10):
    """Scrape title, price and star rating for every book in a paginated catalogue.

    Starting at ``base_url``, each page is fetched and every
    ``<article class="product_pod">`` element on it is parsed. The crawl then
    follows the link inside ``<li class="next">`` and stops when there is no
    such link, a request fails, a page contains no book articles, or a page
    URL comes up a second time.

    Args:
        base_url: URL of the first catalogue page.
        timeout: Value passed to ``requests`` as the per-request timeout (in
            seconds). Without one, a stalled server would block the crawl
            indefinitely.

    Returns:
        A list of dicts with the keys ``'title'``, ``'price'`` (float) and
        ``'rating'`` (int; 0 when no known rating class is present). Books
        collected before the crawl stopped are always returned; request errors
        are printed, not raised.
    """
    book_list = []
    visited = set()
    url = base_url

    # A single session reuses the HTTP connection across all page requests.
    with requests.Session() as session:
        while True:
            # Guard against pagination links that loop back to an earlier page.
            if url in visited:
                print(f"Already visited {url}. Stopping to avoid a loop.")
                break
            visited.add(url)

            try:
                response = session.get(url, timeout=timeout)
                response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
            except requests.exceptions.RequestException as e:
                print(f"Error fetching URL {url}: {e}")
                break

            soup = BeautifulSoup(response.content, 'html.parser')
            articles = soup.find_all('article', class_='product_pod')

            if not articles:
                print(f"No book articles found on {url}. Stopping.")
                break

            for article in articles:
                book = _parse_book(article)
                if book is None:
                    # One malformed entry should not discard everything scraped so far.
                    print(f"Skipping a malformed book entry on {url}.")
                    continue
                book_list.append(book)

            # Require an actual <a href> inside the "next" item before following it.
            next_link = soup.select_one('li.next a[href]')
            if next_link is None:
                print("No next button found. Ending scraping.")
                break

            # hrefs are relative to the current page, so resolve against it.
            url = requests.compat.urljoin(url, next_link['href'])
            print(f"Moving to next page: {url}")

    return book_list

# Crawl the whole catalogue starting from its landing page. The result is a
# list of per-book dicts that the following cells load into a DataFrame.
base_url = "http://books.toscrape.com/"
scraped_data = scrape_books(base_url)
# Quick sanity check on how many books the crawl produced.
print(f"Scraped {len(scraped_data)} books.")

Moving to next page: http://books.toscrape.com/catalogue/page-2.html
Moving to next page: http://books.toscrape.com/catalogue/page-3.html
Moving to next page: http://books.toscrape.com/catalogue/page-4.html
Moving to next page: http://books.toscrape.com/catalogue/page-5.html
Moving to next page: http://books.toscrape.com/catalogue/page-6.html
Moving to next page: http://books.toscrape.com/catalogue/page-7.html
Moving to next page: http://books.toscrape.com/catalogue/page-8.html
Moving to next page: http://books.toscrape.com/catalogue/page-9.html
Moving to next page: http://books.toscrape.com/catalogue/page-10.html
Moving to next page: http://books.toscrape.com/catalogue/page-11.html
Moving to next page: http://books.toscrape.com/catalogue/page-12.html
Moving to next page: http://books.toscrape.com/catalogue/page-13.html
Moving to next page: http://books.toscrape.com/catalogue/page-14.html
Moving to next page: http://books.toscrape.com/catalogue/page-15.html
Moving to next page: http://

In [3]:
import pandas as pd

# Build the table in one step from the list of per-book dicts.
df = pd.DataFrame(scraped_data)
display(df.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
df.info()

Unnamed: 0,title,price,rating
0,A Light in the Attic,51.77,3
1,Tipping the Velvet,53.74,1
2,Soumission,50.1,1
3,Sharp Objects,47.82,4
4,Sapiens: A Brief History of Humankind,54.23,5


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   title   1000 non-null   object 
 1   price   1000 non-null   float64
 2   rating  1000 non-null   int64  
dtypes: float64(1), int64(1), object(1)
memory usage: 23.6+ KB


None

In [4]:
# Persist the scraped table as books.csv (path is relative to the working
# directory); index=False keeps the row index out of the file.
df.to_csv('books.csv', index=False)

In [5]:
# Mean price across every scraped book.
average_price = df['price'].mean()

# Keep all books tied for the highest rating, not just the first match.
top_rating = df['rating'].max()
is_top_rated = df['rating'] == top_rating
best_rated_books = df.loc[is_top_rated]

print(f"Average book price: {average_price:.2f}")
print("\nBest rated book(s):")
display(best_rated_books)

Average book price: 35.07

Best rated book(s):


Unnamed: 0,title,price,rating
4,Sapiens: A Brief History of Humankind,54.23,5
12,Set Me Free,17.46,5
13,Scott Pilgrim's Precious Little Life (Scott Pi...,52.29,5
14,Rip it Up and Start Again,35.02,5
23,Chase Me (Paris Nights #2),25.27,5
...,...,...,...
985,Deep Under (Walker Security #1),47.09,5
989,Bright Lines,39.07,5
993,"Bleach, Vol. 1: Strawberry and the Soul Reaper...",34.65,5
997,A Spy's Devotion (The Regency Spies of London #1),16.97,5


In [6]:
# NOTE(review): this cell repeats the report printed at the end of the
# previous cell and only works if that cell has already defined
# `average_price` and `best_rated_books` — consider removing it.
print(f"Average book price: {average_price:.2f}")
print("\nBest rated book(s):")
display(best_rated_books)

Average book price: 35.07

Best rated book(s):


Unnamed: 0,title,price,rating
4,Sapiens: A Brief History of Humankind,54.23,5
12,Set Me Free,17.46,5
13,Scott Pilgrim's Precious Little Life (Scott Pi...,52.29,5
14,Rip it Up and Start Again,35.02,5
23,Chase Me (Paris Nights #2),25.27,5
...,...,...,...
985,Deep Under (Walker Security #1),47.09,5
989,Bright Lines,39.07,5
993,"Bleach, Vol. 1: Strawberry and the Soul Reaper...",34.65,5
997,A Spy's Devotion (The Regency Spies of London #1),16.97,5
