In [3]:

# STEP 0: Install required packages

!pip install requests
!pip install beautifulsoup4
!pip install pandas


# STEP 1: Import libraries

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time


# STEP 2: Initialize lists to store data

titles = []
prices = []
ratings = []
availability = []
product_urls = []

# STEP 3: Scrape all 50 pages

base_url = "http://books.toscrape.com/catalogue/page-{}.html"
site_url = "http://books.toscrape.com/catalogue/"

for page in range(1, 51):
    url = base_url.format(page)
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to load page {page}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    books = soup.find_all('article', class_='product_pod')

    for book in books:
        # Title
        title = book.h3.a['title']
        titles.append(title)

        # Price
        price = book.find('p', class_='price_color').text
        prices.append(price)

        # Rating
        rating_class = book.find('p', class_='star-rating')['class']
        rating_word = rating_class[1]
        ratings.append(rating_word)

        # Availability
        avail = book.find('p', class_='instock availability').text.strip()
        availability.append(avail)

        # Product URL
        partial_url = book.h3.a['href']
        full_url = site_url + partial_url
        product_urls.append(full_url)

    print(f"Page {page} done")
    time.sleep(1)


# STEP 4: Create DataFrame

data = pd.DataFrame({
    'Title': titles,
    'Price': prices,
    'Rating': ratings,
    'Availability': availability,
    'Product_URL': product_urls
})


# STEP 5: Clean data

# Convert Ratings to numbers
rating_map = {'One':1, 'Two':2, 'Three':3, 'Four':4, 'Five':5}
data['Rating'] = data['Rating'].map(rating_map)

# Save CSV with proper encoding
data.to_csv('books_dataset_clean.csv', index=False, encoding='utf-8-sig')


# STEP 6: Show sample data

data.head(10)


Page 1 done
Page 2 done
Page 3 done
Page 4 done
Page 5 done
Page 6 done
Page 7 done
Page 8 done
Page 9 done
Page 10 done
Page 11 done
Page 12 done
Page 13 done
Page 14 done
Page 15 done
Page 16 done
Page 17 done
Page 18 done
Page 19 done
Page 20 done
Page 21 done
Page 22 done
Page 23 done
Page 24 done
Page 25 done
Page 26 done
Page 27 done
Page 28 done
Page 29 done
Page 30 done
Page 31 done
Page 32 done
Page 33 done
Page 34 done
Page 35 done
Page 36 done
Page 37 done
Page 38 done
Page 39 done
Page 40 done
Page 41 done
Page 42 done
Page 43 done
Page 44 done
Page 45 done
Page 46 done
Page 47 done
Page 48 done
Page 49 done
Page 50 done


Unnamed: 0,Title,Price,Rating,Availability,Product_URL
0,A Light in the Attic,Â£51.77,3,In stock,http://books.toscrape.com/catalogue/a-light-in...
1,Tipping the Velvet,Â£53.74,1,In stock,http://books.toscrape.com/catalogue/tipping-th...
2,Soumission,Â£50.10,1,In stock,http://books.toscrape.com/catalogue/soumission...
3,Sharp Objects,Â£47.82,4,In stock,http://books.toscrape.com/catalogue/sharp-obje...
4,Sapiens: A Brief History of Humankind,Â£54.23,5,In stock,http://books.toscrape.com/catalogue/sapiens-a-...
5,The Requiem Red,Â£22.65,1,In stock,http://books.toscrape.com/catalogue/the-requie...
6,The Dirty Little Secrets of Getting Your Dream...,Â£33.34,4,In stock,http://books.toscrape.com/catalogue/the-dirty-...
7,The Coming Woman: A Novel Based on the Life of...,Â£17.93,3,In stock,http://books.toscrape.com/catalogue/the-coming...
8,The Boys in the Boat: Nine Americans and Their...,Â£22.60,4,In stock,http://books.toscrape.com/catalogue/the-boys-i...
9,The Black Maria,Â£52.15,1,In stock,http://books.toscrape.com/catalogue/the-black-...
