In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Scrape title, price, star rating and availability for every book on the
# catalogue listing pages, then write the records to books.csv.
base_url = "http://books.toscrape.com/catalogue/page-{}.html"
PAGE_COUNT = 29        # listing pages 1..PAGE_COUNT are fetched
REQUEST_TIMEOUT = 10   # seconds before a stalled request is abandoned
books_data = []

# One Session reuses the underlying connection across the page requests.
with requests.Session() as session:
    for page in range(1, PAGE_COUNT + 1):
        url = base_url.format(page)
        res = session.get(url, timeout=REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page as an
        # empty listing.
        res.raise_for_status()
        # Passing raw bytes lets BeautifulSoup detect the encoding from the
        # document itself rather than relying on requests' header-based guess.
        soup = BeautifulSoup(res.content, "html.parser")

        books = soup.find_all("article", class_="product_pod")
        for book in books:
            title = book.h3.a["title"]
            raw_price = book.find("p", class_="price_color").text
            # Keep only digits and the decimal point (drops the currency sign).
            price = re.sub(r'[^\d\.]', '', raw_price)
            # The second CSS class of the first <p> is the rating word
            # (e.g. "Three").
            rating = book.p["class"][1]
            availability = book.select_one(".availability").text.strip()

            books_data.append({
                "Title": title,
                "Price": float(price),
                "Rating": rating,
                "Availability": availability
            })

df = pd.DataFrame(books_data)
df.to_csv("books.csv", index=False)
df.head()


                                   Title  Price Rating Availability
0                   A Light in the Attic  51.77  Three     In stock
1                     Tipping the Velvet  53.74    One     In stock
2                             Soumission  50.10    One     In stock
3                          Sharp Objects  47.82   Four     In stock
4  Sapiens: A Brief History of Humankind  54.23   Five     In stock


In [3]:
# Map the textual star ratings produced by the scraper to integers.
rating_map = {
    'One': 1,
    'Two': 2,
    'Three': 3,
    'Four': 4,
    'Five': 5
}


def rating_to_number(rating):
    """Return the integer for a textual rating, or 0 when it is not in rating_map."""
    return rating_map.get(rating, 0)


# When scraping, build each record with the converted value, e.g.
#     "Rating": rating_to_number(rating),
# This cell intentionally does not append to books_data: doing so here would
# add a duplicate of the last scraped book (built from leftover loop
# variables) with an integer Rating among the string ones.


In [4]:
import pandas as pd

# Word -> number lookup for the star ratings, generated from the ordered words.
rating_map = {
    word: stars
    for stars, word in enumerate(['One', 'Two', 'Three', 'Four', 'Five'], start=1)
}

# Read the scraped CSV and swap the textual Rating column for its numeric form.
df = pd.read_csv('books.csv')
df = df.assign(Rating=df['Rating'].map(rating_map))

# Write the converted frame to a separate file, leaving books.csv untouched.
df.to_csv('books_clean.csv', index=False)


In [5]:
# Preview the first rows of df (head() shows five rows by default).
df.head()

Unnamed: 0,Title,Price,Rating,Availability
0,A Light in the Attic,51.77,3,In stock
1,Tipping the Velvet,53.74,1,In stock
2,Soumission,50.1,1,In stock
3,Sharp Objects,47.82,4,In stock
4,Sapiens: A Brief History of Humankind,54.23,5,In stock
