# Mini-projet : extraction des titres et prix des livres

## Libraries & functions

In [19]:
from bs4 import BeautifulSoup
import requests
from word2number import w2n
import pandas as pd

In [None]:
class BooksToScrapProcessor:
    """Small HTTP helper for the paginated catalogue of books.toscrape.com.

    The class only builds catalogue page URLs and downloads them; parsing
    the returned HTML is left to the caller.
    """

    # Catalogue pages are addressed as page-1.html, page-2.html, ...
    PAGE_URL_TEMPLATE = "https://books.toscrape.com/catalogue/page-{page}.html"
    # Seconds to wait for the server. requests applies no timeout by default,
    # so without this a stalled connection would block the notebook forever.
    DEFAULT_TIMEOUT = 10.0

    def __init__(self):
        pass

    def get_link_from_page(self, page: int) -> str:
        """Return the URL of catalogue page number ``page``."""
        return self.PAGE_URL_TEMPLATE.format(page=page)

    def scrap_url(self, url: str, timeout: float = DEFAULT_TIMEOUT) -> "requests.Response":
        """Download ``url`` and return the resulting ``requests.Response``.

        Args:
            url: Address to fetch with an HTTP GET.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``.

        Raises:
            requests.exceptions.HTTPError: If the server answers with a
                4xx/5xx status, so an error page is never parsed as if it
                were a catalogue page.
            requests.exceptions.Timeout: If the server does not answer
                within ``timeout`` seconds.
        """
        response = requests.get(url, timeout=timeout)
        # Fail loudly instead of handing an error page to the HTML parser.
        response.raise_for_status()
        return response

    def scrap_page(self, page: int, timeout: float = DEFAULT_TIMEOUT) -> "requests.Response":
        """Download catalogue page number ``page``.

        Builds the URL with ``get_link_from_page`` and fetches it with
        ``scrap_url``; see ``scrap_url`` for the exceptions raised.
        """
        url = self.get_link_from_page(page)
        return self.scrap_url(url, timeout=timeout)

## Extraction des données avec BeautifulSoup

Les objectifs sont les suivants :
- Extraire uniquement les livres **les mieux notés (>4 étoiles)** et **moins chers que 30£** présentés **sur les 3 premières pages**. Enregistrer le résultat dans un fichier `csv`.
- BONUS : créer un classement des 10 catégories ayant les livres les plus chers en moyenne.

In [6]:
# Single scraper instance used by the page-extraction loop to download catalogue pages.
processor = BooksToScrapProcessor()

In [31]:
# Catalogue pages to scrape (pages 1, 2 and 3).
PAGES_TO_SCRAPE = range(1, 4)

# Column-oriented accumulator: one list per DataFrame column.
data = {
    "title": [],
    "rating": [],
    "price": [],
}

for page in PAGES_TO_SCRAPE:
    # Parse from the raw bytes so BeautifulSoup performs the decoding itself.
    page_content = BeautifulSoup(processor.scrap_page(page).content,
                                 "html.parser")
    current_books = page_content.find_all("article", class_="product_pod")
    for book in current_books:
        # Get this book's title, rating & price
        title = book.find("h3").find("a")["title"]
        # The rating is the second CSS class of the star-rating tag, spelled
        # out as a word, which word2number converts to an int.
        rating = w2n.word_to_num(
            book.find("p", class_="star-rating")["class"][1]
        )
        # Keep only the digits and the decimal point rather than dropping a
        # fixed one-character prefix: this survives surrounding whitespace
        # and a mis-decoded currency symbol (e.g. "Â£51.77").
        price_text = book.find("p", class_="price_color").text
        price = float(
            "".join(ch for ch in price_text if ch.isdigit() or ch == ".")
        )
        # Save data for this book
        data["title"].append(title)
        data["rating"].append(rating)
        data["price"].append(price)

df = pd.DataFrame(data)
# NOTE: with pandas' default index=True, the row index is written as an
# unnamed first column of the CSV.
df.to_csv("../data/all_books_p1-3.csv")

In [33]:
# Keep the books rated strictly above 4 stars that cost strictly less than 30.
is_top_rated = df["rating"] > 4
is_cheap = df["price"] < 30
df_filtered = df.loc[is_top_rated & is_cheap]

# Persist the selection, then show it as the cell's output.
df_filtered.to_csv('../data/filtered_books_p1-3.csv')
df_filtered

Unnamed: 0,title,rating,price
12,Set Me Free,5,17.46
23,Chase Me (Paris Nights #2),5,25.27
30,The Four Agreements: A Practical Guide to Pers...,5,17.66
32,The Elephant Tree,5,23.82
34,Sophie's World,5,15.94
43,#HigherSelfie: Wake Up Your Life. Free Your So...,5,23.11
54,Thirst,5,17.27
