# In [None]:
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup


def scrape_books(min_rating=4, max_price=20, max_pages=1, timeout=10):
    """Scrape books.toscrape.com and return the books matching the filters.

    Walks the paginated catalogue listing starting at page 1. For every book
    whose star rating is at least ``min_rating`` and whose price is at most
    ``max_price``, the book's detail page is fetched to read its UPC, its
    genre (third breadcrumb entry) and its meta description.

    Crawling stops at the first of: ``max_pages`` pages visited, a listing
    page answering with a non-200 status, a listing page without books, or a
    listing page without a "next" link.

    Args:
        min_rating: Minimum star rating a book must have to be kept. Ratings
            are parsed from the CSS class ("One".."Five" -> 1..5); an
            unrecognised class counts as 0.
        max_price: Maximum price (in pounds) a book may have to be kept.
        max_pages: Maximum number of listing pages to visit.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A ``pandas.DataFrame`` with one row per matching book and the columns
        ``UPC``, ``Title``, ``Price (£)``, ``Rating``, ``Genre``,
        ``Availability`` and ``Description``. The columns are present even
        when no book matched.

    Raises:
        requests.RequestException: If a listing page cannot be fetched
            (connection error or timeout). A failure while processing a
            single book is printed and that book is skipped instead.
    """
    base_url = "https://books.toscrape.com/catalogue/page-{}.html"
    rating_map = {'One': 1, 'Two': 2, 'Three': 3, 'Four': 4, 'Five': 5}
    columns = [
        'UPC',
        'Title',
        'Price (£)',
        'Rating',
        'Genre',
        'Availability',
        'Description',
    ]
    books_data = []

    if max_pages < 1:
        # Nothing to fetch; still hand back the full schema.
        return pd.DataFrame(books_data, columns=columns)

    # One session for the whole crawl so connections to the host are reused
    # across the listing and detail requests.
    with requests.Session() as session:
        page = 1
        while page <= max_pages:
            response = session.get(base_url.format(page), timeout=timeout)
            # Force UTF-8 so the pound sign in prices decodes correctly.
            response.encoding = 'utf-8'
            if response.status_code != 200:
                break

            soup = BeautifulSoup(response.text, 'html.parser')
            books = soup.select('ol.row > li')
            if not books:
                break

            for book in books:
                try:
                    # The second CSS class of the star-rating tag is the
                    # rating spelled out as a word.
                    rating_tag = book.find('p', class_='star-rating')
                    rating = rating_map.get(rating_tag['class'][1], 0)

                    price_text = book.find('p', class_='price_color').text
                    price = float(price_text.replace('£', '').strip())

                    # Skip the detail request for books that fail the filter.
                    if not (rating >= min_rating and price <= max_price):
                        continue

                    link = book.h3.a
                    title = link['title'].strip().replace('\xa0', ' ')
                    book_url = (
                        'https://books.toscrape.com/catalogue/'
                        + link['href'].replace('index.html', '')
                    )

                    # Additional details from the book's own page.
                    book_response = session.get(book_url, timeout=timeout)
                    # Fail with a clear HTTP error rather than a confusing
                    # parse error on an error page.
                    book_response.raise_for_status()
                    book_response.encoding = 'utf-8'
                    book_soup = BeautifulSoup(book_response.text, 'html.parser')

                    upc_header = book_soup.find('th', string='UPC')
                    upc = upc_header.find_next_sibling('td').text.strip()

                    # Availability is read from the listing entry, not the
                    # detail page.
                    availability_tag = book.find('p', class_='instock availability')
                    availability = availability_tag.text.strip().replace('\xa0', ' ')

                    breadcrumb = book_soup.find('ul', class_='breadcrumb')
                    genre = breadcrumb.find_all('li')[2].text.strip()

                    description_tag = book_soup.find('meta', attrs={'name': 'description'})
                    if description_tag:
                        description = description_tag['content'].strip().replace('\xa0', ' ')
                    else:
                        description = 'No description'
                    # Flatten the description onto a single line.
                    description = description.replace('\n', ' ').replace('\r', ' ').strip()

                    books_data.append({
                        'UPC': upc,
                        'Title': title,
                        'Price (£)': price,
                        'Rating': rating,
                        'Genre': genre,
                        'Availability': availability,
                        'Description': description,
                    })

                except (
                    requests.RequestException,
                    AttributeError,
                    KeyError,
                    IndexError,
                    TypeError,
                    ValueError,
                ) as e:
                    # Best effort: report the broken book and move on.
                    print(f"Error procesando un libro: {e}")

            # Stop when the listing has no further pages.
            next_page = soup.select_one('li.next > a')
            if not next_page:
                break
            page += 1

    return pd.DataFrame(books_data, columns=columns)

# Example usage: well-rated books (4+ stars) costing at most £100, taken from
# the first listing page only.
result_df = scrape_books(min_rating=4, max_price=100, max_pages=1)
print(result_df.head())

# Save to CSV if desired. DataFrame.to_csv does not create missing
# directories, so make sure the target folder exists first.
output_path = Path('data/budgets.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
result_df.to_csv(output_path, index=False)