# Diagrama UML

Libro
- id (PK)
- titulo
- precio
- rating
- in_stock
- categoria_id (FK)

Autor
- id (PK)
- nombre

Libro_Autor
- libro_id (PK, FK)
- autor_id (PK, FK)

Categoria
- id (PK)
- nombre

# Esquema SQL

## DDL

```sql
CREATE TABLE Categoria (
    id SERIAL PRIMARY KEY,
    nombre TEXT NOT NULL
);
```


```sql
CREATE TABLE Autor (
    id SERIAL PRIMARY KEY,
    nombre TEXT NOT NULL
);
```

```sql
CREATE TABLE Libro (
    id SERIAL PRIMARY KEY,
    titulo TEXT NOT NULL,
    precio NUMERIC(6, 2) NOT NULL,
    rating INTEGER NOT NULL,
    in_stock BOOLEAN NOT NULL,
    categoria_id INTEGER NOT NULL,
    FOREIGN KEY (categoria_id) REFERENCES Categoria(id)
);
```

```sql
CREATE TABLE Libro_Autor (
    libro_id INTEGER NOT NULL,
    autor_id INTEGER NOT NULL,
    PRIMARY KEY (libro_id, autor_id),
    FOREIGN KEY (libro_id) REFERENCES Libro(id),
    FOREIGN KEY (autor_id) REFERENCES Autor(id)
);
```

In [4]:
%pip install requests
%pip install beautifulsoup4

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting beautifulsoup4
  Using cached beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4)
  Using cached soupsieve-2.7-py3-none-any.whl.metadata (4.6 kB)
Using cached beautifulsoup4-4.13.4-py3-none-any.whl (187 kB)
Using cached soupsieve-2.7-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.13.4 soupsieve-2.7
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import requests
from bs4 import BeautifulSoup

url = 'https://books.toscrape.com/'

# Fetch the raw HTML of the landing page. The timeout keeps a stalled
# connection from hanging the kernel, and raise_for_status() stops the cell on
# a 4xx/5xx answer instead of silently parsing an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Split the markup into a tag tree (the "tag soup" metaphor). The raw bytes are
# handed over so BeautifulSoup can detect the document's own encoding;
# response.text would be decoded with whatever charset requests guesses from
# the HTTP headers, which can garble non-ASCII characters.
soup = BeautifulSoup(response.content, 'html.parser')

# Every book on the page is an <article> element with class product_pod
libros = soup.select('article.product_pod')

# Print the first 3 titles as a smoke test
for libro in libros[:3]:
    titulo = libro.h3.a['title']
    print(titulo)

A Light in the Attic
Tipping the Velvet
Soumission


In [2]:
%pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from sqlalchemy import create_engine, Column, Integer, String, Numeric, Boolean, ForeignKey, Table
from sqlalchemy.orm import relationship, declarative_base

# Declarative base class; tables and models defined against it are registered
# on Base.metadata
Base = declarative_base()

# Association table libro_autor (many-to-many link between libro and autor).
# Each column is a foreign key to the corresponding table, and the two columns
# together form the composite primary key, so a given (libro, autor) pair can
# be stored only once.
libro_autor = Table(
    'libro_autor', Base.metadata,
    Column('libro_id', Integer, ForeignKey('libro.id'), primary_key=True),
    Column('autor_id', Integer, ForeignKey('autor.id'), primary_key=True)
)

class Categoria(Base):
    """ORM model mapped to the ``categoria`` table (referenced by ``libro.categoria_id``)."""
    __tablename__ = 'categoria'
    id = Column(Integer, primary_key=True)  # primary key
    nombre = Column(String, nullable=False)  # category name; required (NOT NULL)

class Autor(Base):
    """ORM model mapped to the ``autor`` table (referenced by ``libro_autor.autor_id``)."""
    __tablename__ = 'autor'
    id = Column(Integer, primary_key=True)  # primary key
    nombre = Column(String, nullable=False)  # author name; required (NOT NULL)

class Libro(Base):
    """ORM model mapped to the ``libro`` table.

    Every row must reference one ``categoria`` row (``categoria_id`` is a
    non-nullable foreign key) and is linked to authors through the
    ``libro_autor`` association table.
    """
    __tablename__ = 'libro'
    id = Column(Integer, primary_key=True)  # primary key
    titulo = Column(String, nullable=False)
    precio = Column(Numeric(6, 2), nullable=False)  # precision 6, scale 2 (two decimal places)
    rating = Column(Integer, nullable=False)  # presumably the star score produced by the scraper — TODO confirm range
    in_stock = Column(Boolean, nullable=False)
    categoria_id = Column(Integer, ForeignKey('categoria.id'), nullable=False)

    # Relationships
    # Categoria object resolved through the categoria_id foreign key
    categoria = relationship("Categoria")
    # Autor objects linked via the libro_autor table; the backref also adds a
    # `libros` collection on Autor
    autores = relationship("Autor", secondary=libro_autor, backref="libros")


In [4]:
# SQLite engine backed by the local file books.db; echo=True makes SQLAlchemy
# log the statements it sends to the database
engine = create_engine(url="sqlite:///books.db", echo=True)

# Create every table registered on Base.metadata
Base.metadata.create_all(bind=engine)

2025-08-16 09:35:10,489 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-08-16 09:35:10,489 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("libro_autor")
2025-08-16 09:35:10,489 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-08-16 09:35:10,494 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("libro_autor")
2025-08-16 09:35:10,495 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-08-16 09:35:10,497 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("categoria")
2025-08-16 09:35:10,498 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-08-16 09:35:10,500 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("categoria")
2025-08-16 09:35:10,500 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-08-16 09:35:10,502 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("autor")
2025-08-16 09:35:10,503 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-08-16 09:35:10,505 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("autor")
2025-08-16 09:35:10,505 INFO sqlalchemy.engine.Engine [raw

In [14]:
# `url` and `libros` come from the landing-page cell (same request, same
# 'article.product_pod' selection), so that cell has to run before this one.
if not libros:
    raise ValueError("No se encontraron libros en la página principal")

primer_libro = libros[0]

# Relative link to the book's detail page. A leading run of "../" segments is
# rewritten to "catalogue/"; the longest prefix is tried first so that only one
# rewrite is applied.
detalle_rel_url = primer_libro.h3.a["href"]
for prefijo in ("../../../", "../../", "../"):
    if detalle_rel_url.startswith(prefijo):
        detalle_rel_url = detalle_rel_url.replace(prefijo, "catalogue/")
        break

detalle_url = f"{url}{detalle_rel_url}"

# Show the URL before requesting it
print(f"URL de detalle a acceder: {detalle_url}")

# Browser-like User-Agent header sent with the request
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    )
}

# Request the detail page; any requests failure (including a non-2xx status
# raised by raise_for_status) is reported and then re-raised
try:
    resp_detalle = requests.get(detalle_url, headers=headers, timeout=10)
    resp_detalle.raise_for_status()
except requests.exceptions.RequestException as error:
    print(f"Error al acceder a {detalle_url}: {error}")
    raise

# NOTE(review): .text is decoded with the charset requests infers from the
# response; confirm it matches the page's real encoding.
soup_detalle = BeautifulSoup(resp_detalle.text, "html.parser")

# Data extraction that tolerates selectors matching nothing
def safe_extract(selector, attribute=None):
    """Return the text or one attribute of the first node matching ``selector``.

    The lookup runs against the module-level ``soup_detalle`` tree.

    Args:
        selector: CSS selector handed to ``soup_detalle.select_one``.
        attribute: Name of the attribute to read. When ``None`` (the default)
            the node's ``.text`` is returned instead.

    Returns:
        ``None`` when nothing matches; otherwise the node's text, or the
        result of ``node.get(attribute)``.
    """
    nodo = soup_detalle.select_one(selector)
    if nodo:
        if attribute is None:
            return nodo.text
        return nodo.get(attribute)
    return None

# Price
precio_element = soup_detalle.select_one("p.price_color")
if not precio_element:
    # Fall back to a broader selector if the primary one matches nothing
    precio_element = soup_detalle.select_one(".product_main .price_color")

if precio_element:
    # Strip the currency prefix whether the page was decoded correctly ("£")
    # or mis-decoded as Latin-1 ("Â£"), so the parse does not depend on how
    # the response text was decoded.
    precio = float(precio_element.text.strip().lstrip("Â£"))
else:
    precio = None
    print("Advertencia: No se encontró el precio del libro")

# Remaining fields, read through the None-tolerant helper
titulo = safe_extract(".product_main h1")
stock_texto = safe_extract("p.instock.availability")
# Always a real bool: a missing availability node yields False, not None
in_stock = "In stock" in (stock_texto or "")

# Rating: the second CSS class of <p class="star-rating ..."> is the score
# spelled out as a word; anything missing or unrecognised maps to 0
rating_tag = soup_detalle.select_one("p.star-rating")
rating = 0
if rating_tag:
    rating_clases = rating_tag.get("class", [])
    if len(rating_clases) > 1:
        rating_clase = rating_clases[1]
        mapa_rating = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}
        rating = mapa_rating.get(rating_clase, 0)

# Category: text of the last link inside the breadcrumb trail
categoria_element = soup_detalle.select("ul.breadcrumb li a")
categoria = categoria_element[-1].text if categoria_element else None

# Author
def _first_open_library_author(title, timeout):
    """Return the first author of Open Library's top title match, or None."""
    response = requests.get(
        "https://openlibrary.org/search.json",
        params={"title": title, "limit": 1},
        timeout=timeout,
    )
    response.raise_for_status()
    docs = response.json().get("docs") or []
    authors = docs[0].get("author_name") if docs else None
    return authors[0] if authors else None


def _first_google_books_author(title, category, timeout):
    """Return the first author of Google Books' top match, or None."""
    query = f'intitle:"{title}"'
    if category:
        # Only narrow by subject when a category is actually known
        query += f' subject:"{category}"'
    response = requests.get(
        "https://www.googleapis.com/books/v1/volumes",
        params={
            "q": query,
            "maxResults": 1,
            "orderBy": "relevance",
            "langRestrict": "en"},
        timeout=timeout,
    )
    response.raise_for_status()
    items = response.json().get("items") or []
    if not items:
        return None
    authors = items[0].get("volumeInfo", {}).get("authors")
    return authors[0] if authors else None


def get_author_from_google_books(title, category, timeout=10):
    """Look up a book's author by title, trying Open Library then Google Books.

    Despite the name, Open Library is queried first; Google Books is the
    fallback. Each source is tried independently, so a failure in one
    (network error, bad status, malformed payload) is reported and the next
    source is still consulted.

    Args:
        title: Book title to search for. A falsy title returns ``None``
            without any request being made.
        category: Optional subject used to narrow the Google Books query;
            ignored when falsy.
        timeout: Seconds allowed for each HTTP request (default 10).

    Returns:
        The first author name found, or ``None`` when no source yields one.
    """
    if not title:
        return None

    fuentes = (
        ("Open Library", lambda: _first_open_library_author(title, timeout)),
        ("Google Books", lambda: _first_google_books_author(title, category, timeout)),
    )
    for nombre_fuente, buscar in fuentes:
        try:
            autor_encontrado = buscar()
        except Exception as e:
            # Best-effort lookup: report the problem and move on to the next source
            print(f"Error al consultar {nombre_fuente}: {e}")
            continue
        if autor_encontrado:
            return autor_encontrado
    return None

autor = get_author_from_google_books(titulo, categoria)

# Summary of the scraped fields, one "label value" line each
for etiqueta, valor in (
    ("Título:", titulo),
    ("Precio:", precio),
    ("Stock:", in_stock),
    ("Rating:", rating),
    ("Categoría:", categoria),
    ("Autor:", autor),
):
    print(etiqueta, valor)

URL de detalle a acceder: https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html
Título: A Light in the Attic
Precio: 51.77
Stock: True
Rating: 3
Categoría: Poetry
Autor: Shel Silverstein
