# Idealista Single Listing Parser

Parse a specific Idealista listing URL using Scrapfly and BeautifulSoup.
Uses the same fields as the existing `Listing` dataclass.

In [10]:
import json
from dataclasses import dataclass, asdict
from bs4 import BeautifulSoup
from scrapfly import ScrapflyClient, ScrapeConfig
from config import SCRAPFLY_API_KEY

In [11]:
scrapfly = ScrapflyClient(key=SCRAPFLY_API_KEY)

In [12]:
@dataclass
class Listing:
    """Represents a property listing (same as scraper.py)."""
    url: str
    title: str
    price: str
    price_value: float
    rooms: str
    size: str
    floor: str
    description: str
    thumbnail: str = ""

In [13]:
def fetch_listing_html(url: str) -> str:
    """Fetch the HTML content of a listing page using Scrapfly."""
    result = scrapfly.scrape(
        ScrapeConfig(
            url=url,
            asp=True,
            country="ES",
            render_js=True,
        )
    )
    
    if not result.success:
        raise Exception(f"Scrape failed: {result.upstream_status_code}")
    
    print(f"Fetched {len(result.content)} chars, status: {result.upstream_status_code}")
    return result.content

In [14]:
def parse_price(price_text: str) -> tuple[str, float]:
    """Parse price text into formatted string and numeric value."""
    if not price_text:
        return "N/A", 0.0
    
    clean = price_text.replace("€", "").replace(".", "").replace(",", ".").split('/')[0].strip()
    try:
        value = float(clean.split()[0])
        formatted = f"{value:,.0f} €".replace(",", " ")
        return formatted, value
    except (ValueError, IndexError):
        return price_text, 0.0

In [15]:
def parse_listing(html: str, url: str) -> Listing:
    """Parse a single listing page HTML into Listing."""
    soup = BeautifulSoup(html, "html.parser")
    
    # Title
    title_elem = soup.find("span", class_="main-info__title-main")
    title = title_elem.get_text(strip=True) if title_elem else ""
    
    # Price
    price_elem = soup.find("span", class_="info-data-price")
    price_text = price_elem.get_text(strip=True) if price_elem else ""
    price_formatted, price_value = parse_price(price_text)
    
    # Basic info (rooms, size, floor)
    rooms = "N/A"
    size = "N/A"
    floor = "N/A"
    
    info_features = soup.find("div", class_="info-features")
    if info_features:
        spans = info_features.find_all("span")
        for span in spans:
            text = span.get_text(strip=True).lower()
            if "hab" in text or "room" in text:
                rooms = span.get_text(strip=True)
            elif "m²" in text or "m2" in text:
                size = span.get_text(strip=True)
    
    # Floor - look in details section
    details_section = soup.find("section", class_="details-property")
    if details_section:
        for li in details_section.find_all("li"):
            text = li.get_text(strip=True).lower()
            if "planta" in text or "piso" in text or "floor" in text:
                floor = li.get_text(strip=True)
                break
    
    # Description
    desc_elem = soup.find("div", class_="comment")
    if not desc_elem:
        desc_elem = soup.find("div", class_="adCommentsLanguage")
    description = desc_elem.get_text(strip=True) if desc_elem else ""
    
    # Thumbnail - find actual listing image (class="image-focus"), not logo
    thumbnail = ""
    img_elem = soup.find("img", class_="image-focus")
    if img_elem:
        thumbnail = img_elem.get("src") or img_elem.get("data-src") or ""
    
    # Fallback: find any img from img*.idealista.com (actual photos, not st3 static assets)
    if not thumbnail:
        for img in soup.find_all("img"):
            src = img.get("src") or img.get("data-src") or ""
            if "img" in src and "idealista.com/blur" in src:
                thumbnail = src
                break
    
    return Listing(
        url=url,
        title=title,
        price=price_formatted,
        price_value=price_value,
        rooms=rooms,
        size=size,
        floor=floor,
        description=description,
        thumbnail=thumbnail,
    )

In [16]:
def scrape_listing(url: str) -> Listing:
    """Scrape and parse a single listing."""
    html = fetch_listing_html(url)
    return parse_listing(html, url)

## Test

In [17]:
test_url = "https://www.idealista.com/inmueble/110252034/"

listing = scrape_listing(test_url)
print(json.dumps(asdict(listing), indent=2, ensure_ascii=False))

Fetched 747958 chars, status: 200
{
  "url": "https://www.idealista.com/inmueble/110252034/",
  "title": "Alquiler de piso en Calle d'Aragó",
  "price": "1 680 €",
  "price_value": 1680.0,
  "rooms": "3 hab.",
  "size": "105 m²",
  "floor": "N/A",
  "description": "Magnífica vivienda muy luminosa, con agradables vistas a la Sagrada Familia. Dispone de tres habitaciones, una doble y dos individuales, dos con armarios a medida.Cuenta con un baño completo equipado con cabina de hidromasaje y sauna, ideal para el confort diario.El salón-comedor, se caracteriza por sus grandes ventanales, que aportan abundante luz natural a toda la vivienda.La cocina, totalmente equipada, presenta un diseño moderno y funcional, con salida a lavadero independiente.El piso se entrega semiamueblado, incluyendo armarios empotrados en dormitorios, zona de despacho y mueble en el salón.La finca dispone de servicio de conserjería y se encuentra en una excelente ubicación, con muy buena comunicación mediante transp

## Debug: Inspect HTML

In [None]:
html = fetch_listing_html(test_url)
soup = BeautifulSoup(html, "html.parser")

# Save for inspection
with open("debug_listing.html", "w", encoding="utf-8") as f:
    f.write(soup.prettify())
print("Saved to debug_listing.html")