# In [18]:
import requests
import time
from bs4 import BeautifulSoup

# HTTP headers for page requests: a desktop Chrome user-agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/107.0.0.0 Safari/537.36"
    ),
}

# Module-level lists for scraped listing titles and their links.
title, links = [], []


def casa_sapo(url: str, max_pages: int) -> dict:
    """
    Scrape listing titles and links from the Casa Sapo website.

    Requests ``f"{url}{num}"`` for ``num`` from 1 to ``max_pages``. On each page,
    every ``<div class="property-type">`` element is treated as one listing: its
    stripped text is the title and the ``href`` of its enclosing ``<a>`` tag is the
    link. Listings without an enclosing ``<a href=...>`` are skipped so that titles
    and links can never be paired with the wrong partner.

    Scraping stops early when a page contains no listings or when a request fails
    (connection error, timeout, or HTTP error status). A failure is printed and the
    listings collected so far are returned. A one-second pause is made between
    consecutive page requests.

    Parameters:
    - url (str): The base URL to scrape; the page number is appended directly to it.
    - max_pages (int): The maximum number of pages to scrape. Values below 1
      result in no requests.

    Returns:
    - dict: A new dictionary mapping listing titles to their links. Because titles
      are the keys, listings that share a title collapse into one entry and the
      last one seen wins.
    """
    # NOTE(review): the first 112 characters of every href are discarded, exactly
    # as the original code did; presumably a fixed-length redirect/tracking prefix
    # -- confirm against the live markup.
    href_prefix_len = 112

    # Accumulate per call so repeated calls do not leak results into each other.
    data = {}

    for num in range(1, max_pages + 1):
        # Only the network call can raise RequestException, so keep the try small.
        try:
            page = requests.get(f"{url}{num}", headers=headers, timeout=10)
            # Treat 4xx/5xx responses as failures instead of parsing error pages.
            page.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            break

        soup = BeautifulSoup(page.text, "html.parser")
        listing_tags = soup.find_all("div", class_="property-type")

        # Dynamic stop condition: no listings found on this page.
        if not listing_tags:
            break

        for listing_tag in listing_tags:
            a_tag = listing_tag.find_parent("a")
            if a_tag is None or not a_tag.has_attr("href"):
                continue  # No link to pair with this title.
            data[listing_tag.text.strip()] = a_tag["href"][href_prefix_len:]

        # Respectful delay between requests; pointless after the final page.
        if num < max_pages:
            time.sleep(1)

    return data