In [12]:
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [16]:
# Site root; used to build absolute URLs for individual listings.
BASE_URL = "https://www.bayut.om"
# First results page; later pages are requested as f"{START_URL}page-{n}/".
START_URL = f"{BASE_URL}/en/oman/properties-for-sale/"
# HTTP headers sent with every request (a desktop-browser User-Agent string).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}
# Pause, in seconds, after each listing detail page has been processed.
SLEEP_BETWEEN_LISTINGS = 1.5
# Pause, in seconds, after each results page has been fully processed.
SLEEP_BETWEEN_PAGES = 5
MAX_PAGES = 76  # safety limit: never visit more result pages than this per run


def get_soup(url, timeout=30):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server (connect and read) before
            the request is abandoned. Defaults to 30.

    Returns:
        A BeautifulSoup document built with the ``html.parser`` backend, or
        None if the request failed, timed out, returned an HTTP error
        status, or could not be parsed. The error is printed, not raised.
    """
    try:
        # requests has no default timeout: without one, a single stalled
        # connection would block the whole scrape indefinitely.
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        response.raise_for_status()
        return BeautifulSoup(response.text, 'html.parser')
    except Exception as e:
        # Deliberately broad: callers treat None as "skip this page", so any
        # fetch or parse failure is reported and swallowed here.
        print(f"❌ Error fetching {url}: {e}")
        return None


def extract_listing_links(soup):
    """Collect the absolute URL of every listing card on a results page.

    Each ``<article>`` element is treated as one listing card, and the first
    ``<a>`` inside it that carries an ``href`` is taken as the listing link.

    Args:
        soup: Parsed results page.

    Returns:
        A list of absolute listing URLs in page order, without duplicates.
    """
    links = []
    for card in soup.find_all("article"):
        a_tag = card.find("a", href=True)
        if a_tag:
            # urljoin resolves root-relative, relative and already-absolute
            # hrefs correctly; plain string concatenation only works for
            # hrefs that start with "/".
            links.append(urljoin(BASE_URL, a_tag["href"]))
    # dict.fromkeys drops repeated links while keeping first-seen order, so
    # the same listing is not fetched twice from one page.
    return list(dict.fromkeys(links))


def safe_find(soup, tag_name, attrs=None):
    """Return the stripped text of the first matching tag, or None if absent.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find`` method.
        tag_name: Name of the tag to look for.
        attrs: Optional attribute filter forwarded to ``find``.
    """
    element = soup.find(tag_name, attrs=attrs)
    if not element:
        return None
    return element.text.strip()


def extract_property_information(soup):
    """Read the labelled ``<span>`` fields of a listing page into a dict.

    Each field is located by its ``aria-label`` attribute; fields that are
    not present on the page are left out of the result.

    Args:
        soup: Parsed listing detail page.

    Returns:
        Dict mapping output keys (``property_type``, ``purpose``,
        ``completion``, ``furnishing``, ``added_on``, ``listing_id``) to the
        stripped text of the matching element.
    """
    # (aria-label on the page, key in the returned dict)
    labelled_fields = (
        ("Type", "property_type"),
        ("Purpose", "purpose"),
        ("Completion", "completion"),
        ("Furnishing", "furnishing"),
        ("Added on", "added_on"),
        ("Reference", "listing_id"),
    )

    found = {}
    for aria_label, field in labelled_fields:
        element = soup.find("span", attrs={"aria-label": aria_label})
        if element:
            found[field] = element.get_text(strip=True)

    # When the reference text contains "ID", keep only what follows the
    # last occurrence of it.
    reference = found.get("listing_id")
    if reference is not None and "ID" in reference:
        found["listing_id"] = reference.rpartition("ID")[2].strip()

    return found


def extract_details_from_listing(url):
    """Fetch one listing page and gather its attributes into a dict.

    Args:
        url: Absolute URL of the listing detail page.

    Returns:
        Dict with the keys ``price``, ``location``, ``size_sqm``,
        ``bedrooms``, ``bathrooms``, ``listing_type``, ``link`` and
        ``title`` (values are None where the element is missing), plus
        whatever ``extract_property_information`` finds. Returns None when
        the page cannot be fetched or parsing raises.
    """
    page = get_soup(url)
    if not page:
        return None

    # (output key, tag name, aria-label) for fields read from one element
    simple_fields = (
        ("price", "span", "Price"),
        ("location", "div", "Property header"),
        ("size_sqm", "span", "Area"),
        ("bedrooms", "span", "Beds"),
        ("bathrooms", "span", "Baths"),
        ("listing_type", "span", "Type"),
    )

    try:
        record = {
            key: safe_find(page, tag, {"aria-label": label})
            for key, tag, label in simple_fields
        }
        record["link"] = url

        # The title is the <h1> inside the "Property overview" container.
        overview = page.find("div", attrs={"aria-label": "Property overview"})
        heading = overview.find("h1") if overview else None
        record["title"] = heading.get_text(strip=True) if heading else None

        record.update(extract_property_information(page))
    except Exception as e:
        print(f"❌ Error parsing {url}: {e}")
        return None

    return record


def save_to_csv(data, filename):
    """Write a list of row dicts to *filename* as UTF-8 CSV.

    The header is the alphabetically sorted union of all keys across the
    rows. Nothing is written when *data* is empty.

    Args:
        data: List of dicts, one per output row.
        filename: Path of the CSV file to create or overwrite.
    """
    if not data:
        print("⚠️ No data to save.")
        return

    # Rows may carry different key sets, so build the header from all of them.
    columns = set()
    for row in data:
        columns.update(row.keys())
    header = sorted(columns)

    with open(filename, mode='w', newline='', encoding='utf-8') as out:
        writer = csv.DictWriter(out, fieldnames=header)
        writer.writeheader()
        for row in data:
            writer.writerow(row)

    print(f"\n✅ Saved {len(data)} listings to {filename}")


def scrape_bayut_properties():
    """Walk the result pages, scrape every listing, and save them to CSV.

    Visits up to MAX_PAGES result pages starting at START_URL. Stops early
    when a results page cannot be fetched or contains no listing links.
    Whatever was collected is written to ``bayut_properties.csv``.
    """
    collected = []

    for page in range(1, MAX_PAGES + 1):
        print(f"\n🔎 Scraping listing page {page}...")
        # Page 1 lives at the bare start URL; later pages add a suffix.
        page_url = f"{START_URL}page-{page}/" if page != 1 else START_URL

        soup = get_soup(page_url)
        if not soup:
            break

        listing_urls = extract_listing_links(soup)
        if not listing_urls:
            print("🚫 No more listings found.")
            break

        total = len(listing_urls)
        for position, listing_url in enumerate(listing_urls, start=1):
            print(f"  [{position}/{total}] Visiting: {listing_url}")
            record = extract_details_from_listing(listing_url)
            if record:
                collected.append(record)
            # Pause between detail-page requests.
            time.sleep(SLEEP_BETWEEN_LISTINGS)

        # Pause before moving on to the next results page.
        time.sleep(SLEEP_BETWEEN_PAGES)

    save_to_csv(collected, "bayut_properties.csv")


# Start the scrape only when this file is executed directly, not on import.
if __name__ == "__main__":
    scrape_bayut_properties()



🔎 Scraping listing page 1...
  [1/24] Visiting: https://www.bayut.om/en/property/details-500024656.html
  [2/24] Visiting: https://www.bayut.om/en/property/details-500024649.html
  [3/24] Visiting: https://www.bayut.om/en/property/details-500024648.html
  [4/24] Visiting: https://www.bayut.om/en/property/details-500024621.html
  [5/24] Visiting: https://www.bayut.om/en/property/details-500024573.html
  [6/24] Visiting: https://www.bayut.om/en/property/details-500024653.html
  [7/24] Visiting: https://www.bayut.om/en/property/details-500024652.html
  [8/24] Visiting: https://www.bayut.om/en/property/details-500024624.html
  [9/24] Visiting: https://www.bayut.om/en/property/details-500024628.html
  [10/24] Visiting: https://www.bayut.om/en/property/details-500024627.html
  [11/24] Visiting: https://www.bayut.om/en/property/details-500024622.html
  [12/24] Visiting: https://www.bayut.om/en/property/details-130322503.html
  [13/24] Visiting: https://www.bayut.om/en/property/details-130329