In [1]:
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [2]:
# Site root; listing hrefs found on index pages are appended to this.
BASE_URL = "https://www.dubizzle.com.om"
# First index page of the crawl; later pages add a ``?page=N`` query string.
START_URL = BASE_URL + "/en/properties/properties-for-sale/"
# Headers sent with every HTTP request.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# Pauses (in seconds, passed to time.sleep) after each listing / index page.
SLEEP_BETWEEN_LISTINGS = 1.5
SLEEP_BETWEEN_PAGES = 5
# Highest index-page number the crawl will request.
MAX_PAGES = 199


def get_soup(url, timeout=30):
    """Fetch *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Absolute URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole crawl.

    Returns:
        A ``BeautifulSoup`` object built with ``html.parser``, or ``None``
        when the request fails, times out, or returns an HTTP error status.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so they are reported below.
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser')
    except Exception as e:
        # Deliberately broad: this is the best-effort boundary of the
        # scraper; one bad page is reported and skipped, never fatal.
        print(f" Error fetching {url}: {e}")
        return None


def extract_listing_links(soup):
    """Collect the detail-page URL of every listing card on an index page.

    Args:
        soup: Parsed index page (an object exposing ``find_all``).

    Returns:
        A list of absolute listing URLs, in page order. Cards without an
        ``<a href=...>`` are skipped.
    """
    links = []
    cards = soup.find_all('li', attrs={"aria-label": "Listing"})
    for card in cards:
        a_tag = card.find("a", href=True)
        if a_tag:
            # urljoin resolves relative hrefs against BASE_URL and leaves
            # already-absolute hrefs intact; plain string concatenation
            # would produce a malformed URL for the latter.
            links.append(urljoin(BASE_URL, a_tag["href"]))
    return links


def _text_or_none(tag):
    """Return the whitespace-stripped text of *tag*, or None if it is missing."""
    return tag.get_text(strip=True) if tag is not None else None


def _listing_id_from_url(url):
    """Return the segment after the URL's last ``-``, without a ``.html`` suffix."""
    listing_id = url.split("-")[-1].strip("/")
    suffix = ".html"
    if listing_id.endswith(suffix):
        listing_id = listing_id[:-len(suffix)]
    return listing_id


def extract_details_from_listing(url):
    """Download one listing page and pull its fields into a dict.

    Args:
        url: Absolute URL of the listing detail page.

    Returns:
        A dict with the keys ``title``, ``price``, ``location``, ``size_sqm``,
        ``bedrooms``, ``bathrooms``, ``listing_type``, ``property_type``,
        ``purpose``, ``completion``, ``furnishing``, ``added_on``,
        ``listing_id`` and ``link``. Fields whose element is not found on the
        page are ``None``. Returns ``None`` when the page cannot be fetched or
        parsing raises.
    """
    soup = get_soup(url)
    if soup is None:
        return None

    try:
        # NOTE: selectors are heuristic; inspect the live HTML if fields
        # start coming back empty.
        title = soup.find("h1")
        # First <span> whose own string contains the currency code.
        price = soup.find("span", string=lambda x: x and 'OMR' in x)
        location = (
            soup.find("span", attrs={"aria-label": "Location"})
            or soup.find("span", class_="_1ee53078")
        )
        size = soup.find("span", attrs={"aria-label": "Area"})
        beds = soup.find("span", attrs={"aria-label": "Bedrooms"})
        baths = soup.find("span", attrs={"aria-label": "Bathrooms"})

        return {
            "title": _text_or_none(title),
            "price": _text_or_none(price),
            "location": _text_or_none(location),
            "size_sqm": _text_or_none(size),
            "bedrooms": _text_or_none(beds),
            "bathrooms": _text_or_none(baths),
            # Hard-coded: this function does not read the listing type or
            # purpose from the page.
            "listing_type": "For Sale",
            "purpose": "Sale",
            # Not scraped; kept as placeholders so the columns exist.
            "property_type": None,
            "completion": None,
            "furnishing": None,
            "added_on": None,
            # e.g. ".../some-title-ID130270402.html" -> "ID130270402"
            "listing_id": _listing_id_from_url(url),
            "link": url,
        }

    except Exception as e:
        # Best-effort: a single malformed page is reported and skipped.
        print(f" Error parsing {url}: {e}")
        return None


def save_to_csv(data, filename):
    """Write a list of row dicts to *filename* as UTF-8 CSV.

    The header is the alphabetically sorted union of every key that appears
    in any row. When *data* is empty nothing is written and a notice is
    printed instead.
    """
    if data:
        columns = set()
        for record in data:
            columns.update(record.keys())
        header = sorted(columns)

        with open(filename, mode='w', newline='', encoding='utf-8') as handle:
            out = csv.DictWriter(handle, fieldnames=header)
            out.writeheader()
            for record in data:
                out.writerow(record)

        print(f"\n Saved {len(data)} listings to {filename}")
    else:
        print(" No data to save.")


def scrape_dubizzle_properties():
    """Crawl the index pages, scrape every listing and save the results.

    Walks index pages 1..MAX_PAGES starting at START_URL, visits each listing
    linked from a page, and collects the extracted dicts. The crawl stops
    early when an index page cannot be fetched or contains no listings.

    Whatever has been collected is written to ``dubizzle_properties.csv``
    even if the crawl is interrupted (e.g. Ctrl-C) or raises, so a long run
    is never lost; the interrupting exception still propagates afterwards.
    """
    all_data = []

    try:
        for page in range(1, MAX_PAGES + 1):
            print(f"\n🔎 Scraping page {page}...")
            # The first page has no query string; later pages use ?page=N.
            url = START_URL if page == 1 else f"{START_URL}?page={page}"
            soup = get_soup(url)
            if not soup:
                break

            links = extract_listing_links(soup)
            if not links:
                print(" No more listings found.")
                break

            for idx, listing_url in enumerate(links, start=1):
                print(f"  [{idx}/{len(links)}] Visiting: {listing_url}")
                data = extract_details_from_listing(listing_url)
                if data:
                    all_data.append(data)
                # Pause between requests to limit load on the site.
                time.sleep(SLEEP_BETWEEN_LISTINGS)

            time.sleep(SLEEP_BETWEEN_PAGES)
    finally:
        # Runs on normal completion, early break, and on any exception.
        save_to_csv(all_data, "dubizzle_properties.csv")


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scrape_dubizzle_properties()



🔎 Scraping page 1...
  [1/45] Visiting: https://www.dubizzle.com.om/en/ad/marvelous-marina-view-3070-payment-plan-the-sustainable-city-yiti-ID130270402.html
  [2/45] Visiting: https://www.dubizzle.com.om/en/ad/%D8%B4%D9%82%D8%A9-%D9%84%D9%84%D8%A8%D9%8A%D8%B9-%D8%A8%D9%85%D9%88%D8%A7%D8%B5%D9%81%D8%A7%D8%AA-%D9%85%D9%85%D9%8A%D8%B2%D8%A9-%D9%88%D8%A8%D8%AC%D9%88%D8%AF%D8%A9-%D9%85%D9%85%D8%AA%D8%A7%D8%B2%D8%A9-%D9%81%D8%B1%D8%B5%D8%A9-%D8%B1%D8%A7%D8%A6%D8%B9%D8%A9-%D9%84%D9%84%D8%A7%D8%B3%D8%AA%D8%AB%D9%85%D8%A7%D8%B1-%D8%A7%D9%88-%D8%A7%D9%84%D8%B3%D9%83%D9%86-ID130488968.html
  [3/45] Visiting: https://www.dubizzle.com.om/en/ad/1-bhk-apartment-for-sale-in-muscat-yiti-%D8%B4%D9%82%D8%A9-%D9%84%D9%84%D8%A8%D9%8A%D8%B9-%D9%81%D9%8A-%D9%85%D8%B3%D9%82%D8%B7-freehold-ID130488715.html
  [4/45] Visiting: https://www.dubizzle.com.om/en/ad/furnished-3-bedroom-lakefront-villa-for-sale-in-hawana-salalah-ID130367511.html
  [5/45] Visiting: https://www.dubizzle.com.om/en/ad/luxury-apartmentmouj