In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

In [3]:
# --- Scraper configuration ---------------------------------------------------
# Everything a reader might want to tune lives here rather than inside the loop.
BASE_URL = "https://hilalprp.com.om/properties-search/page/{page}/?status=for-sale"
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
}
FIRST_PAGE, LAST_PAGE = 1, 7   # result pages to walk, inclusive
REQUEST_TIMEOUT_S = 30         # without a timeout a stalled connection hangs forever
PAGE_DELAY_S = 1               # pause after each result page


def _fetch_soup(session, url):
    """Download ``url`` and return the parsed HTML.

    Raises ``requests.RequestException`` on timeouts, connection errors and
    non-2xx responses, so an error page is never parsed as real content.
    """
    resp = session.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT_S)
    resp.raise_for_status()
    return BeautifulSoup(resp.content, 'html.parser')


def _parse_bedrooms(title, card_text):
    """Return the bedroom count as a string, or 'N/A' when none is found.

    The title is tried first (e.g. "3-BEDROOM VILLA"); the full text of the
    listing card is the fallback. Both inputs are expected to be lower-cased.
    """
    bed_match = re.search(r'(\d+)[\s-]*bed', title) or re.search(r'(\d+)\s*bed', card_text)
    return bed_match.group(1) if bed_match else 'N/A'


def _scrape_detail(session, detail_url):
    """Return ``(location, area)`` read from a property detail page.

    Either value is 'N/A' when the corresponding element is not present.
    """
    detail_soup = _fetch_soup(session, detail_url)

    # Location is the text of the first link pointing at a /property-city/ URL.
    city_link = detail_soup.find('a', href=re.compile(r'/property-city/'))
    location = city_link.text.strip() if city_link else "N/A"

    # A CSS selector matches the two classes in any order and alongside extra
    # classes; find(class_="a b") only matches that exact attribute string.
    area_span = detail_soup.select_one("div.rh_property__meta.prop_area span.figure")
    area = area_span.text.strip() if area_span else "N/A"

    return location, area


all_flat = []

# One Session reuses the underlying connection across all requests.
with requests.Session() as session:
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        print(f"Scraping page {page}...")
        try:
            soup = _fetch_soup(session, BASE_URL.format(page=page))
        except requests.RequestException as e:
            # A single bad page must not discard the rows collected so far.
            print(f"Request failed on page {page}: {e}")
            continue

        listings = soup.find_all('div', class_="rh_list_card__map_wrap")
        if not listings:
            print(f"No listings found on page {page}")
            continue

        for listing in listings:
            try:
                title = listing.find('h3').text.strip()
                price = listing.find('p', class_="price").text.strip()
                full_text = listing.get_text(separator=' ').lower()
                bedrooms = _parse_bedrooms(title.lower(), full_text)

                link_tag = listing.find('a', href=True)
                detail_url = link_tag['href'] if link_tag else None

                location, area = "N/A", "N/A"
                if detail_url:
                    try:
                        location, area = _scrape_detail(session, detail_url)
                    except requests.RequestException as e:
                        # Keep the listing with what the card itself provided.
                        print(f"Detail request failed for {detail_url}: {e}")

                all_flat.append({
                    "Title": title,
                    "Price": price,
                    "Location": location,
                    "Bedrooms": bedrooms,
                    "Area (sqm)": area
                })
            except Exception as e:
                # Best-effort scrape: a malformed card (e.g. missing title or
                # price element) is reported and skipped, not fatal.
                print(f"Error on page {page} listing: {e}")

        time.sleep(PAGE_DELAY_S)

# Save to CSV
df2 = pd.DataFrame(all_flat)
df2.to_csv('hilal_properties.csv', index=False)
print(f"\n:white_check_mark: Total properties scraped: {len(df2)}")
print(df2.head())


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...

:white_check_mark: Total properties scraped: 63
                      Title       Price    Location Bedrooms Area (sqm)
0       3-BEDROOM APARTMENT   OMR45,000     Bausher        3        N/A
1           3-BEDROOM VILLA  OMR290,000  Al Mawaleh        3        N/A
2      6-BEDROOM TWIN VILLA  OMR180,000     Bausher        6        N/A
3  7-BEDROOM DETACHED VILLA  OMR300,000    Al Ansab        7        758
4  4-BEDROOM DETACHED VILLA   OMR80,000     Al Hail        4        N/A
