### Importing all libraries

In [1]:
import requests
from bs4 import BeautifulSoup

### initiating user agent to avoid getting blocked

In [4]:
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
  'referer': 'https://www.sarouty.ma/fr/recherche?c=1&fu=0&l=113&ob=mr&page=1'
}

### passing our url

In [7]:
base_url = 'https://www.sarouty.ma/fr/recherche?c=1&fu=0&l=113&ob=mr&page='

### Number of pages to scrape

In [10]:
num_pages = 10

### List to store all scraped data

In [13]:
all_properties = []

In [None]:
### Functio

In [15]:
# Function to clean the location string
def clean_location(location):
    # Remove "Rabat" and any trailing commas or spaces
    location = location.replace("Rabat", "").strip()
    if location.endswith(","):
        location = location[:-1].strip()
    return location


In [17]:
# Function to clean the price string and convert to integer or float
def clean_price(price_text):
    # Remove "MAD", spaces, non-breaking spaces, and commas
    price_clean = (
        price_text.strip()
        .replace("MAD", "")
        .replace("\u202f", "")  # Remove non-breaking spaces
        .replace(" ", "")
        .replace(",", "")  # Remove commas
    )
    # Convert to integer or float
    try:
        if "." in price_clean:  # If it contains a decimal point, convert to float
            return float(price_clean)
        else:  # Otherwise, convert to integer
            return int(price_clean)
    except ValueError:
        return "N/A"  # If conversion fails, return "N/A"

In [58]:
# Loop through each page
for page in range(1, num_pages + 1):
    print(f"Scraping page {page}...")
    url = base_url + str(page)
    response = requests.get(url, headers=header)
    
    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve page {page}. Status code: {response.status_code}")
        continue
    
    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all property cards on the page
    property_cards = soup.find_all('div', class_='card__content')
    
    # Loop through each property card and extract data
    for card in property_cards:
        try:
            # Extract property type
            property_type = card.find('p', class_='card-intro__type')
            property_type = property_type.text.strip() if property_type else "N/A"
            
            
            # Extract URL
            link_tag = card.find('a', href=True)  # Find the first <a> tag with an href attribute
            url = "https://www.sarouty.ma" + link_tag['href'] if link_tag else "N/A"
            
            # Extract location and clean it
            location = card.find('span', class_='card-specifications__location-text')
            location = clean_location(location.text.strip()) if location else "N/A"
            
             # Extract bedrooms, bathrooms, and area
            bedrooms = "N/A"
            bathrooms = "N/A"
            area = "N/A"
            
            # Loop through all amenities to correctly identify bedrooms, bathrooms, and area
            for item in card.find_all('p', class_='card-specifications__item'):
                text = item.text.strip()
                if 'bedroom' in text.lower() or 'chambre' in text.lower():  # Check for bedrooms
                    bedrooms = text
                elif 'bathroom' in text.lower() or 'salle de bain' in text.lower():  # Check for bathrooms
                    bathrooms = text
                elif 'm²' in text:  # Check for area
                    area = text.replace('\xa0', '').replace('m²', '')

             # Extract price
            price = card.find('p', class_='card-intro__price')
            price = clean_price(price.text) if price else "N/A"
            
            # Store the extracted data
            property_data = {
                "type": property_type,
                "url": url,
                "location": location,
                "bedrooms": bedrooms,
                "bathrooms": bathrooms,
                "area": area,
                "price": price,
            }
            all_properties.append(property_data)
        except Exception as e:
            print(f"Error extracting data from a property card: {e}")
            continue

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...


In [60]:
print(f"Total properties scraped: {len(all_properties)}")

Total properties scraped: 1000


### Print the scraped data

In [63]:
for property_data in all_properties:
    print(property_data)

{'type': 'Appartement', 'price': 2550000, 'url': 'N/A', 'location': 'Hay Riad', 'bedrooms': '2 bedrooms', 'bathrooms': '2 bathrooms', 'area': '96 '}
{'type': 'Villa', 'price': 24000000, 'url': 'N/A', 'location': 'Souissi', 'bedrooms': '7+ bedrooms', 'bathrooms': '5 bathrooms', 'area': '1\u202f700 '}
{'type': 'Villa', 'price': 27000000, 'url': 'N/A', 'location': 'Souissi', 'bedrooms': '6 bedrooms', 'bathrooms': '6 bathrooms', 'area': '3\u202f815 '}
{'type': 'Villa', 'price': 12500000, 'url': 'N/A', 'location': 'Souissi', 'bedrooms': '5 bedrooms', 'bathrooms': '5 bathrooms', 'area': '550 '}
{'type': 'Villa', 'price': 18000000, 'url': 'N/A', 'location': 'Souissi', 'bedrooms': '7+ bedrooms', 'bathrooms': '5 bathrooms', 'area': '2\u202f000 '}
{'type': 'Appartement', 'price': 2450000, 'url': 'N/A', 'location': 'Hay Riad', 'bedrooms': '2 bedrooms', 'bathrooms': '2 bathrooms', 'area': '91 '}
{'type': 'Villa', 'price': 7800000, 'url': 'N/A', 'location': 'Hassan', 'bedrooms': '3 bedrooms', 'bath

### Save the scraped data into a CSV file

In [66]:
import pandas as pd

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_properties)

# Save to CSV
df.to_csv('properties.csv', index=False, encoding='utf-8-sig')

print("Data saved to properties.csv")

Data saved to properties.csv


In [68]:
pd.read_csv('properties.csv')

Unnamed: 0,type,price,url,location,bedrooms,bathrooms,area
0,Appartement,2550000,,Hay Riad,2 bedrooms,2 bathrooms,96
1,Villa,24000000,,Souissi,7+ bedrooms,5 bathrooms,1 700
2,Villa,27000000,,Souissi,6 bedrooms,6 bathrooms,3 815
3,Villa,12500000,,Souissi,5 bedrooms,5 bathrooms,550
4,Villa,18000000,,Souissi,7+ bedrooms,5 bathrooms,2 000
...,...,...,...,...,...,...,...
995,Terrain,9050000,,Souissi,,,1 810
996,Villa,9200000,,Hay Riad,7 bedrooms,3 bathrooms,500
997,Terrain,60000000,,El Menzeh,,,100 000
998,Villa,10000000,,Hay Riad,7+ bedrooms,2 bathrooms,942
