In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

def get_listing_links(page_url):
    response = requests.get(page_url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        listings = soup.find('ul', class_='ulListing').find_all('li', class_='listingBox')
        return [listing['linkref'] for listing in listings]
    else:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return []

# Base URL for the pages
base_url = 'https://www.mubawab.ma/fr/ct/agadir/immobilier-a-vendre-all:esmn:40:esmx:700:sc:apartment-sale,house-sale,villa-sale'

# Initialize an empty list to store all listing links
all_listing_links = []

# Assuming there are 25 pages
for page_number in range(1, 25):
    page_url = f"{base_url}:p:{page_number}"
    links = get_listing_links(page_url)
    all_listing_links.extend(links)
    print(f"Page {page_number}: {len(links)} links collected.")
    
# Save the links to a CSV file
links_df = pd.DataFrame(all_listing_links, columns=['Link'])
links_df.to_csv('listing_links.csv', index=False)
print("Links saved to listing_links.csv")

Page 1: 34 links collected.
Page 2: 34 links collected.
Page 3: 34 links collected.
Page 4: 34 links collected.
Page 5: 34 links collected.
Page 6: 34 links collected.
Page 7: 34 links collected.
Page 8: 34 links collected.
Page 9: 34 links collected.
Page 10: 34 links collected.
Page 11: 34 links collected.
Page 12: 34 links collected.
Page 13: 34 links collected.
Page 14: 34 links collected.
Page 15: 34 links collected.
Page 16: 34 links collected.
Page 17: 34 links collected.
Page 18: 34 links collected.
Page 19: 34 links collected.
Page 20: 34 links collected.
Page 21: 34 links collected.
Page 22: 34 links collected.
Page 23: 34 links collected.
Links saved to listing_links.csv


In [5]:

# Read the CSV file with the listing links
links_df = pd.read_csv('listing_links.csv')
listing_links = links_df['Link'].tolist()

# Initialize lists to store the extracted data
prices = []
quartiers = []
areas = []
rooms = []
bedrooms = []
bathrooms = []
types = []
statuses = []

# Function to extract details from each listing page
def extract_listing_details(link):
    response = requests.get(link)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract price
        price_tag = soup.find('h3', class_='orangeTit')
        price = ''.join(filter(str.isdigit, price_tag.get_text(strip=True))) if price_tag else 'N/A'
        
        # Extract quartier (neighborhood)
        quartier_div = soup.find('div', class_='col-8 vAlignM adBread')
        quartier_tag = quartier_div.find_all('a', class_='darkblue')[-1] if quartier_div else None
        quartier = quartier_tag.get_text(strip=True) if quartier_tag else 'N/A'
        
        # Extract details using icons
        details_div = soup.find('div', class_='disFlex adDetails')
        area, room, bedroom, bathroom = 'N/A', 'N/A', 'N/A', 'N/A'
        if details_div:
            details = details_div.find_all('div', class_='adDetailFeature')
            for detail in details:
                icon = detail.find('i', class_='adDetailFeatureIcon')
                value = detail.find('span').get_text(strip=True)
                if 'icon-triangle' in icon['class']:
                    area = re.findall(r'\d+', value)[0] if value else 'N/A'
                elif 'icon-house-boxes' in icon['class']:
                    room = re.findall(r'\d+', value)[0] if value else 'N/A'
                elif 'icon-bed' in icon['class']:
                    bedroom = re.findall(r'\d+', value)[0] if value else 'N/A'
                elif 'icon-bath' in icon['class']:
                    bathroom = re.findall(r'\d+', value)[0] if value else 'N/A'

                    
        # Extract additional features
        features_div = soup.find('div', class_='adFeatures')
        type_, status = 'N/A', 'N/A'
        if features_div:
            features = features_div.find_all('div', class_='adMainFeature')
            for feature in features:
                label = feature.find('p', class_='adMainFeatureContentLabel').get_text(strip=True)
                value = feature.find('p', class_='adMainFeatureContentValue').get_text(strip=True)
                if label == 'Type de bien':
                    type_ = value
                elif label == 'Etat':
                    status = value

        # Append the data to the lists
        prices.append(price)
        quartiers.append(quartier)
        areas.append(area)
        rooms.append(room)
        bedrooms.append(bedroom)
        bathrooms.append(bathroom)
        types.append(type_)
        statuses.append(status)
    else:
        print(f"Failed to retrieve listing page: {link}. Status code: {response.status_code}")

# Loop through the first few links and extract details
num = 0
for link in listing_links:
    print(f"Extracting details from link {num}")
    extract_listing_details(link)
    num += 1

# Create a DataFrame to store the extracted data
data = pd.DataFrame({
    'Type': types,
    'Quartier': quartiers,
    'Price': prices,
    'Area': areas,
    'Rooms': rooms,
    'Bedrooms': bedrooms,
    'Bathrooms': bathrooms,
    'Status': statuses,
})

# Display the DataFrame
print(data)

# Save the data to a CSV file
data.to_csv('listing_details.csv', index=False)


Extracting details from link 0
Extracting details from link 1
Extracting details from link 2
Extracting details from link 3
Extracting details from link 4
Extracting details from link 5
Extracting details from link 6
Extracting details from link 7
Extracting details from link 8
Extracting details from link 9
Extracting details from link 10
Extracting details from link 11
Extracting details from link 12
Extracting details from link 13
Extracting details from link 14
Extracting details from link 15
Extracting details from link 16
Extracting details from link 17
Extracting details from link 18
Extracting details from link 19
Extracting details from link 20
Extracting details from link 21
Extracting details from link 22
Extracting details from link 23
Extracting details from link 24
Extracting details from link 25
Extracting details from link 26
Extracting details from link 27
Extracting details from link 28
Extracting details from link 29
Extracting details from link 30
Extracting details