In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from bs4 import BeautifulSoup
import time

def _listing_text(listing, selector, default='Not available'):
    """Return the stripped text of the first *selector* match inside *listing*.

    Falls back to *default* when the selector matches nothing, so every
    listing contributes exactly one value per column.
    """
    element = listing.select_one(selector)
    return element.text.strip() if element is not None else default


def scrape_real_estate_prices(url: str, *, wait_seconds: float = 5) -> pd.DataFrame:
    """Scrape listing prices and room counts from a rendered web page.

    The page is loaded in headless Chrome, its rendered HTML is parsed with
    BeautifulSoup, and one row is produced per listing card.

    Args:
        url: Address of the listings page to load.
        wait_seconds: Fixed delay, in seconds, between requesting the page and
            reading its HTML. Defaults to 5, the previously hard-coded value.

    Returns:
        A DataFrame with string columns ``'Price'`` and ``'Rooms'``. A field
        that is missing from a listing card is recorded as
        ``'Not available'``. The frame is empty when no listing card matches.
    """
    # Setup ChromeDriver
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run Chrome in headless mode (no GUI)

    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(url)
        # Fixed delay rather than an explicit wait condition; gives the page
        # time to finish rendering before the HTML snapshot is taken.
        time.sleep(wait_seconds)
        source = driver.page_source
    finally:
        # Always shut the browser down, even if navigation fails, so no
        # Chrome/chromedriver process is left running.
        driver.quit()

    soup = BeautifulSoup(source, 'html.parser')

    # NOTE(review): these class names look build-generated and may change
    # when the site is redeployed -- confirm the selectors still match.
    listings = soup.select('div.HgListingCard_card_QGuXn')

    prices = [
        _listing_text(listing, 'span.HgListingCard_price_JoPAs')
        for listing in listings
    ]
    rooms = [
        _listing_text(listing, 'span:nth-child(1) > strong')
        for listing in listings
    ]

    return pd.DataFrame({
        'Price': prices,
        'Rooms': rooms,
    })

# Example usage: scrape the canton of Zurich listings page and save the
# resulting table as a CSV file (without the DataFrame index column).
url = 'https://www.homegate.ch/buy/real-estate/canton-zurich/matching-list'
output_csv = 'real_estate_prices_zurich.csv'

real_estate_data = scrape_real_estate_prices(url)
real_estate_data.to_csv(output_csv, index=False)

print("Scraping complete.")


Scraping complete.
