In [16]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from bs4 import BeautifulSoup
import time

def _text_or_na(listing, selector):
    """Return the stripped text of the first node in ``listing`` matching ``selector``, or 'N/A'."""
    element = listing.select_one(selector)
    return element.text.strip() if element else 'N/A'


def scrape_kollbrunn():
    """Scrape the ImmoScout24 "buy" result page for Kollbrunn.

    Loads the result page in headless Chrome, parses the rendered HTML with
    BeautifulSoup and extracts link, address, rooms, living space and price
    for every result card. Fields other than the link fall back to 'N/A' when
    their element is missing; a card that raises while being processed (for
    example because it has no link element) is reported on stdout and skipped
    as a whole, so every row of the result describes exactly one listing.

    Side effects:
        Writes the table to 'kollbrunn_listings.csv' (semicolon-separated,
        no index column) in the current working directory.

    Returns:
        pandas.DataFrame with the columns 'Link', 'Address', 'Rooms',
        'Living Space (sqm)' and 'Price'; all values are strings.

    Raises:
        Whatever Selenium raises while starting the browser or loading the
        page. The browser is closed before such an error propagates.
    """
    columns = ['Link', 'Address', 'Rooms', 'Living Space (sqm)', 'Price']

    # Container holding rooms / living space / price inside one result card.
    details = 'div.HgListingRoomsLivingSpacePrice_roomsLivingSpacePrice_M6Ktp'

    # Setup ChromeDriver
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run Chrome in headless mode (no GUI)
    driver = webdriver.Chrome(service=service, options=options)

    # The URL for the Kollbrunn listings
    url = 'https://www.immoscout24.ch/en/real-estate/buy/city-kollbrunn'

    try:
        print("Scraping page for listings...")
        driver.get(url)
        time.sleep(5)  # Add a delay to allow the page to load

        # Get the full content of the webpage
        source = driver.page_source
    finally:
        # Always release the browser, even when navigation fails; the HTML
        # string captured above does not need the driver any more.
        driver.quit()

    # Parse HTML content with BeautifulSoup
    soup = BeautifulSoup(source, 'html.parser')

    # Extract listings
    listings = soup.select('div[data-test="result-list"] > div[data-test="result-list-item"]')

    # Build one complete record per listing and append it only once every
    # field has been read, so a failure can never leave a half-written row.
    records = []
    for listing in listings:
        try:
            link_element = listing.select_one('a.HgCardElevated_link_EHfr7')
            records.append({
                'Link': 'https://www.immoscout24.ch' + link_element['href'],
                'Address': _text_or_na(
                    listing, 'div.HgListingCard_secondaryTitle_uVla3 > div > address'),
                'Rooms': _text_or_na(listing, f'{details} > strong:nth-child(1)'),
                'Living Space (sqm)': _text_or_na(
                    listing, f'{details} > strong[title="living space"]'),
                'Price': _text_or_na(
                    listing, f'{details} > span.HgListingRoomsLivingSpacePrice_price_u9Vee'),
            })
        except Exception as e:
            # Best effort: report the broken card and carry on with the rest.
            print(f"An error occurred while processing a listing: {e}")
            continue

    # Passing ``columns`` keeps the header stable even when nothing was found.
    df = pd.DataFrame(records, columns=columns)

    # Save to file
    df.to_csv('kollbrunn_listings.csv', sep=";", index=False)

    return df

# Run the scraper once (this starts headless Chrome and also writes
# kollbrunn_listings.csv), keep the resulting DataFrame in the notebook-global
# `apartment_data`, and print it as plain text.
apartment_data = scrape_kollbrunn()
print(apartment_data)


Scraping page for listings...
                                        Link  \
0  https://www.immoscout24.ch/buy/4000824516   
1  https://www.immoscout24.ch/buy/4000739600   
2  https://www.immoscout24.ch/buy/4000683366   
3  https://www.immoscout24.ch/buy/4001114685   
4  https://www.immoscout24.ch/buy/4001057991   
5  https://www.immoscout24.ch/buy/4001057990   
6  https://www.immoscout24.ch/buy/4001057949   
7  https://www.immoscout24.ch/buy/4001057973   
8  https://www.immoscout24.ch/buy/4001057972   

                                             Address      Rooms  \
0                    Töbelistrasse22, 8483 Kollbrunn    3 rooms   
1                    Seemerrütiweg 8, 8483 Kollbrunn  4.5 rooms   
2  Siedlung Verdeblu Tösstalstrasse 18, 8483 Koll...  4.5 rooms   
3                  Tösstalstrasse 10, 8483 Kollbrunn  3.5 rooms   
4                                     8483 Kollbrunn  4.5 rooms   
5                                     8483 Kollbrunn  4.5 rooms   
6                   

In [19]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time

def scrape_price_from_link(url):
    """Return the price text shown on a single listing detail page.

    Opens ``url`` in headless Chrome, waits up to 10 seconds for the price
    element to be present in the DOM, prints the element's outer HTML for
    inspection and returns its stripped text.

    Args:
        url: Address of the listing detail page to open.

    Returns:
        The price text as a string, or 'N/A' when the price element could not
        be located or read (the error is printed to stdout in that case).

    Raises:
        Whatever Selenium raises while starting the browser or navigating to
        ``url``. The browser is closed before such an error propagates.
    """
    # Full CSS path to the price <span>; split across lines only for
    # readability, the concatenated value is a single selector string.
    price_selector = (
        '#app > main > div > div:nth-child(2) > div > div:nth-child(1)'
        ' > div.DetailPage_detailPageLayoutWrapper_Vzi0l'
        ' > div.DetailPage_detailPageContent_RZVaj'
        ' > div.DetailPage_detailPage_E8Nmj'
        ' > section.hg-listing-details'
        ' > div.spotlight-components'
        ' > div'
        ' > div.SpotlightAttributesPrice_item_iVKUf.SpotlightAttributesPrice_priceItem_Azyy0'
        ' > div.SpotlightAttributesPrice_value_TqKGz'
        ' > span:nth-child(2)'
    )

    # Setup ChromeDriver
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run Chrome in headless mode (no GUI)
    driver = webdriver.Chrome(service=service, options=options)

    try:
        # Navigate to the URL
        print(f"Scraping price from {url}...")
        driver.get(url)
        time.sleep(5)  # Add a delay to allow the page to load

        # Wait for the price element to be present and log the HTML content
        try:
            # until() either returns the located element or raises, so no
            # None check is needed on the result.
            price_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, price_selector))
            )
            price_html = price_element.get_attribute('outerHTML')
            print(f"Price HTML: {price_html}")
            price = price_element.text.strip()
        except Exception as e:
            # Best effort: a missing or unreadable price is reported, not fatal.
            print(f"An error occurred while extracting the price: {e}")
            price = 'N/A'
    finally:
        # Close the driver in every case so no headless Chrome is left behind.
        driver.quit()

    return price

# Detail page of a single listing to scrape; the result is kept in the
# notebook-global `price` and printed.
url = 'https://www.immoscout24.ch/buy/4001057990'
price = scrape_price_from_link(url)
print(f"Price: {price}")


Scraping price from https://www.immoscout24.ch/buy/4001057990...
Price HTML: <span><!----> 2,085,600.– <!----></span>
Price: 2,085,600.–
