In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from bs4 import BeautifulSoup
import time

def scrape_kollbrunn():
    """Scrape the immoscout24.ch "buy" result page for Kollbrunn.

    Opens the English result list in headless Chrome, parses the rendered
    HTML with BeautifulSoup and extracts link, address, rooms, living space
    and price from every listing card. The table is written to
    'kollbrunn_listings.csv' (semicolon-separated, no index) and returned.

    Returns:
        pandas.DataFrame: one row per successfully parsed listing with the
        columns 'Link', 'Address', 'Rooms', 'Living Space (sqm)', 'Price'.
        A field whose element is not found is filled with 'N/A'. A listing
        without a link anchor is reported on stdout and skipped.
    """
    columns = ['Link', 'Address', 'Rooms', 'Living Space (sqm)', 'Price']
    facts_box = 'div.HgListingRoomsLivingSpacePrice_roomsLivingSpacePrice_M6Ktp'

    def text_or_na(node, selector):
        # Stripped text of the first element matching `selector` below
        # `node`, or 'N/A' when nothing matches.
        element = node.select_one(selector)
        return element.text.strip() if element else 'N/A'

    # Setup ChromeDriver (headless: no GUI window)
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(service=service, options=options)

    # The URL for the Kollbrunn listings
    url = 'https://www.immoscout24.ch/en/real-estate/buy/city-kollbrunn'

    # The browser is only needed to obtain the rendered HTML; quitting in
    # `finally` ensures Chrome is shut down even if loading the page fails.
    try:
        print("Scraping page for listings...")
        driver.get(url)
        time.sleep(5)  # Fixed delay to allow the page to load
        source = driver.page_source
    finally:
        driver.quit()

    soup = BeautifulSoup(source, 'html.parser')
    listings = soup.select('div[data-test="result-list"] > div[data-test="result-list-item"]')

    # One dict per listing: a row is added only when the whole listing was
    # parsed, so a failure can never shift values between rows.
    records = []
    for listing in listings:
        try:
            link_element = listing.select_one('a.HgCardElevated_link_EHfr7')
            records.append({
                # Raises TypeError (handled below) when the anchor is missing.
                'Link': 'https://www.immoscout24.ch/en' + link_element['href'],
                'Address': text_or_na(listing, 'div.HgListingCard_secondaryTitle_uVla3 > div > address'),
                'Rooms': text_or_na(listing, facts_box + ' > strong:nth-child(1)'),
                'Living Space (sqm)': text_or_na(listing, facts_box + ' > strong[title="living space"]'),
                'Price': text_or_na(listing, 'span.HgListingRoomsLivingSpacePrice_price_u9Vee'),
            })
        except Exception as e:
            # Best effort: report the broken listing and carry on.
            print(f"An error occurred while processing a listing: {e}")

    # `columns` keeps the header stable even when no listing was found.
    df = pd.DataFrame(records, columns=columns)

    # Save to file
    df.to_csv('kollbrunn_listings.csv', sep=";", index=False)

    return df

# Run the scraper (launches headless Chrome and writes
# 'kollbrunn_listings.csv') and print the resulting DataFrame.
apartment_data = scrape_kollbrunn()
print(apartment_data)


Scraping page for listings...
                                           Link  \
0  https://www.immoscout24.ch/en/buy/4000824516   
1  https://www.immoscout24.ch/en/buy/4000739600   
2  https://www.immoscout24.ch/en/buy/4000683366   
3  https://www.immoscout24.ch/en/buy/4001114685   
4  https://www.immoscout24.ch/en/buy/4001057991   
5  https://www.immoscout24.ch/en/buy/4001057990   
6  https://www.immoscout24.ch/en/buy/4001057949   
7  https://www.immoscout24.ch/en/buy/4001057973   
8  https://www.immoscout24.ch/en/buy/4001057972   

                                             Address      Rooms  \
0                    Töbelistrasse22, 8483 Kollbrunn    2 rooms   
1                    Seemerrütiweg 8, 8483 Kollbrunn  2.5 rooms   
2  Siedlung Verdeblu Tösstalstrasse 18, 8483 Koll...  2.5 rooms   
3                  Tösstalstrasse 10, 8483 Kollbrunn  2.5 rooms   
4                                     8483 Kollbrunn  2.5 rooms   
5                                     8483 Kollbrunn  3.5

In [19]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time

def scrape_price_from_link(url):
    """Read the price shown on a single listing detail page.

    Opens `url` in headless Chrome, waits for the price element and returns
    its text. The element's outer HTML is printed for inspection.

    Args:
        url: Address of the listing detail page to open.

    Returns:
        str: The stripped text of the price element, or 'N/A' when the
        element could not be located or read (the error is printed).
    """
    # Full CSS path to the price value on the detail page. It relies on
    # generated class names and the exact nesting of the page.
    price_selector = (
        '#app > main > div > div:nth-child(2) > div > div:nth-child(1)'
        ' > div.DetailPage_detailPageLayoutWrapper_Vzi0l'
        ' > div.DetailPage_detailPageContent_RZVaj'
        ' > div.DetailPage_detailPage_E8Nmj'
        ' > section.hg-listing-details'
        ' > div.spotlight-components'
        ' > div'
        ' > div.SpotlightAttributesPrice_item_iVKUf.SpotlightAttributesPrice_priceItem_Azyy0'
        ' > div.SpotlightAttributesPrice_value_TqKGz'
        ' > span:nth-child(2)'
    )

    # Setup ChromeDriver (headless: no GUI window)
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(service=service, options=options)

    # Quitting in `finally` ensures Chrome is shut down even if navigation
    # raises before the price lookup is reached.
    try:
        print(f"Scraping price from {url}...")
        driver.get(url)
        time.sleep(5)  # Fixed delay to allow the page to load

        try:
            # until() either returns the located element or raises on
            # timeout, so no None check is needed afterwards.
            price_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, price_selector))
            )
            price_html = price_element.get_attribute('outerHTML')
            print(f"Price HTML: {price_html}")
            price = price_element.text.strip()
        except Exception as e:
            # Best effort: a missing price is reported, not fatal.
            print(f"An error occurred while extracting the price: {e}")
            price = 'N/A'
    finally:
        driver.quit()

    return price

# Detail page of a single listing whose price is looked up.
url = 'https://www.immoscout24.ch/buy/4001057990'
# Returns the price text, or 'N/A' if the price element was not found.
price = scrape_price_from_link(url)
print(f"Price: {price}")


Scraping price from https://www.immoscout24.ch/buy/4001057990...
Price HTML: <span><!----> 2,085,600.– <!----></span>
Price: 2,085,600.–


In [16]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from bs4 import BeautifulSoup
import time



def scrape_kollbrunn():
    """Scrape the German immoscout24.ch "kaufen" result page for Kollbrunn.

    Opens the result list in headless Chrome, parses the rendered HTML with
    BeautifulSoup and extracts link, address, rooms, living space and price
    from every `article[data-item]` element. The table is written to
    'kollbrunn_listings.csv' (semicolon-separated, no index) and returned.

    Returns:
        pandas.DataFrame: one row per successfully parsed listing with the
        columns 'Link', 'Address', 'Rooms', 'Living Space (sqm)', 'Price'.
        A field whose element is not found is filled with 'N/A'. A listing
        without a link anchor is reported on stdout and skipped.
    """
    columns = ['Link', 'Address', 'Rooms', 'Living Space (sqm)', 'Price']
    facts_box = 'div.HgListingRoomsLivingSpacePrice_roomsLivingSpacePrice_M6Ktp'

    def text_or_na(node, selector):
        # Stripped text of the first element matching `selector` below
        # `node`, or 'N/A' when nothing matches.
        element = node.select_one(selector)
        return element.text.strip() if element else 'N/A'

    # Setup ChromeDriver (headless: no GUI window)
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(service=service, options=options)

    # The URL for the Kollbrunn listings
    url = 'https://www.immoscout24.ch/de/immobilien/kaufen/ort-kollbrunn'

    # The browser is only needed to obtain the rendered HTML; quitting in
    # `finally` ensures Chrome is shut down even if loading the page fails.
    try:
        print("Scraping page for listings...")
        driver.get(url)
        time.sleep(5)  # Fixed delay to allow the page to load
        source = driver.page_source
    finally:
        driver.quit()

    soup = BeautifulSoup(source, 'html.parser')

    # NOTE(review): the recorded run of this cell printed an empty DataFrame,
    # so this selector presumably matched nothing on the page - confirm
    # against the live markup.
    listings = soup.select('article[data-item]')

    # One dict per listing: a row is added only when the whole listing was
    # parsed, so a failure can never shift values between rows.
    records = []
    for listing in listings:
        try:
            link_element = listing.select_one('a.HgCardElevated_link_EHfr7')
            records.append({
                # Raises TypeError (handled below) when the anchor is missing.
                'Link': 'https://www.immoscout24.ch' + link_element['href'],
                'Address': text_or_na(listing, 'div.HgListingCard_secondaryTitle_uVla3 > div > address'),
                'Rooms': text_or_na(listing, facts_box + ' > strong:nth-child(1)'),
                # NOTE(review): the title is matched in English although the
                # German page is requested - verify the attribute value.
                'Living Space (sqm)': text_or_na(listing, facts_box + ' > strong[title="living space"]'),
                'Price': text_or_na(listing, 'span.HgListingRoomsLivingSpacePrice_price_u9Vee'),
            })
        except Exception as e:
            # Best effort: report the broken listing and carry on.
            print(f"An error occurred while processing a listing: {e}")

    # `columns` keeps the header stable even when no listing was found.
    df = pd.DataFrame(records, columns=columns)

    # Save to file
    df.to_csv('kollbrunn_listings.csv', sep=";", index=False)

    return df

# Run the scraper (launches headless Chrome and writes
# 'kollbrunn_listings.csv') and print the resulting DataFrame.
# NOTE(review): the recorded output of this cell is an empty DataFrame.
apartment_data = scrape_kollbrunn()
print(apartment_data)





Scraping page for listings...
Empty DataFrame
Columns: [Link, Address, Rooms, Living Space (sqm), Price]
Index: []


In [4]:
import os
import re
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import warnings
 
# Settings: silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries
# imported above - confirm that blanket suppression is intended.
warnings.filterwarnings("ignore")
 
# Show the current working directory; the CSV written by the scraper is
# given as a relative path and therefore ends up here.
print(f'Current working directory: {os.getcwd()}')
 
def scrape_kollbrunn():
    """Scrape the immoscout24.ch "buy" result page for Kollbrunn.

    Opens the English result list in headless Chrome, waits (up to 20 s)
    until a result item is present, then parses the rendered HTML with
    BeautifulSoup. The table is written to 'kollbrunn_listings.csv'
    (semicolon-separated, no index) and returned.

    Returns:
        pandas.DataFrame: one row per listing with the columns 'Page',
        'Link', 'Address', 'Rooms', 'Living Space (sqm)', 'Price', 'Title'
        and 'Description'. A field whose element is not found is 'N/A'.

    Raises:
        selenium.common.exceptions.TimeoutException: if no result item
            appears within the wait; the browser is still closed.
    """
    columns = ['Page', 'Link', 'Address', 'Rooms', 'Living Space (sqm)',
               'Price', 'Title', 'Description']
    item_selector = 'div[data-test="result-list"] > div[data-test="result-list-item"]'
    main_title = 'div.HgListingCard_mainTitle_x0p2D > div'
    description_box = 'div.HgListingDescription_description_r5HCO'

    def text_or_na(node, selector):
        # Stripped text of the first element matching `selector` below
        # `node`, or 'N/A' when nothing matches.
        element = node.select_one(selector)
        return element.text.strip() if element else 'N/A'

    # Setup ChromeDriver (headless: no GUI window)
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(service=service, options=options)

    # The URL for the Kollbrunn listings
    url = 'https://www.immoscout24.ch/en/real-estate/buy/city-kollbrunn'

    # Quitting in `finally` ensures Chrome is shut down even when the page
    # fails to load or the wait below times out.
    try:
        driver.get(url)

        # Wait for the listings to load
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, item_selector))
        )

        # Get the full content of the webpage
        source = driver.page_source
    finally:
        driver.quit()

    # Page label: whatever follows the first '=' in the URL. The URL above
    # contains no '=', so the label falls back to '1'.
    page = re.search(r'=(.*)', url)
    page_label = page.group(1) if page else '1'

    soup = BeautifulSoup(source, 'html.parser')

    # One dict per listing: a row is added only when the whole listing was
    # parsed, so a failure can never shift values between rows.
    records = []
    for listing in soup.select(item_selector):
        try:
            link_element = listing.select_one('a.HgCardElevated_link_EHfr7')
            records.append({
                'Page': page_label,
                'Link': 'https://www.immoscout24.ch' + link_element['href'] if link_element else 'N/A',
                'Address': text_or_na(listing, 'div.HgListingCard_secondaryTitle_uVla3 > div > address'),
                'Rooms': text_or_na(listing, main_title + ' > strong:nth-child(1)'),
                'Living Space (sqm)': text_or_na(listing, main_title + ' > strong:nth-child(3)'),
                'Price': text_or_na(listing, main_title + ' > span.HgListingRoomsLivingSpacePrice_price_u9Vee'),
                'Title': text_or_na(listing, description_box + ' > span'),
                'Description': text_or_na(listing, description_box + ' > p.HgListingDescription_large_uKs3J'),
            })
        except Exception as e:
            # Best effort: report the broken listing and carry on.
            print(f"An error occurred while processing a listing: {e}")

    # `columns` keeps the header stable even when no listing was found.
    df = pd.DataFrame(records, columns=columns)

    # Save to file
    df.to_csv('kollbrunn_listings.csv', sep=";", index=False)

    return df
 
# Run the scraper (launches headless Chrome and writes
# 'kollbrunn_listings.csv') and print the resulting DataFrame.
apartment_data = scrape_kollbrunn()
print(apartment_data)

Current working directory: c:\APSArbeit\AppliedDataScience
  Page                                       Link  \
0    1  https://www.immoscout24.ch/buy/4000824516   
1    1  https://www.immoscout24.ch/buy/4000739600   
2    1  https://www.immoscout24.ch/buy/4000683366   
3    1  https://www.immoscout24.ch/buy/4001114685   
4    1  https://www.immoscout24.ch/buy/4001057991   
5    1  https://www.immoscout24.ch/buy/4001057990   
6    1  https://www.immoscout24.ch/buy/4001057949   
7    1  https://www.immoscout24.ch/buy/4001057973   
8    1  https://www.immoscout24.ch/buy/4001057972   

                                             Address      Rooms  \
0                    Töbelistrasse22, 8483 Kollbrunn    2 rooms   
1                    Seemerrütiweg 8, 8483 Kollbrunn  2.5 rooms   
2  Siedlung Verdeblu Tösstalstrasse 18, 8483 Koll...  2.5 rooms   
3                  Tösstalstrasse 10, 8483 Kollbrunn  2.5 rooms   
4                                     8483 Kollbrunn  2.5 rooms   
5       

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from bs4 import BeautifulSoup
import time

def scrape_prices_kollbrunn():
    """Scrape only the listing prices from the Kollbrunn "buy" result page.

    Opens the English result list in headless Chrome, parses the rendered
    HTML with BeautifulSoup and collects the price text of every
    `article[data-item]` element. The table is written to
    'kollbrunn_prices.csv' (semicolon-separated, no index) and returned.

    Returns:
        pandas.DataFrame: a single column 'Price' with one row per listing;
        'N/A' where a listing has no price element.
    """
    # Setup ChromeDriver (headless: no GUI window)
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(service=service, options=options)

    # The URL for the Kollbrunn listings
    url = 'https://www.immoscout24.ch/en/real-estate/buy/city-kollbrunn'

    # Quitting in `finally` ensures Chrome is shut down even if loading the
    # page fails.
    try:
        driver.get(url)
        time.sleep(20)  # Fixed delay to allow the page to load
        source = driver.page_source
    finally:
        driver.quit()

    soup = BeautifulSoup(source, 'html.parser')

    def price_of(listing):
        # Stripped price text of one listing, or 'N/A' when it has none.
        price_element = listing.select_one('span.HgListingRoomsLivingSpacePrice_price_u9Vee')
        return price_element.text.strip() if price_element else 'N/A'

    # NOTE(review): the recorded run of this cell printed an empty DataFrame,
    # so this selector presumably matched nothing on the page - confirm
    # against the live markup.
    prices = [price_of(listing) for listing in soup.select('article[data-item]')]

    df = pd.DataFrame({
        'Price': prices
    })

    # Save to file
    df.to_csv('kollbrunn_prices.csv', sep=";", index=False)

    return df

# Run the price scraper (launches headless Chrome and writes
# 'kollbrunn_prices.csv') and print the resulting DataFrame.
# NOTE(review): the recorded output of this cell is an empty DataFrame.
price_data = scrape_prices_kollbrunn()
print(price_data)


Empty DataFrame
Columns: [Price]
Index: []


In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from bs4 import BeautifulSoup
import time

def scrape_homegate():
    """Scrape the homegate.ch "kaufen" result list for Winterthur.

    Opens the result list in headless Chrome, stores the rendered HTML in
    'page_source.html' for manual inspection, then parses it with
    BeautifulSoup and extracts title, address, rooms and price from every
    result item. The table is written to 'homegate_listings.csv'
    (semicolon-separated, no index) and returned.

    Returns:
        pandas.DataFrame: one row per listing with the columns 'Title',
        'Address', 'Rooms' and 'Price'. A field whose element is not found
        is filled with 'N/A'.
    """
    columns = ['Title', 'Address', 'Rooms', 'Price']

    def text_or_na(node, tag, css_class):
        # Stripped text of the first `tag` with class `css_class` below
        # `node`, or 'N/A' when nothing matches.
        element = node.find(tag, class_=css_class)
        return element.text.strip() if element else 'N/A'

    # Setup ChromeDriver (headless: no GUI window)
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(service=service, options=options)

    # The URL for the Winterthur listings
    url = 'https://www.homegate.ch/kaufen/immobilien/ort-winterthur/trefferliste'

    # Quitting in `finally` ensures Chrome is shut down even if loading the
    # page fails.
    try:
        driver.get(url)
        time.sleep(5)  # Fixed delay to allow the page to load
        source = driver.page_source
    finally:
        driver.quit()

    # Save the HTML source to a file for manual inspection
    with open('page_source.html', 'w', encoding='utf-8') as file:
        file.write(source)

    soup = BeautifulSoup(source, 'html.parser')

    # One dict per listing: a row is added only when the whole listing was
    # parsed, so the columns always have equal length.
    records = []
    for listing in soup.find_all('div', class_='ResultList_listItem_j5Td_'):
        try:
            records.append({
                'Title': text_or_na(listing, 'p', 'HgListingDescription_title_NAAxy'),
                'Address': text_or_na(listing, 'div', 'HgListingCard_address_JGiFv'),
                'Rooms': text_or_na(listing, 'div', 'HgListingRoomsLivingSpace_roomsLivingSpace_GyVgq'),
                'Price': text_or_na(listing, 'span', 'HgListingCard_price_JoPAs'),
            })
        except Exception as e:
            # Best effort: report the broken listing and carry on.
            print(f"An error occurred while processing a listing: {e}")

    # `columns` keeps the header stable even when no listing was found.
    df = pd.DataFrame(records, columns=columns)

    # Save to file
    df.to_csv('homegate_listings.csv', sep=";", index=False)

    return df

# Run the homegate scraper (launches headless Chrome, writes
# 'page_source.html' and 'homegate_listings.csv') and print the result.
homegate_data = scrape_homegate()
print(homegate_data)


                                                Title  \
0   "PANORAMA", 3 REFH an exklusiver Lage mit Stad...   
1   Erleben Sie Luxus und Komfort: Ihr exklusives ...   
2                         Einzug in den Sommerferien!   
3                                      Nahe S-Bahnhof   
4       BEZUGSBEREIT-ZENTRALE UND SEEEHR RUHIGE LAGE!   
5   "PANORAMA", 3 REFH an exklusiver Lage mit Stad...   
6                                      Nahe S-Bahnhof   
7   Von Sonne und Aussicht verwöhnt: Reihen-Einfam...   
8      LICHTDURCHFLUTET, GROSSER BALKON AM SONNENHANG   
9   Einladende und lichtdurchflutete Eigentumswohnung   
10  Einladende Eigentumswohnung mit verglastem Balkon   
11  Komfortable Eigentumswohnung im obersten Geschoss   
12                     Terrassenwohnung in Winterthur   
13       Wohnliegenschaft mit Potenzial in Winterthur   
14  Gepflegte 2.5 Zimmer Wohnung an guter Verkehrs...   
15  Moderne Eleganz trifft auf Komfort: Traumhafte...   
16  Grosszügige 3.5-Zimmer Dopp

In [4]:
# Save the HTML source to a file for manual inspection.
# NOTE(review): `source` is never assigned at notebook level - it only exists
# as a local variable inside the scraping functions - so this cell fails with
# the NameError recorded below. scrape_homegate() already writes
# 'page_source.html' itself; this cell looks like a leftover - confirm and
# remove, or have a function return the HTML first.
with open('page_source.html', 'w', encoding='utf-8') as file:
    file.write(source)


NameError: name 'source' is not defined