In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from bs4 import BeautifulSoup
import time

def scrape_apartments(url):
    # Setup ChromeDriver
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run Chrome in headless mode (no GUI)
    
    # Initialize lists to store data
    addresses = []
    titles = []
    descriptions = []
    rooms = []
    living_space = []
    prices = []

    # Loop through the 50 pages
    for page_num in range(1, 50):
        print(f"Scraping page {page_num}...")
        driver = webdriver.Chrome(service=service, options=options)  # Define driver within the loop
        page_url = f"{url}?page={page_num}"
        driver.get(page_url)
        time.sleep(3)  # Add a delay to allow the page to load

        # Get the full content of the website
        source = driver.page_source

        # Parse HTML content with BeautifulSoup
        soup = BeautifulSoup(source, 'html.parser')

        # Get addresses
        address_elements = soup.find_all(class_='HgListingCard_address_JGiFv')
        for element in address_elements:
            address = element.find('address').text.strip()
            addresses.append(address)

        # Get titles and descriptions
        title_desc_elements = soup.find_all(class_='HgListingDescription_description_r5HCO')
        for element in title_desc_elements:
            title = element.find('span').text.strip()
            description_elem = element.find('p', class_='HgListingDescription_large_uKs3J')
            description = description_elem.text.strip() if description_elem else ''
            titles.append(title)
            descriptions.append(description)

        # Get rooms, living space, and prices
        room_space_price_elements = soup.find_all(class_='HgListingRoomsLivingSpacePrice_roomsLivingSpacePrice_M6Ktp')
        for element in room_space_price_elements:
            strong_tags = element.find_all('strong')
            rooms.append(strong_tags[0].text.strip()) if strong_tags else rooms.append('')
            living_space.append(strong_tags[1].text.strip()) if len(strong_tags) > 1 else living_space.append('')
            prices.append(element.find('span').text.strip())

        # Close driver
        driver.quit()

    # Ensure all arrays have the same length
    data_length = min(len(addresses), len(titles), len(descriptions), len(rooms), len(living_space), len(prices))

    # Dataframe
    df = pd.DataFrame({'Address': addresses[:data_length],
                       'Title': titles[:data_length],
                       'Description': descriptions[:data_length],
                       'Rooms': rooms[:data_length],
                       'Living Space (sqm)': living_space[:data_length],
                       'Price': prices[:data_length]})

    # Save to file
    df.to_csv('immoscout24.csv', sep=";", index=False)

    return df

# Specify the URL to scrape
url = 'https://www.immoscout24.ch/en/real-estate/buy/country-switzerland-fl'

# Run the scraping function and display the results
apartment_data = scrape_apartments(url)
print(apartment_data)


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping page 40...
Scraping page 41...
Scraping page 42...
Scraping page 43...
Scraping page 44...
Scraping page 45...
Scraping page 46...
Scraping page 47...
Scraping page 48...
Scraping page 49...
                             