# Scrape data from [Schweizer Bauern](https://www.hofsuche.schweizerbauern.ch)

## Filter data
We are only interested in farms from Zurich, so we restrict the search with the query parameter ``extSearch.query.q=Zürich``: https://www.hofsuche.schweizerbauern.ch/de?extSearch.query.q=Zürich&project=vomhof&extSearch=true

In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from time import sleep

# Scrape the farm overview (title + detail-page URL) for the "Zürich" search
# on hofsuche.schweizerbauern.ch. The result list is paginated behind a
# "Mehr laden" button, so the page is expanded first and then parsed once.

# Upper bound on "Mehr laden" clicks so the pagination loop always terminates,
# even if the button never disappears from the page.
MAX_LOAD_MORE_CLICKS = 500

options = Options()
# The `options.headless` setter was deprecated and later removed in Selenium 4;
# on current releases assigning it is silently ignored and a visible browser
# window opens. Passing the Chrome flag works regardless of Selenium version.
options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

farm_data = []

try:
    driver.get(
        "https://www.hofsuche.schweizerbauern.ch/de?extSearch.query.q=Zürich&project=vomhof&extSearch=true"
    )
    sleep(5)  # give the initial result list time to render

    # Click the "Mehr laden" button until it is no longer available, or until
    # MAX_LOAD_MORE_CLICKS is reached.
    for _ in range(MAX_LOAD_MORE_CLICKS):
        # Scroll to the bottom of the page so the button is in view
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(2)  # wait for the page to scroll down and content to load

        # find_elements returns an empty list instead of raising, so "button
        # gone" is handled without a catch-all exception handler.
        load_more_buttons = driver.find_elements(
            By.XPATH, '//button[contains(., "Mehr laden")]'
        )
        if not load_more_buttons:
            print("All content loaded or button not found.")
            break

        try:
            load_more_buttons[0].click()
        except Exception as exc:
            # Best effort: keep whatever has been loaded so far, but report the
            # real reason instead of claiming that everything was loaded.
            print(f"Stopped loading more results: {type(exc).__name__}: {exc}")
            break
        sleep(4)  # wait for the content to load after clicking
    else:
        print(
            f"Stopped after {MAX_LOAD_MORE_CLICKS} 'Mehr laden' clicks; "
            "results may be incomplete."
        )

    # Extract the required data
    farm_cards = driver.find_elements(By.CSS_SELECTOR, 'a[href*="/de/farm/"]')
    skipped_links = 0
    for card in farm_cards:
        href = card.get_attribute("href")
        title_elements = card.find_elements(By.CSS_SELECTOR, "div.card.de")
        if not title_elements:
            # The anchor matched the href pattern but has no `div.card.de`
            # child; skip it rather than abort the whole scrape.
            skipped_links += 1
            continue
        title = title_elements[0].get_attribute("title")
        farm_data.append({"title": title, "url": href})

    if skipped_links:
        print(f"Skipped {skipped_links} farm link(s) without a title element.")

finally:
    # Always shut the browser down, even if scraping failed midway.
    driver.quit()

print(farm_data)

All content loaded or button not found.
[{'title': 'Flower Top, 8046  Zürich', 'url': 'https://www.hofsuche.schweizerbauern.ch/de/farm/flower-top-9766/vomhof'}, {'title': 'Ritzmann-Müller Betriebsgemeinschaft, 8459 Volken', 'url': 'https://www.hofsuche.schweizerbauern.ch/de/farm/ritzmann-mueller-betriebsgemeinschaft-13796/vomhof'}, {'title': 'Bauernhof Familie Werffeli, 8104 Weiningen', 'url': 'https://www.hofsuche.schweizerbauern.ch/de/farm/bauernhof-familie-werffeli-9118/vomhof'}, {'title': 'Hof zur Au, 8165 Schleinikon', 'url': 'https://www.hofsuche.schweizerbauern.ch/de/farm/hof-zur-au-12143/vomhof'}, {'title': 'Sonnhaldenhof, 5610 Wohlen', 'url': 'https://www.hofsuche.schweizerbauern.ch/de/farm/sonnhaldenhof-9121/vomhof'}, {'title': 'Streulis Privatbrennerei, 8810 Horgen', 'url': 'https://www.hofsuche.schweizerbauern.ch/de/farm/streulis-privatbrennerei-9652/vomhof'}, {'title': 'Bio Waidhof, 8052 Zürich', 'url': 'https://www.hofsuche.schweizerbauern.ch/de/farm/bio-waidhof-421/vomho

In [8]:
import csv

# Persist the scraped overview as a two-column CSV: a header row followed by
# one row per entry in farm_data. newline='' is the csv module's recommended
# setting so that it controls the line endings itself.
with open('../data/farm_overview.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Title', 'URL'])
    csv_writer.writerows([entry['title'], entry['url']] for entry in farm_data)