# Scrape farm details

In [1]:
# load csv data

import csv

# Read every data row of the farm overview CSV into a list of lists.
# - encoding is given explicitly so the umlauts in farm names decode the same
#   way on every platform (assumes the file is stored as UTF-8 -- TODO confirm).
# - newline='' is what the csv module asks for when handing it a file object,
#   so that quoted fields containing line breaks are parsed correctly.
with open('../data/raw_data/farm_overview.csv', mode='r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; the default avoids StopIteration on an empty file
    farm_overview = list(reader)

print(farm_overview)

[['Flower Top, 8046  Zürich', 'https://www.hofsuche.schweizerbauern.ch/de/farm/flower-top-9766/vomhof'], ['Ritzmann-Müller Betriebsgemeinschaft, 8459 Volken', 'https://www.hofsuche.schweizerbauern.ch/de/farm/ritzmann-mueller-betriebsgemeinschaft-13796/vomhof'], ['Bauernhof Familie Werffeli, 8104 Weiningen', 'https://www.hofsuche.schweizerbauern.ch/de/farm/bauernhof-familie-werffeli-9118/vomhof'], ['Hof zur Au, 8165 Schleinikon', 'https://www.hofsuche.schweizerbauern.ch/de/farm/hof-zur-au-12143/vomhof'], ['Sonnhaldenhof, 5610 Wohlen', 'https://www.hofsuche.schweizerbauern.ch/de/farm/sonnhaldenhof-9121/vomhof'], ['Streulis Privatbrennerei, 8810 Horgen', 'https://www.hofsuche.schweizerbauern.ch/de/farm/streulis-privatbrennerei-9652/vomhof'], ['Bio Waidhof, 8052 Zürich', 'https://www.hofsuche.schweizerbauern.ch/de/farm/bio-waidhof-421/vomhof'], ['Gfellerhof und Kämatten, 8051 Zürich', 'https://www.hofsuche.schweizerbauern.ch/de/farm/gfellerhof-und-kaematten-1253/vomhof'], ['Schumacher Agro

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

from typing import Dict

def scrape_detail(url: str, wait_seconds: float = 3) -> Dict:
    """Scrape the address and the product offer from one farm detail page.

    A new headless Chrome session is started for each call and is always shut
    down again, even when loading or parsing the page raises.

    Args:
        url: Address of the farm detail page to load.
        wait_seconds: Fixed pause (in seconds) after requesting the page and
            before looking up elements, giving the page time to render.

    Returns:
        A dict with two keys:
        ``'address'``: text of the first element with class ``address-block``,
        or ``'Address not found'`` when the page has no such element.
        ``'lebensmittel'``: a list with one
        ``{'category': str, 'products': [str, ...]}`` dict per element matching
        ``.detail-section .comp-vomhof-offer-rubric``; products with empty
        text are left out.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If an offer rubric
            contains no ``.sub.label`` element.
    """
    # Set up Selenium with Chrome.
    options = Options()
    # The `options.headless = True` setter was deprecated and later removed in
    # Selenium 4; passing the Chrome flag directly works regardless of version.
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)

    try:
        driver.get(url)

        # Give the page time to render before querying it.
        time.sleep(wait_seconds)

        # Extract the address. find_element() raises when nothing matches, so
        # find_elements() (which returns an empty list instead) is used to make
        # the 'Address not found' fallback actually reachable.
        address_blocks = driver.find_elements(By.CLASS_NAME, 'address-block')
        address = address_blocks[0].text if address_blocks else 'Address not found'

        # Extract the "Lebensmittel" information, one entry per offer rubric.
        lebensmittel_data = []
        lebensmittel_sections = driver.find_elements(
            By.CSS_SELECTOR, '.detail-section .comp-vomhof-offer-rubric'
        )

        for section in lebensmittel_sections:
            category = section.find_element(By.CSS_SELECTOR, '.sub.label').text
            products = section.find_elements(By.CSS_SELECTOR, '.product-name')
            product_names = [product.text for product in products if product.text]

            lebensmittel_data.append({'category': category, 'products': product_names})

        farm_info = {'address': address, 'lebensmittel': lebensmittel_data}
    finally:
        # Always close the browser so a failed page does not leave a stray
        # Chrome process behind.
        driver.quit()

    # Echo the result so progress is visible while many pages are scraped.
    print(farm_info)
    return farm_info


In [4]:
# Visit each farm's detail page in turn and pair the scraped result with the
# farm's display name. Results are appended one at a time so that whatever was
# collected so far stays available if a later page fails.
farm_details = []

for entry in farm_overview:
    # Each overview row holds the farm name at index 0 and its detail URL at index 1.
    scraped = scrape_detail(entry[1])
    farm_details.append({"name": entry[0], "info": scraped})

{'address': 'Jürg Schwarz\nZehntenhaussstrasse\n8046 Zürich', 'lebensmittel': [{'category': 'Blumen und Pflanzen', 'products': ['Schnittblumen']}, {'category': 'Gemüse und andere pflanzliche Produkte', 'products': ['Kürbis']}, {'category': 'Getränke', 'products': ['Apfelsaft']}, {'category': 'Obst, Beeren und Nüsse', 'products': ['Äpfel', 'Birnen', 'Zwetschgen']}]}
{'address': 'Christina Ritzmann\nFlaachtalstr. 43\n8459 Volken', 'lebensmittel': [{'category': 'Verkaufsstellen', 'products': []}, {'category': 'Gemüse und andere pflanzliche Produkte', 'products': ['Kartoffeln', 'Weisskohl', 'Zwiebeln']}, {'category': 'Getränke', 'products': ['Likör', 'Marc', 'Roséwein', 'Rotwein', 'Traubensaft', 'Weisswein']}, {'category': 'Obst, Beeren und Nüsse', 'products': ['Trauben']}]}
{'address': 'Godi Werffeli\nFriedhofstrasse 4\n8104 Weiningen', 'lebensmittel': [{'category': 'Eier', 'products': ['Hühnereier']}, {'category': 'Fleisch', 'products': ['Kalbfleisch', 'Rindfleisch', 'Schweinefleisch', '

In [5]:
import json

# Persist the scraped details as indented UTF-8 JSON; ensure_ascii=False keeps
# non-ASCII characters (e.g. umlauts) readable instead of \u-escaping them.
with open("../data/raw_data/farm_details.json", "w", encoding='utf-8') as json_file:
    json.dump(
        farm_details,
        json_file,
        ensure_ascii=False,
        indent=4,
    )