# Scrape farm details

In [None]:
# load csv data
import csv

# Load the farm overview rows (one list of strings per CSV line) without the
# header line.
#
# newline="" lets the csv module do its own newline handling, so quoted fields
# containing line breaks are parsed as one field.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used. Confirm how the CSV was written before pinning one.
with open("../data/raw_data/farm_overview.csv", mode="r", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # drop the header; the default keeps an empty file from raising
    # csv.reader yields [] for blank lines; skip them because rows are later
    # accessed by index.
    farm_overview = [row for row in reader if row]

print(farm_overview)

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

from typing import Dict


def scrape_detail(url: str) -> Dict:
    """Scrape the address and the "Lebensmittel" offer from one farm detail page.

    A fresh headless Chrome instance is started for the call and is always
    shut down again, even if scraping fails part-way.

    Args:
        url: Address of the farm detail page, e.g.
            "https://www.hofsuche.schweizerbauern.ch/de/farm/langacherhof-9795/vomhof".

    Returns:
        A dict with two keys:
        ``"address"`` - text of the first element with class ``address-block``,
        or ``"Address not found"`` when the page has no such element;
        ``"lebensmittel"`` - a list of ``{"category": str, "products": [str, ...]}``
        dicts, one per offer rubric found on the page.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If an offer rubric
            has no ``.sub.label`` element.
    """
    options = Options()
    # Pass the Chrome flag directly: the `Options.headless` property was
    # deprecated and later removed in Selenium 4, after which assigning it no
    # longer enables headless mode.
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)

    try:
        driver.get(url)

        # Fixed pause before querying the DOM; presumably gives client-side
        # rendering time to finish.
        time.sleep(3)

        # find_element() raises instead of returning a falsy value, so use the
        # list-returning variant to make the "not found" fallback reachable.
        address_blocks = driver.find_elements(By.CLASS_NAME, "address-block")
        address = address_blocks[0].text if address_blocks else "Address not found"

        # Extract the "Lebensmittel" information, one entry per offer rubric.
        lebensmittel_data = []
        lebensmittel_sections = driver.find_elements(
            By.CSS_SELECTOR, ".detail-section .comp-vomhof-offer-rubric"
        )

        for section in lebensmittel_sections:
            category = section.find_element(By.CSS_SELECTOR, ".sub.label").text
            products = section.find_elements(By.CSS_SELECTOR, ".product-name")
            # Elements with empty text are dropped.
            product_names = [product.text for product in products if product.text]

            lebensmittel_data.append({"category": category, "products": product_names})

        farm_info = {"address": address, "lebensmittel": lebensmittel_data}
    finally:
        # Always release the browser; otherwise every failed page leaves a
        # Chrome process behind.
        driver.quit()

    print(farm_info)
    return farm_info

In [None]:
# Scrape the detail page of every farm listed in the overview and keep the
# result next to the farm's name. Column 0 of each row is the name, column 1
# the detail-page URL.
farm_details = []

for row in farm_overview:
    name, detail_url = row[0], row[1]
    farm_details.append({"name": name, "info": scrape_detail(detail_url)})

In [None]:
import json

# Persist the scraped details as indented JSON; ensure_ascii=False writes
# non-ASCII characters as-is instead of \u escapes.
with open("../data/raw_data/farm_details.json", mode="w", encoding="utf-8") as out_file:
    json.dump(obj=farm_details, fp=out_file, indent=4, ensure_ascii=False)