## Prepare the GeoDataFrame of buildings

In [14]:
import geopandas as gpd

# Load the preprocessed buildings, index them by their "egid" column and tag
# the geometries as EPSG:4326.
# set_crs() returns a new GeoDataFrame unless inplace=True is passed, so its
# result has to be assigned; calling it as a bare statement leaves the frame
# unchanged. Building the frame in one chained expression (no inplace
# mutation) also keeps this cell safe to re-run.
df_building_geo = (
    gpd.read_file("../data/preprocessed_buildings.geojson")
    .set_index("egid")
    .set_crs(epsg=4326, allow_override=True)
)

print(df_building_geo.head())

          buildingStatus  buildingCategory  buildingClass  municipalityNumber  \
egid                                                                            
103124              1004              1060           1252                 199   
103194              1004              1060           1252                 199   
11516126            1004              1060           1252                 261   
1156661             1004              1060           1252                 230   
116693              1004              1060           1252                 243   

         municipalityName              geometry  
egid                                             
103124         Volketswil  POINT (8.673 47.389)  
103194         Volketswil  POINT (8.662 47.392)  
11516126          ZÃ¼rich  POINT (8.524 47.353)  
1156661        Winterthur  POINT (8.775 47.479)  
116693           Dietikon  POINT (8.406 47.412)  


## Prepare the GeoDataFrame of farms

In [15]:
# Read the farm features and declare their coordinates as EPSG:4326 in a
# single chained expression, then preview the first rows.
df_farm_geo = (
    gpd.read_file('../data/processed_data/farms.geojson')
    .set_crs(epsg=4326, allow_override=True)
)

print(df_farm_geo.head())

                                                name  \
0  Ritzmann-Müller Betriebsgemeinschaft, 8459 Volken   
1         Bauernhof Familie Werffeli, 8104 Weiningen   
2                       Hof zur Au, 8165 Schleinikon   
3                         Sonnhaldenhof, 5610 Wohlen   
4              Streulis Privatbrennerei, 8810 Horgen   

                             address  \
0      Flaachtalstr. 43\n8459 Volken   
1  Friedhofstrasse 4\n8104 Weiningen   
2          Auweg 3\n8165 Schleinikon   
3         Sonnhaldenhof\n5610 Wohlen   
4   Rietwiesstrasse 139\n8810 Horgen   

                                        lebensmittel                  geometry  
0  [ { "category": "Verkaufsstellen", "products":...  POINT (8.62320 47.57524)  
1  [ { "category": "Eier", "products": [ "Hühnere...  POINT (8.43603 47.42175)  
2  [ { "category": "Verkaufsstellen", "products":...  POINT (8.39689 47.49843)  
3  [ { "category": "Verkaufsstellen", "products":...  POINT (8.29268 47.34226)  
4  [ { "category"

In [4]:
def find_nearby_farms_for_building(building, nearby_farms_df, farm_dict):
    """List the farms recorded as nearby for a single building.

    Args:
        building: Object whose ``name`` attribute is the building's index
            value (e.g. a row taken from the buildings frame).
        nearby_farms_df: DataFrame with ``building_index``, ``farm_index`` and
            ``distance`` columns. A frame without rows (even one without
            columns) is accepted and yields an empty list.
        farm_dict: Mapping from farm index to a record containing a ``"name"``
            entry.

    Returns:
        A list of ``{"id", "distance", "name"}`` dicts, one per matching row,
        in the row order of ``nearby_farms_df``.

    Raises:
        KeyError: If a matching farm index is missing from ``farm_dict``.
    """
    # A frame built from an empty list of matches has no columns at all, so
    # selecting "building_index" from it would raise KeyError.
    if nearby_farms_df.empty:
        return []

    matches = nearby_farms_df[nearby_farms_df["building_index"] == building.name]

    # Read the two columns separately instead of using iterrows(): iterrows()
    # packs each row into one Series, which upcasts integer farm ids to float
    # next to the float distance (6 would come back as 6.0).
    farm_ids = matches["farm_index"].tolist()
    distances = matches["distance"].tolist()

    return [
        {"id": farm_id, "distance": distance, "name": farm_dict[farm_id]["name"]}
        for farm_id, distance in zip(farm_ids, distances)
    ]

In [5]:
import pandas as pd
from geopy.distance import geodesic


def find_nearby_farms(building_gdf, farm_gdf, max_distance_km):
    """Pair every building with the farms within a maximum geodesic distance.

    Args:
        building_gdf: Frame with one row per building and a ``geometry``
            column of points; ``y`` is used as latitude and ``x`` as longitude.
        farm_gdf: Frame with one row per farm, same ``geometry`` convention.
        max_distance_km: Distance threshold. NOTE: despite the ``_km`` suffix,
            the value is compared against ``geodesic(...).meters``, so it must
            be given in metres. The parameter name is kept so that existing
            callers keep working.

    Returns:
        DataFrame with the columns ``building_index``, ``farm_index`` and
        ``distance`` (metres), one row per building/farm pair within the
        threshold. The columns are present even when no pair qualifies.
    """
    result_columns = ["building_index", "farm_index", "distance"]

    # The farm coordinates do not depend on the building, so extract them once
    # instead of walking farm_gdf.iterrows() again for every building.
    # Coordinate order is (latitude, longitude).
    farm_points = [
        (farm_idx, (farm["geometry"].y, farm["geometry"].x))
        for farm_idx, farm in farm_gdf.iterrows()
    ]

    nearby_farms_list = []
    for building_idx, building in building_gdf.iterrows():
        # Coordinate order is (latitude, longitude).
        building_coords = (building["geometry"].y, building["geometry"].x)

        for farm_idx, farm_coords in farm_points:
            distance = geodesic(building_coords, farm_coords).meters

            if distance <= max_distance_km:
                nearby_farms_list.append(
                    {
                        "building_index": building_idx,
                        "farm_index": farm_idx,
                        "distance": distance,
                    }
                )

    # Naming the columns explicitly keeps the schema stable for an empty
    # result; pd.DataFrame([]) alone would have no columns at all.
    return pd.DataFrame(nearby_farms_list, columns=result_columns)

In [6]:
# Row-index -> record mapping of the farms, for direct lookup by farm index
farm_dict = df_farm_geo.to_dict(orient="index")

# Distance threshold handed to find_nearby_farms
max_distance = 10000  # 10 km

# Collect every (building, farm) pair that lies within the threshold
nearby_farms = find_nearby_farms(
    building_gdf=df_building_geo,
    farm_gdf=df_farm_geo,
    max_distance_km=max_distance,
)

In [7]:
# Preview the first five building/farm pairs
nearby_farms.head(n=5)

Unnamed: 0,building_index,farm_index,distance
0,103124,6,6420.529978
1,103124,13,8990.21726
2,103124,26,7069.523291
3,103124,31,7135.936047
4,103124,35,3241.967143


In [8]:
import json

# Read the buildings GeoJSON as UTF-8 explicitly. Without encoding=, open()
# falls back to the platform's default encoding, which can garble non-ASCII
# names on systems where that default is not UTF-8.
with open('../data/processed_data/processed_buildings.geojson', 'r', encoding='utf-8') as file:
    original_geojson = json.load(file)

# Group the farm matches by building.
# itertuples() keeps each column's own dtype, whereas iterrows() packs a row
# into a single Series and would upcast the integer indices to float (so the
# saved file would contain e.g. "farm_id": 6.0). It also yields nothing for a
# frame without rows, so an empty nearby_farms needs no special handling.
nearby_farms_dict = {}
for row in nearby_farms.itertuples(index=False):
    nearby_farms_dict.setdefault(row.building_index, []).append(
        {"farm_id": row.farm_index, "distance": row.distance}
    )

# Enrich the original GeoJSON with the nearby farms data. Features whose egid
# has no entry in nearby_farms_dict are left without a "nearby_farms" property.
for feature in original_geojson["features"]:
    building_index = feature["properties"]["egid"]
    if building_index in nearby_farms_dict:
        feature["properties"]["nearby_farms"] = nearby_farms_dict[building_index]

In [9]:
# Save the enriched GeoJSON.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file must be
# opened as UTF-8 explicitly; otherwise the platform's default encoding is
# used and the output may not be valid UTF-8 GeoJSON.
output_filename = "../data/processed_data/enriched_buildings_with_farms.geojson"
with open(output_filename, "w", encoding="utf-8") as file:
    json.dump(original_geojson, file, ensure_ascii=False, indent=4)