## Prepare the GeoDataFrame of buildings

In [None]:
import geopandas as gpd

# Load the preprocessed building features and index them by their "egid"
# column so buildings can later be addressed by that identifier.
df_building_geo = gpd.read_file("../data/preprocessed_buildings.geojson")
df_building_geo.set_index("egid", inplace=True)

# set_crs() returns a new GeoDataFrame unless inplace=True is passed, so the
# result has to be assigned back; otherwise the CRS declaration is discarded.
df_building_geo = df_building_geo.set_crs(epsg=4326, allow_override=True)

print(df_building_geo.head())

## Prepare the GeoDataFrame of farms

In [None]:
# Read the farm features and declare their CRS as EPSG:4326 in one step,
# overriding any CRS that the file already carries.
df_farm_geo = gpd.read_file('../data/processed_data/farms.geojson').set_crs(
    epsg=4326, allow_override=True
)

print(df_farm_geo.head())

In [None]:
def find_nearby_farms_for_building(building, nearby_farms_df, farm_dict):
    """Return the farms recorded as being near a single building.

    Args:
        building: Row-like object whose ``name`` attribute is the building's
            index label (for example a row yielded by ``DataFrame.iterrows``).
        nearby_farms_df: DataFrame with ``building_index``, ``farm_index`` and
            ``distance`` columns, one row per building/farm pair.
        farm_dict: Mapping from farm index to a dict of farm attributes that
            contains at least a ``"name"`` key.

    Returns:
        A list of dicts with the keys ``"id"``, ``"distance"`` and ``"name"``,
        one per matching row, in the order the rows appear in
        ``nearby_farms_df``. The list is empty when the building has no
        matches or when ``nearby_farms_df`` has no rows.

    Raises:
        KeyError: If a matching farm index is missing from ``farm_dict``.
    """
    # A DataFrame built from an empty list has no columns at all, so the
    # column lookups below would raise; there is nothing to match anyway.
    if nearby_farms_df.empty:
        return []

    matches = nearby_farms_df[nearby_farms_df["building_index"] == building.name]

    # Iterate column-wise instead of using iterrows(): iterrows() packs each
    # row into a single Series, which upcasts integer farm ids to float
    # whenever the row also holds the float distance.
    return [
        {"id": farm_id, "distance": distance, "name": farm_dict[farm_id]["name"]}
        for farm_id, distance in zip(
            matches["farm_index"].tolist(), matches["distance"].tolist()
        )
    ]

In [None]:
import pandas as pd
from geopy.distance import geodesic


def find_nearby_farms(building_gdf, farm_gdf, max_distance_km):
    """Pair every building with each farm lying within a distance threshold.

    Args:
        building_gdf: Frame of buildings with point geometries in a
            ``geometry`` column; ``y`` is used as latitude and ``x`` as
            longitude.
        farm_gdf: Frame of farms with the same geometry layout.
        max_distance_km: Inclusive distance threshold. NOTE: despite the
            name, the value is compared against geodesic distances in
            **metres**, so pass ``10000`` for 10 km. The parameter name is
            kept for backward compatibility.

    Returns:
        DataFrame with the columns ``building_index``, ``farm_index`` and
        ``distance`` (metres), one row per building/farm pair within range.
        The columns are present even when no pair qualifies.
    """
    columns = ["building_index", "farm_index", "distance"]

    # Farm coordinates do not depend on the building, so extract them once
    # instead of once per building. geodesic() expects (latitude, longitude).
    farm_points = [
        (farm_idx, (geom.y, geom.x))
        for farm_idx, geom in zip(farm_gdf.index, farm_gdf["geometry"])
    ]

    nearby_farms_list = []
    for building_idx, geom in zip(building_gdf.index, building_gdf["geometry"]):
        building_coords = (geom.y, geom.x)
        for farm_idx, farm_coords in farm_points:
            distance = geodesic(building_coords, farm_coords).meters
            if distance <= max_distance_km:
                nearby_farms_list.append(
                    {
                        "building_index": building_idx,
                        "farm_index": farm_idx,
                        "distance": distance,
                    }
                )

    # Passing the columns explicitly keeps the schema stable for callers even
    # when the list is empty (an empty list alone yields a column-less frame).
    return pd.DataFrame(nearby_farms_list, columns=columns)

In [None]:
# Convert farm GeoDataFrame to dictionary for faster lookup
# Distance threshold handed to find_nearby_farms; the function compares it
# against distances in metres.
max_distance = 10000  # 10 km

# Dict of farm rows keyed by index label, for direct attribute lookup by farm.
farm_dict = df_farm_geo.to_dict(orient="index")

# Compute every building/farm pair that lies within the threshold.
nearby_farms = find_nearby_farms(
    building_gdf=df_building_geo,
    farm_gdf=df_farm_geo,
    max_distance_km=max_distance,
)

In [None]:
# Show the first rows of the building/farm pairs as the cell's output.
nearby_farms.head()

In [None]:
import json

# Load the building GeoJSON as plain JSON so its features can be extended in
# place. GeoJSON is UTF-8 by specification, so read it as such instead of
# relying on the platform's default encoding.
# NOTE(review): this path differs from the file loaded into df_building_geo;
# the enrichment below assumes both carry the same "egid" values - confirm.
with open(
    '../data/processed_data/processed_buildings.geojson', 'r', encoding='utf-8'
) as file:
    original_geojson = json.load(file)

# Group the building/farm pairs by building. itertuples() keeps each column's
# own dtype, whereas iterrows() upcasts integer ids to float when a row also
# holds the float distance, which would be written out as e.g. 12.0.
nearby_farms_dict = {}
for pair in nearby_farms.itertuples(index=False):
    nearby_farms_dict.setdefault(pair.building_index, []).append(
        {"farm_id": pair.farm_index, "distance": pair.distance}
    )

# Enrich the original GeoJSON with the nearby farms data. Features without any
# farm in range are left untouched (no "nearby_farms" property is added).
for feature in original_geojson["features"]:
    building_index = feature["properties"]["egid"]
    if building_index in nearby_farms_dict:
        feature["properties"]["nearby_farms"] = nearby_farms_dict[building_index]

In [None]:
# Save the enriched GeoJSON. ensure_ascii=False writes non-ASCII characters
# verbatim, so the file is opened explicitly as UTF-8 rather than with the
# platform's default encoding, which may be unable to encode them.
output_filename = "../data/processed_data/enriched_buildings_with_farms.geojson"
with open(output_filename, "w", encoding="utf-8") as file:
    json.dump(original_geojson, file, ensure_ascii=False, indent=4)